@kolanut/language-packs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +119 -0
- package/LICENSE +17 -0
- package/README.md +237 -0
- package/badges/coverage.json +6 -0
- package/badges/packs.json +6 -0
- package/badges/targets.json +6 -0
- package/badges/tokens.json +6 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1 -0
- package/dist/load-pack.d.ts +6 -0
- package/dist/load-pack.js +31 -0
- package/dist/types.d.ts +33 -0
- package/dist/types.js +1 -0
- package/pack.schema.json +86 -0
- package/package.json +61 -0
- package/packs/PACK_SCOPE.md +68 -0
- package/packs/afrikaans/keywords.json +329 -0
- package/packs/afrikaans/pack.json +361 -0
- package/packs/amharic/keywords.json +329 -0
- package/packs/amharic/pack.json +359 -0
- package/packs/arabic/keywords.json +329 -0
- package/packs/arabic/pack.json +369 -0
- package/packs/bambara/keywords.json +299 -0
- package/packs/bambara/pack.json +332 -0
- package/packs/by-country.json +183 -0
- package/packs/by-region.json +49 -0
- package/packs/coverage-summary.json +424 -0
- package/packs/french/keywords.json +329 -0
- package/packs/french/pack.json +381 -0
- package/packs/fulfulde/keywords.json +299 -0
- package/packs/fulfulde/pack.json +336 -0
- package/packs/hausa/keywords.json +329 -0
- package/packs/hausa/pack.json +363 -0
- package/packs/igbo/keywords.json +329 -0
- package/packs/igbo/pack.json +360 -0
- package/packs/index.json +397 -0
- package/packs/kinyarwanda/keywords.json +329 -0
- package/packs/kinyarwanda/pack.json +359 -0
- package/packs/languages-roadmap.json +28 -0
- package/packs/lingala/keywords.json +329 -0
- package/packs/lingala/pack.json +360 -0
- package/packs/logical-tokens.json +120 -0
- package/packs/luganda/keywords.json +329 -0
- package/packs/luganda/pack.json +359 -0
- package/packs/nigerian-pidgin/keywords.json +332 -0
- package/packs/nigerian-pidgin/pack.json +362 -0
- package/packs/official-target-keywords.json +166 -0
- package/packs/oromo/keywords.json +329 -0
- package/packs/oromo/pack.json +360 -0
- package/packs/portuguese-africa/keywords.json +299 -0
- package/packs/portuguese-africa/pack.json +334 -0
- package/packs/sesotho/keywords.json +299 -0
- package/packs/sesotho/pack.json +330 -0
- package/packs/setswana/keywords.json +299 -0
- package/packs/setswana/pack.json +331 -0
- package/packs/shona/keywords.json +329 -0
- package/packs/shona/pack.json +359 -0
- package/packs/somali/keywords.json +329 -0
- package/packs/somali/pack.json +362 -0
- package/packs/swahili/keywords.json +329 -0
- package/packs/swahili/pack.json +365 -0
- package/packs/target-coverage.json +1083 -0
- package/packs/tigrinya/keywords.json +329 -0
- package/packs/tigrinya/pack.json +360 -0
- package/packs/twi/keywords.json +329 -0
- package/packs/twi/pack.json +360 -0
- package/packs/wolof/keywords.json +329 -0
- package/packs/wolof/pack.json +361 -0
- package/packs/xhosa/keywords.json +329 -0
- package/packs/xhosa/pack.json +360 -0
- package/packs/yoruba/keywords.json +329 -0
- package/packs/yoruba/pack.json +363 -0
- package/packs/zulu/keywords.json +329 -0
- package/packs/zulu/pack.json +360 -0
package/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Contributing to Kola Language Packs
|
|
2
|
+
|
|
3
|
+
Thank you for helping make programming accessible to African developers in their native languages! Your contribution, whether it's a new language pack, an improvement to an existing one, or feedback, is valuable to our community.
|
|
4
|
+
|
|
5
|
+
## Who can contribute?
|
|
6
|
+
|
|
7
|
+
We welcome contributions from:
|
|
8
|
+
|
|
9
|
+
- **Native speakers** — Essential for new language packs and accuracy improvements
|
|
10
|
+
- **Linguists and educators** — Help us ensure translations are culturally appropriate and pedagogically sound
|
|
11
|
+
- **Developers** — Integrate these packs into other tools and platforms
|
|
12
|
+
|
|
13
|
+
## Language pack structure
|
|
14
|
+
|
|
15
|
+
Each language pack lives in its own folder under `packs/<name>/` and contains two key files:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
packs/yoruba/
|
|
19
|
+
├── pack.json ← Main file with all metadata and keyword mappings
|
|
20
|
+
└── keywords.json ← Keyword mappings only (for easier tooling integration)
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
**Before you start:** Please read [packs/PACK_SCOPE.md](./packs/PACK_SCOPE.md) first. It explains the difference between language codes and country codes, and helps you choose the right scope when a country has multiple languages.
|
|
24
|
+
|
|
25
|
+
### Required metadata: Geographic scope
|
|
26
|
+
|
|
27
|
+
Every language pack must clearly declare which language variant it represents and where it's used. This helps contributors know exactly what belongs in each pack:
|
|
28
|
+
|
|
29
|
+
```json
|
|
30
|
+
{
|
|
31
|
+
"name": "nigerian-pidgin",
|
|
32
|
+
"languageCode": "pcm",
|
|
33
|
+
"locale": "pcm-NG",
|
|
34
|
+
"countries": ["NG"],
|
|
35
|
+
"regions": ["West Africa"],
|
|
36
|
+
"scopeNote": "Nigerian Pidgin (Naija) only. Not other West African creoles."
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
| Field | Example | What it means |
|
|
41
|
+
|-------|---------|---------------|
|
|
42
|
+
| `languageCode` | `pcm` | ISO 639 language code (e.g., `pcm` for Nigerian Pidgin). **Important:** This is the language (`pcm`), not the country (`NG`). |
|
|
43
|
+
| `locale` | `pcm-NG` | BCP-47 locale tag combining language and primary country |
|
|
44
|
+
| `countries` | `["NG"]` | ISO 3166-1 country codes where this language variant is spoken |
|
|
45
|
+
| `regions` | `["West Africa"]` | Geographic grouping for browsing and organization |
|
|
46
|
+
| `scopeNote` | (free text) | Explains what's included in this pack and what belongs in future packs. Example: "Nigerian Pidgin only, not Cameroonian or Ghanaian creoles." |
|
|
47
|
+
|
|
48
|
+
You can browse packs in `packs/index.json`, filter by country in `packs/by-country.json`, or by region in `packs/by-region.json`.
|
|
49
|
+
|
|
50
|
+
**Translation quality:** Prefer phrasing you would actually use when teaching code. If you borrowed terms from another project or glossary, say so in the PR. Leave `reviewStatus` as `starter` — maintainers update it after review.
|
|
51
|
+
|
|
52
|
+
### The token registry: What you need to translate
|
|
53
|
+
|
|
54
|
+
**Every pack must translate all logical tokens in `packs/logical-tokens.json`** (currently **99 tokens** across core, standard, and advanced tiers).
|
|
55
|
+
|
|
56
|
+
**Important rules:**
|
|
57
|
+
- Don't create custom token keys — propose new tokens in a separate PR
|
|
58
|
+
- Minimum enforced: `IF`, `FOR`, `FUNCTION`, `RETURN`, `PRINT`
|
|
59
|
+
- New tokens may ship with English placeholders until a native speaker PR replaces them
|
|
60
|
+
|
|
61
|
+
See `packs/target-coverage.json` for how each token maps to JavaScript, Python, TypeScript, Go, and Rust.
|
|
62
|
+
|
|
63
|
+
### How to provide translations
|
|
64
|
+
|
|
65
|
+
You can provide either a single translation or multiple aliases (for regional variants or different phrasings):
|
|
66
|
+
|
|
67
|
+
**Single translation:**
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"LET": "make we say"
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Multiple aliases (recommended):**
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"LET": ["make we say", "make", "let"]
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Best practice:** When providing multiple aliases, include the English fallback as the last option. This helps learners transition between languages.
|
|
82
|
+
|
|
83
|
+
### Transpilation targets
|
|
84
|
+
|
|
85
|
+
Every pack must declare all five transpile backends (the transpiler picks the subset it needs per target language):
|
|
86
|
+
|
|
87
|
+
```json
|
|
88
|
+
"targets": ["javascript", "python", "typescript", "go", "rust"]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Review process
|
|
92
|
+
|
|
93
|
+
Here's what happens after you submit your contribution:
|
|
94
|
+
|
|
95
|
+
1. **Submit your PR** — Please include only one language pack per pull request to make reviews easier
|
|
96
|
+
2. **Native speaker review** — We'll find a native speaker to review your translations for accuracy and naturalness
|
|
97
|
+
3. **Automated validation + coverage** — Our CI runs `npm test` (validation + official keyword coverage) to check technical correctness
|
|
98
|
+
4. **Merge and release** — Once approved, we'll merge your PR and publish a patch release to npm
|
|
99
|
+
|
|
100
|
+
**Estimated review time:** Most PRs are reviewed within 1-2 weeks, though this can vary depending on reviewer availability for less common languages.
|
|
101
|
+
|
|
102
|
+
## Code of conduct
|
|
103
|
+
|
|
104
|
+
We celebrate linguistic diversity and regional variations. Many African languages have multiple dialects and regional variants — and that's a strength, not a problem!
|
|
105
|
+
|
|
106
|
+
**Our approach:**
|
|
107
|
+
- **Embrace multiple variants** — Use alias arrays to include different regional expressions rather than choosing one "correct" form
|
|
108
|
+
- **Be inclusive** — Nigerian Pidgin in Lagos sounds different from Port Harcourt; Yoruba in Nigeria differs from Benin. Include variations when appropriate
|
|
109
|
+
- **Respect all contributors** — Be kind and constructive in reviews. Remember that language is personal and cultural
|
|
110
|
+
|
|
111
|
+
## Need help?
|
|
112
|
+
|
|
113
|
+
If you have questions, encounter issues, or need clarification:
|
|
114
|
+
|
|
115
|
+
- **Open a GitHub issue** in the `kolanutTechnologies/kola-language-packs` repository
|
|
116
|
+
- **Tag your issue** appropriately (`question`, `new-language`, `bug`, etc.)
|
|
117
|
+
- **Be specific** — Include details about which language you're working on and what you need help with
|
|
118
|
+
|
|
119
|
+
We're here to support you throughout the contribution process!
|
package/LICENSE
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
Copyright 2026 Kolanut Technologies Ltd
|
|
6
|
+
|
|
7
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
|
+
you may not use this file except in compliance with the License.
|
|
9
|
+
You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
See the License for the specific language governing permissions and
|
|
17
|
+
limitations under the License.
|
package/README.md
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# @kolanut/language-packs
|
|
2
|
+
|
|
3
|
+
**Public open-source package** · Apache 2.0 · [Kolanut Technologies Ltd](https://kolacode.africa)
|
|
4
|
+
|
|
5
|
+
Language packs for African-language programming: a consistent set of **logical programming concepts**, mapped to **native-language phrases**, with enough structure for tools to transpile to **JavaScript, Python, TypeScript, Go, and Rust**.
|
|
6
|
+
|
|
7
|
+
If you’re building an editor, a transpiler, a linter, or a learning tool, this repo is the shared “source of truth”.
|
|
8
|
+
|
|
9
|
+
## Why this exists (the gap we’re filling)
|
|
10
|
+
|
|
11
|
+
Most tools that try “programming in African languages” run into the same wall:
|
|
12
|
+
|
|
13
|
+
- **There’s no single public, spec-backed registry** that says *“these are the official reserved words for each target language”* and *“these are the logical concepts they map to”*.
|
|
14
|
+
- Even when translation resources exist, they’re often **not packaged, not versioned, and not reusable** across tools.
|
|
15
|
+
|
|
16
|
+
This project is our attempt to fix that properly:
|
|
17
|
+
|
|
18
|
+
- A canonical **logical token registry** (the concepts)
|
|
19
|
+
- A canonical **official keyword list per programming target** (the specs)
|
|
20
|
+
- A growing set of **African language packs** (the human-language layer)
|
|
21
|
+
- Validation so everyone can build on top of the same foundation with confidence
|
|
22
|
+
|
|
23
|
+
## What’s in this repo
|
|
24
|
+
|
|
25
|
+
- **25 shipped African language packs** (and a roadmap for more)
|
|
26
|
+
- **99 logical tokens** that every pack maps (shared across all programming targets)
|
|
27
|
+
- **Schemas + validation** to keep packs consistent
|
|
28
|
+
- **Coverage checks** against official keyword lists for each target language
|
|
29
|
+
|
|
30
|
+
The data lives under [`packs/`](./packs/). The package published to npm is `@kolanut/language-packs`.
|
|
31
|
+
|
|
32
|
+
<!-- metrics:start -->
|
|
33
|
+
|
|
34
|
+
## At a glance
|
|
35
|
+
|
|
36
|
+
| What we cover | Shipped | Planned | Source of truth |
|
|
37
|
+
|---|---:|---:|---|
|
|
38
|
+
| **African language packs** | 25 | +40 | [`packs/coverage-summary.json`](./packs/coverage-summary.json) · [`packs/languages-roadmap.json`](./packs/languages-roadmap.json) |
|
|
39
|
+
| **Programming targets** | 5 | +9 | [`packs/coverage-summary.json`](./packs/coverage-summary.json) · [`packs/languages-roadmap.json`](./packs/languages-roadmap.json) |
|
|
40
|
+
| **Logical tokens** | 99 | — | [`packs/logical-tokens.json`](./packs/logical-tokens.json) |
|
|
41
|
+
| **Keyword coverage gaps** | 0 | — | [`packs/coverage-summary.json`](./packs/coverage-summary.json) |
|
|
42
|
+
|
|
43
|
+
<!-- metrics:end -->
|
|
44
|
+
## Keyword coverage (0 gaps)
|
|
45
|
+
|
|
46
|
+
We track coverage against official reserved keywords for each transpile target. Some concepts are “structural” (they don’t have a 1:1 keyword in a given language, but still need a consistent logical token).
|
|
47
|
+
|
|
48
|
+
Source of truth: [`packs/coverage-summary.json`](./packs/coverage-summary.json)
|
|
49
|
+
|
|
50
|
+
| Target | Spec keywords | Mapped | Gaps | Score |
|
|
51
|
+
|--------|-------------:|-------:|-----:|------:|
|
|
52
|
+
| JavaScript | 38 | 37 direct + 1 structural | 0 | 100% |
|
|
53
|
+
| Python | 39 | 38 direct + 1 structural | 0 | 100% |
|
|
54
|
+
| TypeScript | 64 tracked† | 63 direct + 1 structural | 0 | 100% |
|
|
55
|
+
| Go | 25 | 25 | 0 | 100% |
|
|
56
|
+
| Rust | 39 | 38 direct + 1 structural | 0 | 100% |
|
|
57
|
+
|
|
58
|
+
†TypeScript has no single official keyword count in the Handbook; 64 is our tracked reserved/modifier set for coverage (see the notes in [`packs/official-target-keywords.json`](./packs/official-target-keywords.json)).
|
|
59
|
+
|
|
60
|
+
## Spec sources (traceable and linkable)
|
|
61
|
+
|
|
62
|
+
Source of truth: [`packs/official-target-keywords.json`](./packs/official-target-keywords.json)
|
|
63
|
+
|
|
64
|
+
| Target | Spec keywords | Spec source |
|
|
65
|
+
|--------|-------------:|------------|
|
|
66
|
+
| JavaScript | 38 | [ECMA-262 §12.7.2 ReservedWord](https://tc39.es/ecma262/#sec-keywords-and-reserved-words) |
|
|
67
|
+
| Python | 39 | [Python 3.12 — keywords](https://docs.python.org/3/reference/lexical_analysis.html#keywords) (35 hard + 4 soft) |
|
|
68
|
+
| TypeScript | 64 tracked† | [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html) (no official count) |
|
|
69
|
+
| Go | 25 | [Go spec — Keywords](https://go.dev/ref/spec#Keywords) |
|
|
70
|
+
| Rust | 39 | [Rust Reference — strict keywords](https://doc.rust-lang.org/reference/keywords.html) |
|
|
71
|
+
|
|
72
|
+
## Install
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
npm install @kolanut/language-packs
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Use it in code
|
|
79
|
+
|
|
80
|
+
```typescript
|
|
81
|
+
import { listPackNames, loadPack, flattenKeywords } from '@kolanut/language-packs';
|
|
82
|
+
|
|
83
|
+
const packs = await listPackNames(); // e.g. 25 packs
|
|
84
|
+
|
|
85
|
+
const yoruba = await loadPack('yoruba');
|
|
86
|
+
const keywords = flattenKeywords(yoruba);
|
|
87
|
+
// { IF: ['ṣé', 'if'], FOR: ['fun', 'for'], ... } — maps 99 logical tokens
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Use the raw JSON (no TS required)
|
|
91
|
+
|
|
92
|
+
If you prefer to consume JSON directly (Rust/Go/Python/CLI tools), start here:
|
|
93
|
+
|
|
94
|
+
- [`packs/index.json`](./packs/index.json): pack manifest (locale, region, countries, status)
|
|
95
|
+
- [`packs/logical-tokens.json`](./packs/logical-tokens.json): the 99-token registry (the thing packs must fully map)
|
|
96
|
+
- [`packs/by-country.json`](./packs/by-country.json): `NG` → `["yoruba", "igbo", ...]`
|
|
97
|
+
- [`packs/by-region.json`](./packs/by-region.json): region → pack names
|
|
98
|
+
- [`packs/coverage-summary.json`](./packs/coverage-summary.json): auto-generated coverage report
|
|
99
|
+
- [`packs/languages-roadmap.json`](./packs/languages-roadmap.json): shipped vs planned packs
|
|
100
|
+
|
|
101
|
+
## Languages shipped (25)
|
|
102
|
+
|
|
103
|
+
These are the packs currently shipped in v0.1.
|
|
104
|
+
|
|
105
|
+
- Flags are shown for the **primary locale** (quick scanning). Many packs apply to multiple countries—see [`packs/index.json`](./packs/index.json) for the full list.
|
|
106
|
+
- **On GitHub, flags render as images**, so they should display even if your laptop doesn’t support emoji flags. (In some local Markdown previews, they may appear as squares.)
|
|
107
|
+
|
|
108
|
+
**West Africa**
|
|
109
|
+
- 🇳🇬 Yorùbá (`yo-NG`)
|
|
110
|
+
- 🇳🇬 Nigerian Pidgin (`pcm-NG`)
|
|
111
|
+
- 🇳🇬 Hausa (`ha-NG`)
|
|
112
|
+
- 🇳🇬 Igbo (`ig-NG`)
|
|
113
|
+
- 🇳🇬 Fulfulde (`ff-NG`)
|
|
114
|
+
- 🇲🇱 Bambara (`bm-ML`)
|
|
115
|
+
- 🇬🇭 Twi (`tw-GH`)
|
|
116
|
+
- 🇸🇳 Wolof (`wo-SN`)
|
|
117
|
+
- 🇸🇳 French (Africa) (`fr-SN`)
|
|
118
|
+
|
|
119
|
+
**East Africa**
|
|
120
|
+
- 🇰🇪 Swahili (`sw-KE`)
|
|
121
|
+
- 🇪🇹 Amharic (`am-ET`)
|
|
122
|
+
- 🇪🇹 Oromo (`om-ET`)
|
|
123
|
+
- 🇪🇷 Tigrinya (`ti-ER`)
|
|
124
|
+
- 🇷🇼 Kinyarwanda (`rw-RW`)
|
|
125
|
+
- 🇺🇬 Luganda (`lg-UG`)
|
|
126
|
+
|
|
127
|
+
**Central Africa**
|
|
128
|
+
- 🇨🇩 Lingala (`ln-CD`)
|
|
129
|
+
|
|
130
|
+
**Horn of Africa**
|
|
131
|
+
- 🇸🇴 Somali (`so-SO`)
|
|
132
|
+
|
|
133
|
+
**North / East Africa**
|
|
134
|
+
- 🇪🇬 Arabic (Africa) (`ar-EG`)
|
|
135
|
+
|
|
136
|
+
**Southern Africa**
|
|
137
|
+
- 🇿🇦 isiZulu (`zu-ZA`)
|
|
138
|
+
- 🇿🇦 isiXhosa (`xh-ZA`)
|
|
139
|
+
- 🇿🇦 Afrikaans (`af-ZA`)
|
|
140
|
+
- 🇱🇸 Sesotho (`st-LS`)
|
|
141
|
+
- 🇧🇼 Setswana (`tn-BW`)
|
|
142
|
+
- 🇿🇼 Shona (`sn-ZW`)
|
|
143
|
+
- 🇦🇴 Portuguese (Africa) (`pt-AO`)
|
|
144
|
+
|
|
145
|
+
Full metadata (including all countries per pack): [`packs/index.json`](./packs/index.json).
|
|
146
|
+
|
|
147
|
+
## Language quality (and how it improves)
|
|
148
|
+
|
|
149
|
+
Most packs start at `reviewStatus: "starter"`. That’s intentional: we’d rather ship useful starter packs quickly, then improve them through native-speaker review.
|
|
150
|
+
|
|
151
|
+
If you care about linguistic accuracy or preferred terminology for a specific community, open a PR — see [`CONTRIBUTING.md`](./CONTRIBUTING.md).
|
|
152
|
+
|
|
153
|
+
## Roadmap (what’s next)
|
|
154
|
+
|
|
155
|
+
We track two roadmaps:
|
|
156
|
+
|
|
157
|
+
- **Planned African languages** (by region + priority): [`packs/languages-roadmap.json`](./packs/languages-roadmap.json)
|
|
158
|
+
- **Planned programming targets**: C, C++, Java, C#, Kotlin, Swift, Dart, Ruby, PHP
|
|
159
|
+
|
|
160
|
+
Regional focus (how we’re sequencing the work):
|
|
161
|
+
|
|
162
|
+
- **Phase 1 — West Africa + Central Africa**: highest immediate demand and strong community contributor base
|
|
163
|
+
- **Phase 2 — East Africa + Horn of Africa**: expand coverage where multilingual education is already strong
|
|
164
|
+
- **Phase 3 — North Africa + Southern Africa + Indian Ocean**: fill remaining gaps and add country-specific variants where needed
|
|
165
|
+
|
|
166
|
+
High-priority upcoming packs currently include:
|
|
167
|
+
|
|
168
|
+
- West Africa: Akan (`ak-GH`)
|
|
169
|
+
- Central Africa: Cameroon Pidgin / Kamtok (`wes-CM`)
|
|
170
|
+
|
|
171
|
+
## Contributing
|
|
172
|
+
|
|
173
|
+
Contributions are welcome—especially from native speakers and educators.
|
|
174
|
+
|
|
175
|
+
- **Improve an existing pack**: add better phrasing, aliases for dialects, or clearer scope notes
|
|
176
|
+
- **Add a new pack**: follow the template, map all 99 tokens, and run validation
|
|
177
|
+
|
|
178
|
+
Before contributing, please read [`packs/PACK_SCOPE.md`](./packs/PACK_SCOPE.md). It explains:
|
|
179
|
+
|
|
180
|
+
- language codes vs country codes (e.g. `pcm` vs `NG`)
|
|
181
|
+
- how we handle cross-border languages
|
|
182
|
+
- when a dialect needs a new pack vs an alias in the same pack
|
|
183
|
+
|
|
184
|
+
Quick start:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
git clone https://github.com/kolanutTechnologies/kola-language-packs.git
|
|
188
|
+
cd kola-language-packs
|
|
189
|
+
npm install
|
|
190
|
+
npm test # validate + keyword coverage
|
|
191
|
+
npm run build
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Full guide: [`CONTRIBUTING.md`](./CONTRIBUTING.md).
|
|
195
|
+
|
|
196
|
+
## Contributors
|
|
197
|
+
|
|
198
|
+
This project only works if real people show up with real language knowledge. Thank you to everyone who’s contributed time, expertise, and care.
|
|
199
|
+
|
|
200
|
+
[View all contributors](https://github.com/kolanutTechnologies/kola-language-packs/graphs/contributors)
|
|
201
|
+
|
|
202
|
+
## FAQ
|
|
203
|
+
|
|
204
|
+
### Is `packs/logical-tokens.json` exhaustive?
|
|
205
|
+
|
|
206
|
+
It’s exhaustive for **this project’s current scope**: a shared registry of **logical concepts** needed to map official reserved keywords across five target languages (plus a small set of “structural” concepts).
|
|
207
|
+
|
|
208
|
+
It is **not** a full programming-language grammar. It intentionally does **not** try to model:
|
|
209
|
+
|
|
210
|
+
- punctuation (`;`, `{}`, `()`)
|
|
211
|
+
- operators (`+`, `==`, `??`, `:=`)
|
|
212
|
+
- comment syntax (`//`, `#`, `/* */`)
|
|
213
|
+
- every syntax feature that isn’t keyword-driven
|
|
214
|
+
|
|
215
|
+
If we expand into full syntax coverage later, that would likely be a separate registry (or multiple registries) rather than inflating “logical tokens” beyond readability.
|
|
216
|
+
|
|
217
|
+
## Maintainers (optional)
|
|
218
|
+
|
|
219
|
+
This section is only relevant if you’re maintaining the package or editing generated pack data.
|
|
220
|
+
|
|
221
|
+
<details>
|
|
222
|
+
<summary>Maintainer notes (publishing + regeneration)</summary>
|
|
223
|
+
|
|
224
|
+
- Publishing: `npm publish --access public` (package is `@kolanut/language-packs`)
|
|
225
|
+
- Regenerate derived pack files after changing the source definitions:
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
npm run bootstrap
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
</details>
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
Apache 2.0 — see [LICENSE](./LICENSE).
|
|
236
|
+
|
|
237
|
+
Some imported terminology may carry additional license terms (e.g. Mafoko/NOODL). Check source licenses before bulk import and cite them in your PR.
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Public entry point: re-exports the pack-loading helpers implemented in ./load-pack.js.
export { flattenKeywords, getPacksRoot, listPackNames, loadPack, resolveKeywords, } from './load-pack.js';
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { LanguagePack, ResolvedKeyword } from './types.js';
|
|
2
|
+
/** Returns the path of the `packs` directory, resolved relative to the directory above this module. */
export declare function getPacksRoot(): string;
|
|
3
|
+
/** Reads `index.json` from the packs root and resolves to the `name` of every entry in its `packs` array. */
export declare function listPackNames(): Promise<string[]>;
|
|
4
|
+
/** Reads `<packs root>/<name>/pack.json` and resolves to its parsed JSON content. */
export declare function loadPack(name: string): Promise<LanguagePack>;
|
|
5
|
+
/** Converts `pack.keywords` into a list of `{ logical, phrases }` entries; a single-string value becomes a one-element `phrases` array. */
export declare function resolveKeywords(pack: LanguagePack): ResolvedKeyword[];
|
|
6
|
+
/** Builds an object mapping each logical token in `pack.keywords` to an array of phrases; a single-string value becomes a one-element array. */
export declare function flattenKeywords(pack: LanguagePack): Record<string, string[]>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
import { dirname, join } from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
// Directory one level above the directory that contains this module; pack paths are resolved relative to it.
const rootDir = join(dirname(fileURLToPath(import.meta.url)), '..');
|
|
5
|
+
/**
 * Locate the directory that holds the language packs.
 *
 * @returns {string} `rootDir` joined with the `packs` folder name.
 */
export function getPacksRoot() {
  const packsDir = join(rootDir, 'packs');
  return packsDir;
}
|
|
8
|
+
/**
 * List the names of all packs recorded in the pack manifest.
 *
 * Reads `index.json` from the packs root, parses it as JSON and collects the
 * `name` of every entry in its `packs` array, in manifest order.
 *
 * @returns {Promise<string[]>} Pack names as listed in `index.json`.
 */
export async function listPackNames() {
  const manifestText = await readFile(join(getPacksRoot(), 'index.json'), 'utf8');
  const { packs } = JSON.parse(manifestText);
  return packs.map(({ name }) => name);
}
|
|
14
|
+
/**
 * Load a single language pack by name.
 *
 * Reads `<packs root>/<name>/pack.json` and returns its parsed JSON content.
 * The name is validated before it is used to build a path, so values such as
 * `'../..'` cannot make the function read a `pack.json` outside the packs
 * directory.
 *
 * @param {string} name - Pack identifier, e.g. `'yoruba'` or `'nigerian-pidgin'`.
 * @returns {Promise<object>} The parsed content of the pack's `pack.json`.
 * @throws {TypeError} If `name` is not a string.
 * @throws {RangeError} If `name` is not a valid pack identifier.
 * Errors from reading the file or parsing its JSON propagate unchanged.
 */
export async function loadPack(name) {
  if (typeof name !== 'string') {
    throw new TypeError(`Pack name must be a string, received ${typeof name}`);
  }
  // Same pattern as the `name` property in pack.schema.json: lowercase
  // letters, digits and hyphens only, so no path separators or dot segments.
  if (!/^[a-z][a-z0-9-]*$/.test(name)) {
    throw new RangeError(`Invalid pack name: ${JSON.stringify(name)}`);
  }
  const packPath = join(getPacksRoot(), name, 'pack.json');
  const raw = await readFile(packPath, 'utf8');
  return JSON.parse(raw);
}
|
|
19
|
+
/**
 * Convert a pack's keyword map into a list of resolved entries.
 *
 * Each logical token becomes `{ logical, phrases }`, where `phrases` is always
 * an array: a single-string value is wrapped, and an alias array is copied so
 * that callers who mutate the result do not alter `pack.keywords`.
 *
 * @param {{ keywords: Record<string, string | string[]> }} pack - Loaded language pack.
 * @returns {{ logical: string, phrases: string[] }[]} One entry per key of `pack.keywords`, in key order.
 */
export function resolveKeywords(pack) {
  return Object.entries(pack.keywords).map(([logical, value]) => ({
    logical,
    // Copy alias arrays instead of handing out the pack's own array.
    phrases: Array.isArray(value) ? [...value] : [value],
  }));
}
|
|
25
|
+
/**
 * Build a plain object that maps every logical token to an array of phrases.
 *
 * A single-string value is wrapped in a one-element array; an alias array is
 * copied so that mutating the result does not alter `pack.keywords`.
 *
 * The result is created with `Object.fromEntries`, which defines each key as
 * an own data property. Assigning with `out[key] = ...` on a plain object
 * would instead invoke the `__proto__` setter for a key named `"__proto__"`
 * (which `JSON.parse` can produce), replacing the result's prototype and
 * losing the entry.
 *
 * @param {{ keywords: Record<string, string | string[]> }} pack - Loaded language pack.
 * @returns {Record<string, string[]>} Logical token to phrase list, in key order.
 */
export function flattenKeywords(pack) {
  return Object.fromEntries(
    Object.entries(pack.keywords).map(([logical, value]) => [
      logical,
      Array.isArray(value) ? [...value] : [value],
    ]),
  );
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/** Value of one keyword mapping: either a single phrase or a list of alias phrases. */
export type KeywordValue = string | string[];
|
|
2
|
+
/** Shape of a pack's `pack.json`; the non-optional members match the `required` list in `pack.schema.json`. */
export interface LanguagePack {
|
|
3
|
+
name: string;
|
|
4
|
+
languageCode: string;
|
|
5
|
+
locale: string;
|
|
6
|
+
countries: string[];
|
|
7
|
+
regions: string[];
|
|
8
|
+
scopeNote?: string;
|
|
9
|
+
reviewStatus?: 'starter' | 'community-reviewed' | 'partner-verified';
|
|
10
|
+
recommendedPartners?: string[];
|
|
11
|
+
version: string;
|
|
12
|
+
displayName?: string;
|
|
13
|
+
description?: string;
|
|
14
|
+
contributors?: string[];
|
|
15
|
+
targets: Array<'javascript' | 'python' | 'typescript' | 'go' | 'rust'>;
|
|
16
|
+
keywords: Record<string, KeywordValue>;
|
|
17
|
+
}
|
|
18
|
+
/** One entry of the `packs` array in a `PackIndex`: identifying and geographic metadata for a pack, without its keyword map. */
export interface PackIndexEntry {
|
|
19
|
+
name: string;
|
|
20
|
+
languageCode: string;
|
|
21
|
+
locale: string;
|
|
22
|
+
displayName?: string;
|
|
23
|
+
countries: string[];
|
|
24
|
+
regions: string[];
|
|
25
|
+
version: string;
|
|
26
|
+
}
|
|
27
|
+
/** Pack manifest: a list of pack entries (presumably the shape of `packs/index.json`, which `listPackNames` reads via its `packs` array). */
export interface PackIndex {
|
|
28
|
+
packs: PackIndexEntry[];
|
|
29
|
+
}
|
|
30
|
+
/** A logical token paired with its phrases, always as an array; element type of the `resolveKeywords` result. */
export interface ResolvedKeyword {
|
|
31
|
+
logical: string;
|
|
32
|
+
phrases: string[];
|
|
33
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// No runtime exports: the empty export only marks this file as an ES module.
export {};
|
package/pack.schema.json
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://github.com/kolanutTechnologies/kola-language-packs/pack.schema.json",
|
|
4
|
+
"title": "Kola Language Pack",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"required": ["name", "languageCode", "locale", "countries", "regions", "version", "targets", "keywords"],
|
|
7
|
+
"properties": {
|
|
8
|
+
"name": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"pattern": "^[a-z][a-z0-9-]*$",
|
|
11
|
+
"description": "Pack identifier, e.g. yoruba, nigerian-pidgin"
|
|
12
|
+
},
|
|
13
|
+
"languageCode": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"pattern": "^[a-z]{2,3}$",
|
|
16
|
+
"description": "ISO 639 primary language subtag (e.g. yo, pcm, ha). Not a country code."
|
|
17
|
+
},
|
|
18
|
+
"locale": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"pattern": "^[a-z]{2,3}(-[A-Z]{2})?$",
|
|
21
|
+
"description": "BCP-47 language tag with optional region (e.g. pcm-NG, yo-NG)"
|
|
22
|
+
},
|
|
23
|
+
"countries": {
|
|
24
|
+
"type": "array",
|
|
25
|
+
"description": "ISO 3166-1 alpha-2 country codes where this pack variant applies",
|
|
26
|
+
"items": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"pattern": "^[A-Z]{2}$"
|
|
29
|
+
},
|
|
30
|
+
"minItems": 1
|
|
31
|
+
},
|
|
32
|
+
"regions": {
|
|
33
|
+
"type": "array",
|
|
34
|
+
"description": "Broad geographic scope for browsing (e.g. West Africa)",
|
|
35
|
+
"items": { "type": "string", "minLength": 1 },
|
|
36
|
+
"minItems": 1
|
|
37
|
+
},
|
|
38
|
+
"scopeNote": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"description": "Contributor guidance when a language spans countries or has distinct regional variants"
|
|
41
|
+
},
|
|
42
|
+
"reviewStatus": {
|
|
43
|
+
"type": "string",
|
|
44
|
+
"enum": ["starter", "community-reviewed", "partner-verified"],
|
|
45
|
+
"description": "Verification level of keyword translations"
|
|
46
|
+
},
|
|
47
|
+
"recommendedPartners": {
|
|
48
|
+
"type": "array",
|
|
49
|
+
"description": "Organizations to contact for verified terminology (language academies, universities, community NLP groups)",
|
|
50
|
+
"items": { "type": "string" }
|
|
51
|
+
},
|
|
52
|
+
"version": {
|
|
53
|
+
"type": "string",
|
|
54
|
+
"pattern": "^\\d+\\.\\d+\\.\\d+$"
|
|
55
|
+
},
|
|
56
|
+
"displayName": { "type": "string" },
|
|
57
|
+
"description": { "type": "string" },
|
|
58
|
+
"contributors": {
|
|
59
|
+
"type": "array",
|
|
60
|
+
"items": { "type": "string" }
|
|
61
|
+
},
|
|
62
|
+
"targets": {
|
|
63
|
+
"type": "array",
|
|
64
|
+
"items": {
|
|
65
|
+
"type": "string",
|
|
66
|
+
"enum": ["javascript", "python", "typescript", "go", "rust"]
|
|
67
|
+
},
|
|
68
|
+
"minItems": 1
|
|
69
|
+
},
|
|
70
|
+
"keywords": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"description": "Logical token → native phrase(s). Value is string or string[].",
|
|
73
|
+
"additionalProperties": {
|
|
74
|
+
"oneOf": [
|
|
75
|
+
{ "type": "string", "minLength": 1 },
|
|
76
|
+
{
|
|
77
|
+
"type": "array",
|
|
78
|
+
"items": { "type": "string", "minLength": 1 },
|
|
79
|
+
"minItems": 1
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"additionalProperties": false
|
|
86
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kolanut/language-packs",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Community-governed keyword maps for native African language programming — by Kolanut Technologies",
|
|
5
|
+
"license": "Apache-2.0",
|
|
6
|
+
"author": "Kolanut Technologies Ltd (https://kolacode.africa)",
|
|
7
|
+
"publishConfig": {
|
|
8
|
+
"access": "public"
|
|
9
|
+
},
|
|
10
|
+
"type": "module",
|
|
11
|
+
"main": "./dist/index.js",
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"exports": {
|
|
14
|
+
".": "./dist/index.js",
|
|
15
|
+
"./packs/*": "./packs/*",
|
|
16
|
+
"./schema": "./pack.schema.json"
|
|
17
|
+
},
|
|
18
|
+
"files": [
|
|
19
|
+
"dist",
|
|
20
|
+
"packs",
|
|
21
|
+
"badges",
|
|
22
|
+
"pack.schema.json",
|
|
23
|
+
"LICENSE",
|
|
24
|
+
"README.md",
|
|
25
|
+
"CONTRIBUTING.md"
|
|
26
|
+
],
|
|
27
|
+
"scripts": {
|
|
28
|
+
"build": "tsc",
|
|
29
|
+
"validate": "node scripts/validate.mjs",
|
|
30
|
+
"coverage": "node scripts/generate-coverage.mjs",
|
|
31
|
+
"bootstrap": "node scripts/bootstrap-packs.mjs",
|
|
32
|
+
"readme:metrics": "node scripts/update-readme-metrics.mjs",
|
|
33
|
+
"test": "npm run validate && npm run coverage",
|
|
34
|
+
"prepublishOnly": "npm run build && npm run validate"
|
|
35
|
+
},
|
|
36
|
+
"repository": {
|
|
37
|
+
"type": "git",
|
|
38
|
+
"url": "https://github.com/kolanutTechnologies/kola-language-packs.git"
|
|
39
|
+
},
|
|
40
|
+
"keywords": [
|
|
41
|
+
"kolanut",
|
|
42
|
+
"kola-code",
|
|
43
|
+
"kolatech",
|
|
44
|
+
"african-languages",
|
|
45
|
+
"yoruba",
|
|
46
|
+
"pidgin",
|
|
47
|
+
"hausa",
|
|
48
|
+
"igbo",
|
|
49
|
+
"swahili",
|
|
50
|
+
"localization",
|
|
51
|
+
"programming-languages",
|
|
52
|
+
"definition-maps"
|
|
53
|
+
],
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@types/node": "^20.0.0",
|
|
56
|
+
"typescript": "^5.5.0"
|
|
57
|
+
},
|
|
58
|
+
"engines": {
|
|
59
|
+
"node": ">=18"
|
|
60
|
+
}
|
|
61
|
+
}
|