generaltranslation 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,7 +10,7 @@ A language toolkit for AI developers
10
10
  npm i generaltranslation
11
11
  ```
12
12
 
13
- ## Functions
13
+ ## Toolkit
14
14
 
15
15
  ### getLanguageName
16
16
 
@@ -1,4 +1,4 @@
1
- const CodeToLanguage = {
1
+ {
2
2
  "ab": "Abkhazian",
3
3
  "aa": "Afar",
4
4
  "af": "Afrikaans",
@@ -182,6 +182,4 @@ const CodeToLanguage = {
182
182
  "yo": "Yoruba",
183
183
  "za": "Zhuang",
184
184
  "zu": "Zulu"
185
- }
186
-
187
- module.exports = { CodeToLanguage }
185
+ }
@@ -0,0 +1,227 @@
1
+ {
2
+ "abkhazian": "ab",
3
+ "afar": "aa",
4
+ "afrikaans": "af",
5
+ "akan": "ak",
6
+ "albanian": "sq",
7
+ "amharic": "am",
8
+ "arabic": "ar",
9
+ "aragonese": "an",
10
+ "armenian": "hy",
11
+ "assamese": "as",
12
+ "avaric": "av",
13
+ "avestan": "ae",
14
+ "aymara": "ay",
15
+ "azerbaijani": "az",
16
+ "bambara": "bm",
17
+ "bashkir": "ba",
18
+ "basque": "eu",
19
+ "belarusian": "be",
20
+ "bengali": "bn",
21
+ "bislama": "bi",
22
+ "bosnian": "bs",
23
+ "breton": "br",
24
+ "bulgarian": "bg",
25
+ "burmese": "my",
26
+ "catalan": "ca",
27
+ "valencian": "ca",
28
+ "chamorro": "ch",
29
+ "chechen": "ce",
30
+ "chichewa": "ny",
31
+ "chewa": "ny",
32
+ "nyanja": "ny",
33
+ "chinese": "zh",
34
+ "mandarin": "zh",
35
+ "church slavonic": "cu",
36
+ "old slavonic": "cu",
37
+ "old church slavonic": "cu",
38
+ "slavonic": "cu",
39
+ "chuvash": "cv",
40
+ "cornish": "kw",
41
+ "corsican": "co",
42
+ "cree": "cr",
43
+ "croatian": "hr",
44
+ "czech": "cs",
45
+ "danish": "da",
46
+ "divehi": "dv",
47
+ "dhivehi": "dv",
48
+ "maldivian": "dv",
49
+ "dutch": "nl",
50
+ "flemish": "nl",
51
+ "dzongkha": "dz",
52
+ "english": "en",
53
+ "esperanto": "eo",
54
+ "estonian": "et",
55
+ "ewe": "ee",
56
+ "faroese": "fo",
57
+ "fijian": "fj",
58
+ "finnish": "fi",
59
+ "french": "fr",
60
+ "western frisian": "fy",
61
+ "frisian": "fy",
62
+ "fulah": "ff",
63
+ "gaelic": "gd",
64
+ "scottish gaelic": "gd",
65
+ "galician": "gl",
66
+ "ganda": "lg",
67
+ "georgian": "ka",
68
+ "german": "de",
69
+ "greek": "el",
70
+ "modern greek": "el",
71
+ "kalaallisut": "kl",
72
+ "greenlandic": "kl",
73
+ "guarani": "gn",
74
+ "gujarati": "gu",
75
+ "haitian": "ht",
76
+ "haitian creole": "ht",
77
+ "hausa": "ha",
78
+ "hebrew": "he",
79
+ "herero": "hz",
80
+ "hindi": "hi",
81
+ "hiri motu": "ho",
82
+ "hungarian": "hu",
83
+ "icelandic": "is",
84
+ "ido": "io",
85
+ "igbo": "ig",
86
+ "indonesian": "id",
87
+ "interlingua": "ia",
88
+ "interlingue": "ie",
89
+ "occidental": "ie",
90
+ "inuktitut": "iu",
91
+ "inupiaq": "ik",
92
+ "irish": "ga",
93
+ "italian": "it",
94
+ "japanese": "ja",
95
+ "javanese": "jv",
96
+ "kannada": "kn",
97
+ "kanuri": "kr",
98
+ "kashmiri": "ks",
99
+ "kazakh": "kk",
100
+ "central khmer": "km",
101
+ "kikuyu": "ki",
102
+ "gikuyu": "ki",
103
+ "kinyarwanda": "rw",
104
+ "kirghiz": "ky",
105
+ "kyrgyz": "ky",
106
+ "komi": "kv",
107
+ "kongo": "kg",
108
+ "korean": "ko",
109
+ "kuanyama": "kj",
110
+ "kwanyama": "kj",
111
+ "kurdish": "ku",
112
+ "lao": "lo",
113
+ "latin": "la",
114
+ "latvian": "lv",
115
+ "limburgan": "li",
116
+ "limburger": "li",
117
+ "limburgish": "li",
118
+ "lingala": "ln",
119
+ "lithuanian": "lt",
120
+ "luba-katanga": "lu",
121
+ "luxembourgish": "lb",
122
+ "letzeburgesch": "lb",
123
+ "macedonian": "mk",
124
+ "malagasy": "mg",
125
+ "malay": "ms",
126
+ "malayalam": "ml",
127
+ "maltese": "mt",
128
+ "manx": "gv",
129
+ "maori": "mi",
130
+ "marathi": "mr",
131
+ "marshallese": "mh",
132
+ "mongolian": "mn",
133
+ "nauru": "na",
134
+ "navajo": "nv",
135
+ "navaho": "nv",
136
+ "north ndebele": "nd",
137
+ "south ndebele": "nr",
138
+ "northern ndebele": "nd",
139
+ "southern ndebele": "nr",
140
+ "ndonga": "ng",
141
+ "nepali": "ne",
142
+ "norwegian": "no",
143
+ "norwegian bokmål": "nb",
144
+ "norwegian nynorsk": "nn",
145
+ "sichuan yi": "ii",
146
+ "nuosu": "ii",
147
+ "occitan": "oc",
148
+ "ojibwa": "oj",
149
+ "ojibwe": "oj",
150
+ "oriya": "or",
151
+ "odia": "or",
152
+ "oromo": "om",
153
+ "ossetian": "os",
154
+ "ossetic": "os",
155
+ "pali": "pi",
156
+ "pashto": "ps",
157
+ "pushto": "ps",
158
+ "persian": "fa",
159
+ "polish": "pl",
160
+ "portuguese": "pt",
161
+ "punjabi": "pa",
162
+ "panjabi": "pa",
163
+ "quechua": "qu",
164
+ "romanian": "ro",
165
+ "moldavian": "ro",
166
+ "moldovan": "ro",
167
+ "romansh": "rm",
168
+ "rundi": "rn",
169
+ "kirundi": "rn",
170
+ "russian": "ru",
171
+ "northern sami": "se",
172
+ "sami": "se",
173
+ "samoan": "sm",
174
+ "sango": "sg",
175
+ "sanskrit": "sa",
176
+ "sardinian": "sc",
177
+ "serbian": "sr",
178
+ "shona": "sn",
179
+ "sindhi": "sd",
180
+ "sinhala": "si",
181
+ "sinhalese": "si",
182
+ "slovak": "sk",
183
+ "slovenian": "sl",
184
+ "slovene": "sl",
185
+ "somali": "so",
186
+ "southern sotho": "st",
187
+ "sotho": "st",
188
+ "spanish": "es",
189
+ "castilian": "es",
190
+ "sundanese": "su",
191
+ "swahili": "sw",
192
+ "swati": "ss",
193
+ "swedish": "sv",
194
+ "tagalog": "tl",
195
+ "tahitian": "ty",
196
+ "tajik": "tg",
197
+ "tamil": "ta",
198
+ "tatar": "tt",
199
+ "telugu": "te",
200
+ "thai": "th",
201
+ "tibetan": "bo",
202
+ "tigrinya": "ti",
203
+ "tonga": "to",
204
+ "tongan": "to",
205
+ "tsonga": "ts",
206
+ "tswana": "tn",
207
+ "turkish": "tr",
208
+ "turkmen": "tk",
209
+ "twi": "tw",
210
+ "uighur": "ug",
211
+ "uyghur": "ug",
212
+ "ukrainian": "uk",
213
+ "urdu": "ur",
214
+ "uzbek": "uz",
215
+ "venda": "ve",
216
+ "vietnamese": "vi",
217
+ "volapük": "vo",
218
+ "walloon": "wa",
219
+ "welsh": "cy",
220
+ "wolof": "wo",
221
+ "xhosa": "xh",
222
+ "yiddish": "yi",
223
+ "yoruba": "yo",
224
+ "zhuang": "za",
225
+ "chuang": "za",
226
+ "zulu": "zu"
227
+ }
@@ -0,0 +1,3 @@
1
+ {
2
+ "grc": "Ancient Greek"
3
+ }
@@ -0,0 +1,3 @@
1
+ {
2
+ "ancient greek": "grc"
3
+ }
package/codes/codes.js ADDED
@@ -0,0 +1,30 @@
1
+ // ----- IMPORTS ----- //
2
+
3
+ const CodeToLanguage = require('./639-1/CodeToLanguage.json');
4
+ const LanguageToCode = require('./639-1/LanguageToCode.json');
5
+
6
+ // only for languages which have no two-letter code
7
+ const CodeToLanguageTriletter = require('./639-3/CodeToLanguageTriletter.json');
8
+ const LanguageToCodeTriletter = require('./639-3/LanguageToCodeTriletter.json');
9
+
10
+ // ----- LANGUAGE CODES ----- //
11
+
12
+ // Returns the language name for an ISO 639 code, or an array of names when given an array of codes; unknown codes yield ''
13
+ const _mapCodeToLanguage = code => CodeToLanguage[code] || CodeToLanguageTriletter[code] || ''; // two-letter table is checked first, then the three-letter table; the code is looked up exactly as passed (no case folding)
14
+ const getLanguageName = codes => {
15
+ return Array.isArray(codes) ? codes.map(_mapCodeToLanguage) : _mapCodeToLanguage(codes); // arrays are mapped element by element, anything else is treated as a single code
16
+ }
17
+
18
+ // Returns an ISO 639 code from a language name or an array of language names
19
+ // Preferentially returns two-letter codes
20
+ const _mapLanguageToCode = language => {
21
+ const lowerCaseLanguage = language.toLowerCase();
22
+ return LanguageToCode[lowerCaseLanguage] || LanguageToCodeTriletter[lowerCaseLanguage] || '';
23
+ }
24
+ const getLanguageCode = languages => {
25
+ return Array.isArray(languages) ? languages.map(_mapLanguageToCode) : _mapLanguageToCode(languages);
26
+ }
27
+
28
+ module.exports = {
29
+ getLanguageName, getLanguageCode
30
+ }
package/index.js CHANGED
@@ -1,11 +1,15 @@
1
- const { CodeToLanguage } = require('./languages/639-1.js');
2
- const { CodeToLanguageTriletter } = require('./languages/639-3.js');
1
+ // `generaltranslation` language toolkit
2
+ // © 2024, General Translation, Inc.
3
3
 
4
- // Gets the name of a language from an ISO 639 code
5
- function getLanguageName(code) {
6
- return ((code.length === 2) ? (CodeToLanguage[code]) : (CodeToLanguageTriletter[code])) || '';
7
- }
4
+ // ----- IMPORTS ----- //
8
5
 
9
- module.exports = {
10
- getLanguageName
6
+ const { getLanguageName, getLanguageCode } = require('./codes/codes.js');
7
+
8
+ const { getModelInfo, getModelLanguages, isLanguageSupported } = require('./models/models.js');
9
+
10
+ // ----- EXPORTS ----- //
11
+
12
+ module.exports = {
13
+ getLanguageName, getLanguageCode,
14
+ getModelInfo, getModelLanguages, isLanguageSupported
11
15
  }
@@ -0,0 +1,28 @@
1
+ {
2
+ "gpt-4": "gpt-4",
3
+ "gpt-4-0613": "gpt-4",
4
+ "gpt-4-1106-preview": "gpt-4",
5
+ "gpt-4-0125-preview": "gpt-4",
6
+ "gpt-4-vision-preview": "gpt-4",
7
+ "gpt-4-turbo-preview": "gpt-4",
8
+ "gpt-3.5-turbo": "gpt-3.5-turbo",
9
+ "gpt-3.5-turbo-16k": "gpt-3.5-turbo",
10
+ "gpt-3.5-turbo-16k-0613": "gpt-3.5-turbo",
11
+ "gpt-3.5-turbo-0301": "gpt-3.5-turbo",
12
+ "gpt-3.5-turbo-0613": "gpt-3.5-turbo",
13
+ "gpt-3.5-turbo-1106": "gpt-3.5-turbo",
14
+ "gpt-3.5-turbo-0125": "gpt-3.5-turbo",
15
+ "gpt-3.5-turbo-instruct": "gpt-3.5-turbo",
16
+ "gpt-3.5-turbo-instruct-0914": "gpt-3.5-turbo",
17
+ "open-mistral-7b": "mistral-7b",
18
+ "mixtral": "mixtral-8x7b",
19
+ "open-mixtral-8x7b": "mixtral-8x7b",
20
+ "mistral-small-2402": "mistral-small",
21
+ "mistral-small-latest": "mistral-small",
22
+ "mistral-medium-2312": "mistral-medium",
23
+ "mistral-medium-latest": "mistral-medium",
24
+ "mistral-large-2402": "mistral-large",
25
+ "mistral-large-latest": "mistral-large",
26
+ "claude-3-sonnet-20240229": "claude-3-sonnet",
27
+ "claude-3-opus-20240229": "claude-3-opus"
28
+ }
@@ -0,0 +1,44 @@
1
+ {
2
+ "gpt-4": {
3
+ "languages": [
4
+ "en", "fr", "de", "es", "it",
5
+ "zh", "ja", "ko",
6
+ "grc", "la"
7
+ ]
8
+ },
9
+ "gpt-3.5-turbo": {
10
+ "languages": [
11
+ "en", "fr", "de", "es", "it"
12
+ ]
13
+ },
14
+ "mistral-7b": {
15
+ "languages": ["en"]
16
+ },
17
+ "mixtral-8x7b": {
18
+ "languages": ["en", "fr", "de", "es", "it"]
19
+ },
20
+ "mistral-small": {
21
+ "languages": ["en", "fr", "de", "es", "it"]
22
+ },
23
+ "mistral-medium": {
24
+ "languages": ["en", "fr", "de", "es", "it"]
25
+ },
26
+ "mistral-large": {
27
+ "languages": ["en", "fr", "de", "es", "it"]
28
+ },
29
+ "claude-3-sonnet": {
30
+ "languages": [
31
+ "en", "fr", "de", "es", "it"
32
+ ]
33
+ },
34
+ "claude-3-opus": {
35
+ "languages": [
36
+ "en", "fr", "de", "es", "it"
37
+ ]
38
+ },
39
+ "gemini-pro": {
40
+ "languages": [
41
+ "en", "fr", "de", "es", "it"
42
+ ]
43
+ }
44
+ }
@@ -0,0 +1,32 @@
1
+ // ----- IMPORTS ----- //
2
+
3
+ const AliasToModel = require('./data/AliasToModel.json');
4
+ const Models = require('./data/Models.json')
5
+
6
+ // ----- MODEL INFORMATION ----- //
7
+
8
+ // Get all info about a given model
9
+ // Returns an object or null if invalid
10
+ const getModelInfo = model => {
11
+ model = model?.toLowerCase();
12
+ modelName = AliasToModel[model] ? AliasToModel[model] : model;
13
+ return Models[modelName];
14
+ }
15
+
16
+ // Get all languages known to be compatible with a given LLM
17
+ // Returns an array of language codes, or [] if the model is unknown or its info has no languages list
18
+ const getModelLanguages = model => {
19
+ return getModelInfo(model)?.languages || []; // the stored array is returned as-is (not copied), so callers should not mutate it
20
+ }
21
+
22
+ // Returns true if a model is rated for a language
23
+ // Returns false otherwise
24
+ const isLanguageSupported = (model, code) => {
25
+ return getModelLanguages(model)?.includes(language);
26
+ }
27
+
28
+ module.exports = {
29
+ getModelInfo,
30
+ getModelLanguages,
31
+ isLanguageSupported
32
+ }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "generaltranslation",
3
- "version": "1.0.2",
4
- "description": "AI language toolkit in JS",
3
+ "version": "1.0.4",
4
+ "description": "A language toolkit for AI developers",
5
5
  "main": "index.js",
6
6
  "scripts": {
7
7
  "test": "echo \"Error: no test specified\" && exit 1"
@@ -1,188 +0,0 @@
1
- const CodeToLanguageTriletter = {
2
- "abk": "Abkhazian",
3
- "aar": "Afar",
4
- "afr": "Afrikaans",
5
- "aka": "Akan",
6
- "sqi": "Albanian",
7
- "amh": "Amharic",
8
- "ara": "Arabic",
9
- "arg": "Aragonese",
10
- "hye": "Armenian",
11
- "asm": "Assamese",
12
- "ava": "Avaric",
13
- "ave": "Avestan",
14
- "aym": "Aymara",
15
- "aze": "Azerbaijani",
16
- "bam": "Bambara",
17
- "bak": "Bashkir",
18
- "eus": "Basque",
19
- "bel": "Belarusian",
20
- "ben": "Bengali",
21
- "bis": "Bislama",
22
- "bos": "Bosnian",
23
- "bre": "Breton",
24
- "bul": "Bulgarian",
25
- "mya": "Burmese",
26
- "cat": "Catalan",
27
- "cha": "Chamorro",
28
- "che": "Chechen",
29
- "nya": "Chichewa",
30
- "zho": "Chinese",
31
- "chu": "Slavonic",
32
- "chv": "Chuvash",
33
- "cor": "Cornish",
34
- "cos": "Corsican",
35
- "cre": "Cree",
36
- "hrv": "Croatian",
37
- "ces": "Czech",
38
- "dan": "Danish",
39
- "div": "Divehi",
40
- "nld": "Dutch",
41
- "dzo": "Dzongkha",
42
- "eng": "English",
43
- "epo": "Esperanto",
44
- "est": "Estonian",
45
- "ewe": "Ewe",
46
- "fao": "Faroese",
47
- "fij": "Fijian",
48
- "fin": "Finnish",
49
- "fra": "French",
50
- "fry": "Frisian",
51
- "ful": "Fulah",
52
- "gla": "Gaelic",
53
- "glg": "Galician",
54
- "lug": "Ganda",
55
- "kat": "Georgian",
56
- "deu": "German",
57
- "ell": "Greek",
58
- "kal": "Kalaallisut",
59
- "grn": "Guarani",
60
- "guj": "Gujarati",
61
- "hat": "Haitian Creole",
62
- "hau": "Hausa",
63
- "heb": "Hebrew",
64
- "her": "Herero",
65
- "hin": "Hindi",
66
- "hmo": "Hiri Motu",
67
- "hun": "Hungarian",
68
- "isl": "Icelandic",
69
- "ido": "Ido",
70
- "ibo": "Igbo",
71
- "ind": "Indonesian",
72
- "ina": "Interlingua",
73
- "ile": "Interlingue",
74
- "iku": "Inuktitut",
75
- "ipk": "Inupiaq",
76
- "gle": "Irish",
77
- "ita": "Italian",
78
- "jpn": "Japanese",
79
- "jav": "Javanese",
80
- "kan": "Kannada",
81
- "kau": "Kanuri",
82
- "kas": "Kashmiri",
83
- "kaz": "Kazakh",
84
- "khm": "Khmer",
85
- "kik": "Kikuyu",
86
- "kin": "Kinyarwanda",
87
- "kir": "Kyrgyz",
88
- "kom": "Komi",
89
- "kon": "Kongo",
90
- "kor": "Korean",
91
- "kua": "Kuanyama",
92
- "kur": "Kurdish",
93
- "lao": "Lao",
94
- "lat": "Latin",
95
- "lav": "Latvian",
96
- "lim": "Limburgan",
97
- "lin": "Lingala",
98
- "lit": "Lithuanian",
99
- "lub": "Luba-Katanga",
100
- "ltz": "Luxembourgish",
101
- "mkd": "Macedonian",
102
- "mlg": "Malagasy",
103
- "msa": "Malay",
104
- "mal": "Malayalam",
105
- "mlt": "Maltese",
106
- "glv": "Manx",
107
- "mri": "Maori",
108
- "mar": "Marathi",
109
- "mah": "Marshallese",
110
- "mon": "Mongolian",
111
- "nau": "Nauru",
112
- "nav": "Navajo",
113
- "nde": "Northern Ndebele",
114
- "nbl": "Southern Ndebele",
115
- "ndo": "Ndonga",
116
- "nep": "Nepali",
117
- "nor": "Norwegian",
118
- "nob": "Norwegian Bokmål",
119
- "nno": "Norwegian Nynorsk",
120
- "iii": "Nuosu",
121
- "oci": "Occitan",
122
- "oji": "Ojibwa",
123
- "ori": "Oriya",
124
- "orm": "Oromo",
125
- "oss": "Ossetian",
126
- "pli": "Pali",
127
- "pus": "Pashto",
128
- "fas": "Persian",
129
- "pol": "Polish",
130
- "por": "Portuguese",
131
- "pan": "Punjabi",
132
- "que": "Quechua",
133
- "ron": "Romanian",
134
- "roh": "Romansh",
135
- "run": "Rundi",
136
- "rus": "Russian",
137
- "sme": "Northern Sami",
138
- "smo": "Samoan",
139
- "sag": "Sango",
140
- "san": "Sanskrit",
141
- "srd": "Sardinian",
142
- "srp": "Serbian",
143
- "sna": "Shona",
144
- "snd": "Sindhi",
145
- "sin": "Sinhalese",
146
- "slk": "Slovak",
147
- "slv": "Slovenian",
148
- "som": "Somali",
149
- "sot": "Sotho",
150
- "spa": "Spanish",
151
- "sun": "Sundanese",
152
- "swa": "Swahili",
153
- "ssw": "Swati",
154
- "swe": "Swedish",
155
- "tgl": "Tagalog",
156
- "tah": "Tahitian",
157
- "tgk": "Tajik",
158
- "tam": "Tamil",
159
- "tat": "Tatar",
160
- "tel": "Telugu",
161
- "tha": "Thai",
162
- "bod": "Tibetan",
163
- "tir": "Tigrinya",
164
- "ton": "Tonga",
165
- "tso": "Tsonga",
166
- "tsn": "Tswana",
167
- "tur": "Turkish",
168
- "tuk": "Turkmen",
169
- "twi": "Twi",
170
- "uig": "Uyghur",
171
- "ukr": "Ukrainian",
172
- "urd": "Urdu",
173
- "uzb": "Uzbek",
174
- "ven": "Venda",
175
- "vie": "Vietnamese",
176
- "vol": "Volapük",
177
- "wln": "Walloon",
178
- "cym": "Welsh",
179
- "wol": "Wolof",
180
- "xho": "Xhosa",
181
- "yid": "Yiddish",
182
- "yor": "Yoruba",
183
- "zha": "Zhuang",
184
- "zul": "Zulu",
185
- "grc": "Ancient Greek"
186
- }
187
-
188
- module.exports = { CodeToLanguageTriletter }