jtcg_locale_detector 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +37 -0
- data/PACKAGING_SUMMARY.md +195 -0
- data/README.md +226 -0
- data/bin/locale-detector +159 -0
- data/jtcg_locale_detector.gemspec +48 -0
- data/lib/locale_detector/client.rb +163 -0
- data/lib/locale_detector/detector.rb +46 -0
- data/lib/locale_detector/version.rb +3 -0
- data/lib/locale_detector.rb +25 -0
- data/locale_detector.gemspec +46 -0
- data/python/cli.py +220 -0
- data/python/requirements.txt +8 -0
- data/python/src/__init__.py +10 -0
- data/python/src/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/__pycache__/__init__.cpython-313.pyc +0 -0
- data/python/src/__pycache__/locale_data.cpython-311.pyc +0 -0
- data/python/src/__pycache__/locale_data.cpython-313.pyc +0 -0
- data/python/src/__pycache__/locale_detector.cpython-311.pyc +0 -0
- data/python/src/__pycache__/locale_detector.cpython-313.pyc +0 -0
- data/python/src/artifacts/fasttext/lid.176.bin +0 -0
- data/python/src/artifacts/fasttext/lid.176.ftz +0 -0
- data/python/src/download_fasttext.py +69 -0
- data/python/src/locale_data.py +178 -0
- data/python/src/locale_detector.py +534 -0
- data/python/src/locale_detector_c.c +403 -0
- data/python/src/locale_detector_c.h +37 -0
- data/python/src/locale_detector_cy.cpp +23126 -0
- data/python/src/locale_detector_cy.cpython-311-darwin.so +0 -0
- data/python/src/locale_detector_cy.cpython-313-darwin.so +0 -0
- data/python/src/locale_detector_cy.html +6460 -0
- data/python/src/locale_detector_cy.pyx +501 -0
- data/python/src/utils/__init__.py +1 -0
- data/python/src/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- data/python/src/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- data/python/src/utils/__pycache__/data_utils.cpython-313.pyc +0 -0
- data/python/src/utils/data_utils.py +50 -0
- data/python/src/utils/data_utils_cy.cpp +10086 -0
- data/python/src/utils/data_utils_cy.cpython-311-darwin.so +0 -0
- data/python/src/utils/data_utils_cy.cpython-313-darwin.so +0 -0
- data/python/src/utils/data_utils_cy.html +600 -0
- data/python/src/utils/data_utils_cy.pyx +94 -0
- data/python/src/zhon/__init__.py +7 -0
- data/python/src/zhon/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/zhon/__pycache__/hanzi.cpython-311.pyc +0 -0
- data/python/src/zhon/__pycache__/pinyin.cpython-311.pyc +0 -0
- data/python/src/zhon/__pycache__/zhuyin.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__init__.py +14 -0
- data/python/src/zhon/cedict/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__pycache__/all.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__pycache__/simplified.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__pycache__/traditional.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/all.py +4 -0
- data/python/src/zhon/cedict/simplified.py +4 -0
- data/python/src/zhon/cedict/traditional.py +4 -0
- data/python/src/zhon/hanzi.py +81 -0
- data/python/src/zhon/pinyin.py +187 -0
- data/python/src/zhon/zhuyin.py +46 -0
- metadata +198 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
LOCALE_MAP = {
|
|
2
|
+
"zh": {"i18n_code": "zh", "name": "Chinese"},
|
|
3
|
+
"af": {"i18n_code": "af", "name": "Afrikaans"},
|
|
4
|
+
"als": {"i18n_code": "not_supported", "name": "Tosk Albanian"},
|
|
5
|
+
"am": {"i18n_code": "am", "name": "Amharic"},
|
|
6
|
+
"an": {"i18n_code": "not_supported", "name": "Aragonese"},
|
|
7
|
+
"ar": {"i18n_code": "ar", "name": "Arabic"},
|
|
8
|
+
"arz": {"i18n_code": "not_supported", "name": "Egyptian Arabic"},
|
|
9
|
+
"as": {"i18n_code": "as", "name": "Assamese"},
|
|
10
|
+
"ast": {"i18n_code": "not_supported", "name": "Asturian"},
|
|
11
|
+
"av": {"i18n_code": "not_supported", "name": "Avaric"},
|
|
12
|
+
"az": {"i18n_code": "az", "name": "Azerbaijani"},
|
|
13
|
+
"azb": {"i18n_code": "not_supported", "name": "South Azerbaijani"},
|
|
14
|
+
"ba": {"i18n_code": "not_supported", "name": "Bashkir"},
|
|
15
|
+
"bar": {"i18n_code": "not_supported", "name": "Bavarian"},
|
|
16
|
+
"bcl": {"i18n_code": "not_supported", "name": "Central Bikol"},
|
|
17
|
+
"be": {"i18n_code": "be", "name": "Belarusian"},
|
|
18
|
+
"bg": {"i18n_code": "bg", "name": "Bulgarian"},
|
|
19
|
+
"bh": {"i18n_code": "not_supported", "name": "Bihari languages"},
|
|
20
|
+
"bn": {"i18n_code": "bn", "name": "Bengali"},
|
|
21
|
+
"bo": {"i18n_code": "bo", "name": "Tibetan"},
|
|
22
|
+
"bpy": {"i18n_code": "not_supported", "name": "Bishnupriya Manipuri"},
|
|
23
|
+
"br": {"i18n_code": "br", "name": "Breton"},
|
|
24
|
+
"bs": {"i18n_code": "bs", "name": "Bosnian"},
|
|
25
|
+
"bxr": {"i18n_code": "not_supported", "name": "Russia Buriat"},
|
|
26
|
+
"ca": {"i18n_code": "ca", "name": "Catalan"},
|
|
27
|
+
"cbk": {"i18n_code": "not_supported", "name": "Chavacano"},
|
|
28
|
+
"ce": {"i18n_code": "not_supported", "name": "Chechen"},
|
|
29
|
+
"ceb": {"i18n_code": "ceb", "name": "Cebuano"},
|
|
30
|
+
"ckb": {"i18n_code": "ckb", "name": "Sorani Kurdish"},
|
|
31
|
+
"co": {"i18n_code": "not_supported", "name": "Corsican"},
|
|
32
|
+
"cs": {"i18n_code": "cs", "name": "Czech"},
|
|
33
|
+
"cv": {"i18n_code": "not_supported", "name": "Chuvash"},
|
|
34
|
+
"cy": {"i18n_code": "cy", "name": "Welsh"},
|
|
35
|
+
"da": {"i18n_code": "da", "name": "Danish"},
|
|
36
|
+
"de": {"i18n_code": "de", "name": "German"},
|
|
37
|
+
"diq": {"i18n_code": "not_supported", "name": "Zazaki"},
|
|
38
|
+
"dsb": {"i18n_code": "dsb", "name": "Lower Sorbian"},
|
|
39
|
+
"dty": {"i18n_code": "not_supported", "name": "Doteli"},
|
|
40
|
+
"dv": {"i18n_code": "dv", "name": "Divehi"},
|
|
41
|
+
"el": {"i18n_code": "el", "name": "Greek"},
|
|
42
|
+
"eml": {"i18n_code": "not_supported", "name": "Emilian-Romagnol"},
|
|
43
|
+
"en": {"i18n_code": "en-US", "name": "English"},
|
|
44
|
+
"eo": {"i18n_code": "eo", "name": "Esperanto"},
|
|
45
|
+
"es": {"i18n_code": "es", "name": "Spanish"},
|
|
46
|
+
"et": {"i18n_code": "et", "name": "Estonian"},
|
|
47
|
+
"eu": {"i18n_code": "eu", "name": "Basque"},
|
|
48
|
+
"fa": {"i18n_code": "fa", "name": "Persian"},
|
|
49
|
+
"fi": {"i18n_code": "fi", "name": "Finnish"},
|
|
50
|
+
"fr": {"i18n_code": "fr", "name": "French"},
|
|
51
|
+
"frr": {"i18n_code": "not_supported", "name": "Northern Frisian"},
|
|
52
|
+
"fy": {"i18n_code": "fy", "name": "Western Frisian"},
|
|
53
|
+
"ga": {"i18n_code": "ga", "name": "Irish"},
|
|
54
|
+
"gd": {"i18n_code": "gd", "name": "Scottish Gaelic"},
|
|
55
|
+
"gl": {"i18n_code": "gl", "name": "Galician"},
|
|
56
|
+
"gn": {"i18n_code": "not_supported", "name": "Guarani"},
|
|
57
|
+
"gom": {"i18n_code": "not_supported", "name": "Goan Konkani"},
|
|
58
|
+
"gu": {"i18n_code": "gu", "name": "Gujarati"},
|
|
59
|
+
"gv": {"i18n_code": "gv", "name": "Manx"},
|
|
60
|
+
"he": {"i18n_code": "he", "name": "Hebrew"},
|
|
61
|
+
"hi": {"i18n_code": "hi", "name": "Hindi"},
|
|
62
|
+
"hif": {"i18n_code": "not_supported", "name": "Fiji Hindi"},
|
|
63
|
+
"hr": {"i18n_code": "hr", "name": "Croatian"},
|
|
64
|
+
"hsb": {"i18n_code": "hsb", "name": "Upper Sorbian"},
|
|
65
|
+
"ht": {"i18n_code": "not_supported", "name": "Haitian"},
|
|
66
|
+
"hu": {"i18n_code": "hu", "name": "Hungarian"},
|
|
67
|
+
"hy": {"i18n_code": "hy", "name": "Armenian"},
|
|
68
|
+
"ia": {"i18n_code": "not_supported", "name": "Interlingua"},
|
|
69
|
+
"id": {"i18n_code": "id", "name": "Indonesian"},
|
|
70
|
+
"ie": {"i18n_code": "not_supported", "name": "Interlingue"},
|
|
71
|
+
"ilo": {"i18n_code": "not_supported", "name": "Ilocano"},
|
|
72
|
+
"io": {"i18n_code": "not_supported", "name": "Ido"},
|
|
73
|
+
"is": {"i18n_code": "is", "name": "Icelandic"},
|
|
74
|
+
"it": {"i18n_code": "it", "name": "Italian"},
|
|
75
|
+
"ja": {"i18n_code": "ja", "name": "Japanese"},
|
|
76
|
+
"jbo": {"i18n_code": "not_supported", "name": "Lojban"},
|
|
77
|
+
"jv": {"i18n_code": "jv", "name": "Javanese"},
|
|
78
|
+
"ka": {"i18n_code": "ka", "name": "Georgian"},
|
|
79
|
+
"kk": {"i18n_code": "kk", "name": "Kazakh"},
|
|
80
|
+
"km": {"i18n_code": "km", "name": "Khmer"},
|
|
81
|
+
"kn": {"i18n_code": "kn", "name": "Kannada"},
|
|
82
|
+
"ko": {"i18n_code": "ko", "name": "Korean"},
|
|
83
|
+
"krc": {"i18n_code": "not_supported", "name": "Karachay-Balkar"},
|
|
84
|
+
"ku": {"i18n_code": "ku", "name": "Kurdish"},
|
|
85
|
+
"kv": {"i18n_code": "not_supported", "name": "Komi"},
|
|
86
|
+
"kw": {"i18n_code": "not_supported", "name": "Cornish"},
|
|
87
|
+
"ky": {"i18n_code": "ky", "name": "Kyrgyz"},
|
|
88
|
+
"la": {"i18n_code": "not_supported", "name": "Latin"},
|
|
89
|
+
"lb": {"i18n_code": "lb", "name": "Luxembourgish"},
|
|
90
|
+
"lez": {"i18n_code": "not_supported", "name": "Lezghian"},
|
|
91
|
+
"li": {"i18n_code": "not_supported", "name": "Limburgish"},
|
|
92
|
+
"lmo": {"i18n_code": "not_supported", "name": "Lombard"},
|
|
93
|
+
"lo": {"i18n_code": "lo", "name": "Lao"},
|
|
94
|
+
"lrc": {"i18n_code": "not_supported", "name": "Northern Luri"},
|
|
95
|
+
"lt": {"i18n_code": "lt", "name": "Lithuanian"},
|
|
96
|
+
"lv": {"i18n_code": "lv", "name": "Latvian"},
|
|
97
|
+
"mai": {"i18n_code": "not_supported", "name": "Maithili"},
|
|
98
|
+
"mg": {"i18n_code": "mg", "name": "Malagasy"},
|
|
99
|
+
"mhr": {"i18n_code": "not_supported", "name": "Meadow Mari"},
|
|
100
|
+
"min": {"i18n_code": "not_supported", "name": "Minangkabau"},
|
|
101
|
+
"mk": {"i18n_code": "mk", "name": "Macedonian"},
|
|
102
|
+
"ml": {"i18n_code": "ml", "name": "Malayalam"},
|
|
103
|
+
"mn": {"i18n_code": "mn", "name": "Mongolian"},
|
|
104
|
+
"mr": {"i18n_code": "mr", "name": "Marathi"},
|
|
105
|
+
"mrj": {"i18n_code": "not_supported", "name": "Hill Mari"},
|
|
106
|
+
"ms": {"i18n_code": "ms", "name": "Malay"},
|
|
107
|
+
"mt": {"i18n_code": "mt", "name": "Maltese"},
|
|
108
|
+
"mwl": {"i18n_code": "not_supported", "name": "Mirandese"},
|
|
109
|
+
"my": {"i18n_code": "my", "name": "Burmese"},
|
|
110
|
+
"myv": {"i18n_code": "not_supported", "name": "Erzya"},
|
|
111
|
+
"mzn": {"i18n_code": "not_supported", "name": "Mazanderani"},
|
|
112
|
+
"nah": {"i18n_code": "not_supported", "name": "Nahuatl"},
|
|
113
|
+
"nap": {"i18n_code": "not_supported", "name": "Neapolitan"},
|
|
114
|
+
"nds": {"i18n_code": "not_supported", "name": "Low German"},
|
|
115
|
+
"ne": {"i18n_code": "ne", "name": "Nepali"},
|
|
116
|
+
"new": {"i18n_code": "not_supported", "name": "Newar"},
|
|
117
|
+
"nl": {"i18n_code": "nl", "name": "Dutch"},
|
|
118
|
+
"nn": {"i18n_code": "not_supported", "name": "Norwegian Nynorsk"},
|
|
119
|
+
"no": {"i18n_code": "not_supported", "name": "Norwegian"},
|
|
120
|
+
"oc": {"i18n_code": "not_supported", "name": "Occitan"},
|
|
121
|
+
"or": {"i18n_code": "or", "name": "Oriya"},
|
|
122
|
+
"os": {"i18n_code": "os", "name": "Ossetian"},
|
|
123
|
+
"pa": {"i18n_code": "pa", "name": "Punjabi"},
|
|
124
|
+
"pam": {"i18n_code": "not_supported", "name": "Pampanga"},
|
|
125
|
+
"pfl": {"i18n_code": "not_supported", "name": "Palatine German"},
|
|
126
|
+
"pl": {"i18n_code": "pl", "name": "Polish"},
|
|
127
|
+
"pms": {"i18n_code": "not_supported", "name": "Piedmontese"},
|
|
128
|
+
"pnb": {"i18n_code": "not_supported", "name": "Western Punjabi"},
|
|
129
|
+
"ps": {"i18n_code": "ps", "name": "Pashto"},
|
|
130
|
+
"pt": {"i18n_code": "pt", "name": "Portuguese"},
|
|
131
|
+
"qu": {"i18n_code": "qu", "name": "Quechua"},
|
|
132
|
+
"rm": {"i18n_code": "rm", "name": "Romansh"},
|
|
133
|
+
"ro": {"i18n_code": "ro", "name": "Romanian"},
|
|
134
|
+
"ru": {"i18n_code": "ru", "name": "Russian"},
|
|
135
|
+
"rue": {"i18n_code": "not_supported", "name": "Rusyn"},
|
|
136
|
+
"sa": {"i18n_code": "sa", "name": "Sanskrit"},
|
|
137
|
+
"sah": {"i18n_code": "not_supported", "name": "Yakut"},
|
|
138
|
+
"sc": {"i18n_code": "not_supported", "name": "Sardinian"},
|
|
139
|
+
"scn": {"i18n_code": "not_supported", "name": "Sicilian"},
|
|
140
|
+
"sco": {"i18n_code": "not_supported", "name": "Scots"},
|
|
141
|
+
"sd": {"i18n_code": "not_supported", "name": "Sindhi"},
|
|
142
|
+
"sh": {"i18n_code": "not_supported", "name": "Serbo-Croatian"},
|
|
143
|
+
"si": {"i18n_code": "si", "name": "Sinhala"},
|
|
144
|
+
"sk": {"i18n_code": "sk", "name": "Slovak"},
|
|
145
|
+
"sl": {"i18n_code": "sl", "name": "Slovenian"},
|
|
146
|
+
"so": {"i18n_code": "so", "name": "Somali"},
|
|
147
|
+
"sq": {"i18n_code": "sq", "name": "Albanian"},
|
|
148
|
+
"sr": {"i18n_code": "sr", "name": "Serbian"},
|
|
149
|
+
"su": {"i18n_code": "not_supported", "name": "Sundanese"},
|
|
150
|
+
"sv": {"i18n_code": "sv", "name": "Swedish"},
|
|
151
|
+
"sw": {"i18n_code": "sw", "name": "Swahili"},
|
|
152
|
+
"ta": {"i18n_code": "ta", "name": "Tamil"},
|
|
153
|
+
"te": {"i18n_code": "te", "name": "Telugu"},
|
|
154
|
+
"tg": {"i18n_code": "tg", "name": "Tajik"},
|
|
155
|
+
"th": {"i18n_code": "th", "name": "Thai"},
|
|
156
|
+
"tk": {"i18n_code": "tk", "name": "Turkmen"},
|
|
157
|
+
"tl": {"i18n_code": "not_supported", "name": "Tagalog"},
|
|
158
|
+
"tr": {"i18n_code": "tr", "name": "Turkish"},
|
|
159
|
+
"tt": {"i18n_code": "not_supported", "name": "Tatar"},
|
|
160
|
+
"tyv": {"i18n_code": "not_supported", "name": "Tuvinian"},
|
|
161
|
+
"ug": {"i18n_code": "ug", "name": "Uyghur"},
|
|
162
|
+
"uk": {"i18n_code": "uk", "name": "Ukrainian"},
|
|
163
|
+
"ur": {"i18n_code": "ur", "name": "Urdu"},
|
|
164
|
+
"uz": {"i18n_code": "uz", "name": "Uzbek"},
|
|
165
|
+
"vec": {"i18n_code": "not_supported", "name": "Venetian"},
|
|
166
|
+
"vep": {"i18n_code": "not_supported", "name": "Veps"},
|
|
167
|
+
"vi": {"i18n_code": "vi", "name": "Vietnamese"},
|
|
168
|
+
"vls": {"i18n_code": "not_supported", "name": "West Flemish"},
|
|
169
|
+
"vo": {"i18n_code": "not_supported", "name": "Volapük"},
|
|
170
|
+
"wa": {"i18n_code": "not_supported", "name": "Walloon"},
|
|
171
|
+
"war": {"i18n_code": "not_supported", "name": "Waray-Waray"},
|
|
172
|
+
"wuu": {"i18n_code": "not_supported", "name": "Wu Chinese"},
|
|
173
|
+
"xal": {"i18n_code": "not_supported", "name": "Kalmyk"},
|
|
174
|
+
"xmf": {"i18n_code": "not_supported", "name": "Mingrelian"},
|
|
175
|
+
"yi": {"i18n_code": "yi", "name": "Yiddish"},
|
|
176
|
+
"yo": {"i18n_code": "yo", "name": "Yoruba"},
|
|
177
|
+
"yue": {"i18n_code": "not_supported", "name": "Cantonese"},
|
|
178
|
+
}
|