jtcg_locale_detector 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +37 -0
- data/PACKAGING_SUMMARY.md +195 -0
- data/README.md +226 -0
- data/bin/locale-detector +159 -0
- data/jtcg_locale_detector.gemspec +48 -0
- data/lib/locale_detector/client.rb +163 -0
- data/lib/locale_detector/detector.rb +46 -0
- data/lib/locale_detector/version.rb +3 -0
- data/lib/locale_detector.rb +25 -0
- data/locale_detector.gemspec +46 -0
- data/python/cli.py +220 -0
- data/python/requirements.txt +8 -0
- data/python/src/__init__.py +10 -0
- data/python/src/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/__pycache__/__init__.cpython-313.pyc +0 -0
- data/python/src/__pycache__/locale_data.cpython-311.pyc +0 -0
- data/python/src/__pycache__/locale_data.cpython-313.pyc +0 -0
- data/python/src/__pycache__/locale_detector.cpython-311.pyc +0 -0
- data/python/src/__pycache__/locale_detector.cpython-313.pyc +0 -0
- data/python/src/artifacts/fasttext/lid.176.bin +0 -0
- data/python/src/artifacts/fasttext/lid.176.ftz +0 -0
- data/python/src/download_fasttext.py +69 -0
- data/python/src/locale_data.py +178 -0
- data/python/src/locale_detector.py +534 -0
- data/python/src/locale_detector_c.c +403 -0
- data/python/src/locale_detector_c.h +37 -0
- data/python/src/locale_detector_cy.cpp +23126 -0
- data/python/src/locale_detector_cy.cpython-311-darwin.so +0 -0
- data/python/src/locale_detector_cy.cpython-313-darwin.so +0 -0
- data/python/src/locale_detector_cy.html +6460 -0
- data/python/src/locale_detector_cy.pyx +501 -0
- data/python/src/utils/__init__.py +1 -0
- data/python/src/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- data/python/src/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- data/python/src/utils/__pycache__/data_utils.cpython-313.pyc +0 -0
- data/python/src/utils/data_utils.py +50 -0
- data/python/src/utils/data_utils_cy.cpp +10086 -0
- data/python/src/utils/data_utils_cy.cpython-311-darwin.so +0 -0
- data/python/src/utils/data_utils_cy.cpython-313-darwin.so +0 -0
- data/python/src/utils/data_utils_cy.html +600 -0
- data/python/src/utils/data_utils_cy.pyx +94 -0
- data/python/src/zhon/__init__.py +7 -0
- data/python/src/zhon/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/zhon/__pycache__/hanzi.cpython-311.pyc +0 -0
- data/python/src/zhon/__pycache__/pinyin.cpython-311.pyc +0 -0
- data/python/src/zhon/__pycache__/zhuyin.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__init__.py +14 -0
- data/python/src/zhon/cedict/__pycache__/__init__.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__pycache__/all.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__pycache__/simplified.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/__pycache__/traditional.cpython-311.pyc +0 -0
- data/python/src/zhon/cedict/all.py +4 -0
- data/python/src/zhon/cedict/simplified.py +4 -0
- data/python/src/zhon/cedict/traditional.py +4 -0
- data/python/src/zhon/hanzi.py +81 -0
- data/python/src/zhon/pinyin.py +187 -0
- data/python/src/zhon/zhuyin.py +46 -0
- metadata +198 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""Constants for processing Pinyin strings."""
|
|
3
|
+
|
|
4
|
+
from string import whitespace
|
|
5
|
+
from re import escape
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Per-vowel character sets: the plain letter first, then its tone-marked
# forms. ``_v`` groups ASCII "v" with the u-umlaut forms.
_a = "a\u0101\u00E0\u00E1\u01CE"
_e = "e\u0113\u00E9\u011B\u00E8"
_i = "i\u012B\u00ED\u01D0\u00EC"
_o = "o\u014D\u00F3\u01D2\u00F2"
_u = "u\u016B\u00FA\u01D4\u00F9"
_v = "v\u00FC\u01D6\u01D8\u01DA\u01DC"

# Lowercase sets are spelled out; uppercase sets are derived from them.
_lowercase_vowels = "".join((_a, _e, _i, _o, _u, _v))
_uppercase_vowels = _lowercase_vowels.upper()
_lowercase_consonants = "bpmfdtnlgkhjqxzcsrwy"
_uppercase_consonants = _lowercase_consonants.upper()

#: All Pinyin vowels: the lowercase set followed by the uppercase set.
vowels = "".join((_lowercase_vowels, _uppercase_vowels))

#: All Pinyin consonants: the lowercase set followed by the uppercase set.
consonants = "".join((_lowercase_consonants, _uppercase_consonants))

#: All lowercase Pinyin characters (consonants, then vowels).
lowercase = "".join((_lowercase_consonants, _lowercase_vowels))

#: All uppercase Pinyin characters (consonants, then vowels).
uppercase = "".join((_uppercase_consonants, _uppercase_vowels))

#: Pinyin marks that carry special meaning and may appear inside a valid
#: Pinyin word: the middle dot and the digits 0-5 for tones, the colon used
#: to write u-umlaut as 'u:', the hyphen that joins syllables within a word,
#: and the apostrophe that separates a vowel-initial syllable from the
#: syllable before it.
marks = "·012345:-'"

#: Punctuation marks that are valid but do not end a sentence.
non_stops = """"#$%&'()*+,-/\\:;<=>@[]^_`{|}~"""

#: Punctuation marks that end a sentence.
stops = ".!?"

#: Every punctuation mark: non-stops followed by stops.
punctuation = "".join((non_stops, stops))

#: Every printable Pinyin character: vowels, consonants, marks, whitespace
#: and punctuation.
# The last three marks (":-'") already occur in ``non_stops``, so they are
# sliced off here to avoid listing them twice.
printable = "".join((vowels, consonants, marks[:-3], whitespace, punctuation))

# Vowel tables handed to the pattern builders: the accented table accepts
# every tone-marked form, the numbered table only the unmarked letters
# (plus u-umlaut for "v").
_a_vowels = dict(a=_a, e=_e, i=_i, o=_o, u=_u, v=_v)
_n_vowels = dict(a="a", e="e", i="i", o="o", u="u", v="v\u00FC")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _build_syl(vowels, tone_numbers=False):
    """Return a regular expression pattern matching one Pinyin syllable.

    The pattern optionally starts with a middle dot (U+00B7) or hyphenation
    point (U+2027), followed by one of the syllable alternatives listed
    below. Alternatives that end in a consonant carry a negative lookahead
    so that they only match when the next character is not a vowel (or the
    'u:' spelling of u-umlaut).

    The alternatives are ordered so that validation roughly proceeds as:

    1. Try the longest valid syllable.
    2. If it ends in a consonant, require that no vowel follows directly.
    3. Otherwise fall back to the next longest candidate.

    Args:
        vowels: Mapping with the keys "a", "e", "i", "o", "u" and "v", each
            giving the characters accepted for that vowel. Entries longer
            than one character are wrapped in a character class.
        tone_numbers: When true, an optional trailing digit 0-5 is accepted
            after the syllable.

    Returns:
        The syllable pattern as a string.

    """
    # Lookahead placed after syllable-final consonants: no vowel may follow.
    consonant_end = "(?![" + _a + _e + _i + _o + _u + _v + "]|u:)"

    # A single character is used as-is; several become a character class.
    classes = {
        key: "[{}]".format(chars) if len(chars) > 1 else chars
        for key, chars in vowels.items()
    }
    substitutions = {key: classes[key] for key in "aeiouv"}
    substitutions["consonant_end"] = consonant_end

    # Order matters: the regex engine takes the first alternative that fits.
    alternatives = (
        "(?:(?:[zcs]h|[gkh])u%(a)sng%(consonant_end)s)",
        "(?:[jqx]i%(o)sng%(consonant_end)s)",
        "(?:[nljqx]i%(a)sng%(consonant_end)s)",
        "(?:(?:[zcs]h?|[dtnlgkhrjqxy])u%(a)sn%(consonant_end)s)",
        "(?:(?:[zcs]h|[gkh])u%(a)si)",
        "(?:(?:[zc]h?|[rdtnlgkhsy])%(o)sng%(consonant_end)s)",
        "(?:(?:[zcs]h?|[rbpmfdtnlgkhw])?%(e)sng%(consonant_end)s)",
        "(?:(?:[zcs]h?|[rbpmfdtnlgkhwy])?%(a)sng%(consonant_end)s)",
        "(?:[bpmdtnljqxy]%(i)sng%(consonant_end)s)",
        "(?:[bpmdtnljqx]i%(a)sn%(consonant_end)s)",
        "(?:[bpmdtnljqx]i%(a)so)",
        "(?:[nl](?:v|u:|\u00FC)%(e)s)",
        "(?:[nl](?:%(v)s|u:))",
        "(?:[jqxy]u%(e)s)",
        "(?:[bpmnljqxy]%(i)sn%(consonant_end)s)",
        "(?:[mdnljqx]i%(u)s)",
        "(?:[bpmdtnljqx]i%(e)s)",
        "(?:[dljqx]i%(a)s)",
        "(?:(?:[zcs]h?|[rdtnlgkhxqjy])%(u)sn%(consonant_end)s)",
        "(?:(?:[zcs]h?|[rdtgkh])u%(i)s)",
        "(?:(?:[zcs]h?|[rdtnlgkh])u%(o)s)",
        "(?:(?:[zcs]h|[rgkh])u%(a)s)",
        "(?:(?:[zcs]h?|[rbpmfdngkhw])?%(e)sn%(consonant_end)s)",
        "(?:(?:[zcs]h?|[rbpmfdtnlgkhwy])?%(a)sn%(consonant_end)s)",
        "(?:(?:[zcs]h?|[rpmfdtnlgkhy])?%(o)su)",
        "(?:(?:[zcs]h?|[rbpmdtnlgkhy])?%(a)so)",
        "(?:(?:[zs]h|[bpmfdtnlgkhwz])?%(e)si)",
        "(?:(?:[zcs]h?|[bpmdtnlgkhw])?%(a)si)",
        "(?:(?:[zcs]h?|[rjqxybpmdtnl])%(i)s)",
        "(?:(?:[zcs]h?|[rwbpmfdtnlgkhjqxwy])%(u)s)",
        "(?:%(e)s(?:r%(consonant_end)s)?)",
        "(?:(?:[zcs]h?|[rmdtnlgkhy])%(e)s)",
        "(?:[bpmfwyl]?%(o)s)",
        "(?:(?:[zcs]h|[bpmfdtnlgkhzcswy])?%(a)s)",
        "(?:r%(consonant_end)s)",
    )

    template = "(?:\u00B7|\u2027)?(?:" + "|".join(alternatives) + ")"
    if tone_numbers:
        template += "[0-5]?"
    return template % substitutions
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _build_word(syl, vowels):
    """Return a Pinyin word pattern built from a syllable pattern.

    A word is one or more consecutive syllables. Each syllable may be
    followed by a hyphen or an apostrophe, but only when another syllable
    comes directly after it; after an apostrophe that next syllable must
    also start with one of the "a", "e" or "o" characters from *vowels*.

    Args:
        syl: Regular expression pattern for a single syllable.
        vowels: Mapping providing the accepted characters under the keys
            "a", "e" and "o".

    Returns:
        The word pattern as a string.

    """
    # Lookahead requiring that a complete syllable follows the separator.
    next_syllable = "(?={})".format(syl)
    # Characters allowed to start the syllable after an apostrophe.
    apostrophe_starts = "[{}{}{}]".format(vowels["a"], vowels["e"], vowels["o"])
    separator = "(?:-{following}|'(?={starts}){following})?".format(
        following=next_syllable, starts=apostrophe_starts
    )
    return "(?:{}{})+".format(syl, separator)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _build_sentence(word):
    """Return a Pinyin sentence pattern built from a word pattern.

    A sentence is a run of words, non-stop punctuation marks and single
    spaces (a space may not directly follow a stop or another space),
    terminated by exactly one stop and then any number of closing
    characters: apostrophe, double quote, closing bracket, closing brace
    or closing parenthesis.

    Args:
        word: Regular expression pattern for a single Pinyin word.

    Returns:
        The sentence pattern as a string.

    """
    # Both punctuation sets are escaped so they are safe inside [...].
    stop_class = escape(stops)
    non_stop_class = escape(non_stops)

    body = r"(?:{0}|[{1}]|(?<![{2} ]) )+".format(word, non_stop_class, stop_class)
    terminator = "[{0}]".format(stop_class)
    closers = r"['\"\]}\)]*"
    return body + terminator + closers
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
#: Pattern for one Pinyin syllable written with tone-marked vowels.
accented_syllable = _build_syl(_a_vowels, tone_numbers=False)
a_syl = acc_syl = accented_syllable

#: Pattern for one Pinyin syllable written with an optional tone number.
numbered_syllable = _build_syl(_n_vowels, tone_numbers=True)
n_syl = num_syl = numbered_syllable

#: Pattern for one Pinyin syllable in either notation (tone-marked vowels
#: and an optional tone number are both accepted).
syllable = _build_syl(_a_vowels, tone_numbers=True)
syl = syllable


#: Pattern for a Pinyin word made of accented syllables.
accented_word = _build_word(accented_syllable, _a_vowels)
a_word = acc_word = accented_word

#: Pattern for a Pinyin word made of numbered syllables.
numbered_word = _build_word(numbered_syllable, _n_vowels)
n_word = num_word = numbered_word

#: Pattern for a Pinyin word in either notation.
word = _build_word(syllable, _a_vowels)


#: Pattern for a Pinyin sentence made of accented words.
accented_sentence = _build_sentence(accented_word)
a_sent = acc_sent = accented_sentence

#: Pattern for a Pinyin sentence made of numbered words.
numbered_sentence = _build_sentence(numbered_word)
n_sent = num_sent = numbered_sentence

#: Pattern for a Pinyin sentence in either notation.
sentence = _build_sentence(word)
sent = sentence
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""Constants for working with Zhuyin (Bopomofo)."""
|
|
3
|
+
|
|
4
|
+
#: Every Zhuyin (Bopomofo) character, as one string.
characters = "".join(
    (
        "ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙ",
        "ㄚㄛㄝㄜㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩㄭ",
    )
)

#: Every Zhuyin tone mark, as one string.
marks = "".join(
    (
        "\u02C7",  # Caron
        "\u02CA",  # Modifier letter acute accent
        "\u02CB",  # Modifier letter grave accent
        "\u02D9",  # Dot above
    )
)

#: Regular expression pattern for one Zhuyin syllable: one of the
#: alternatives below, optionally followed by a single tone mark.
syl = syllable = (
    "(?:"
    + "|".join(
        (
            "[ㄇㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄜ",
            "[ㄅㄆㄇㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄗㄘㄙㄧ]?ㄞ",
            "[ㄅㄆㄇㄈㄉㄋㄌㄍㄏㄓㄕㄗ]?ㄟ",
            "[ㄅㄆㄇㄈㄋㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄣ",
            "[ㄉㄌㄐㄑㄒ]?ㄧㄚ",
            "[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄗㄘㄙ]?ㄚ",
            "[ㄅㄆㄇㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄠ",
            "[ㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄡ",
            "[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄢ",
            "[ㄇㄉㄋㄌㄐㄑㄒ]?ㄧㄡ",
            "[ㄅㄆㄇㄋㄌㄐㄑㄒ]?ㄧㄣ",
            "[ㄐㄑㄒ]?ㄩ[ㄢㄥ]",
            "[ㄌㄐㄑㄒ]?ㄩㄣ",
            "[ㄋㄌㄐㄑㄒ]?(?:ㄩㄝ?|ㄧㄤ)",
            "[ㄅㄆㄇㄈㄌㄧ]?ㄛ",
            "[ㄅㄆㄇㄉㄊㄋㄌㄐㄑㄒ]?ㄧ[ㄝㄠㄢㄥ]?",
            "[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?[ㄤㄥ]",
            "[ㄍㄎㄏㄓㄔㄕ]?ㄨ[ㄚㄞㄤ]",
            "[ㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄨㄛ",
            "[ㄉㄊㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄨㄟ",
            "[ㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄨㄢ",
            "[ㄉㄊㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄨㄣ",
            "[ㄉㄊㄋㄌㄍㄎㄏㄓㄔㄖㄗㄘㄙ]?ㄨㄥ",
            "[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄓㄔㄕㄖㄗㄘㄙ]?ㄨ",
            "[ㄓㄔㄕㄖㄗㄘㄙㄝㄦㄧ]",
        )
    )
    + ")[{marks}]?".format(marks=marks)
)
|
metadata
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: jtcg_locale_detector
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Original mark
|
|
8
|
+
- JTCG Team
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2025-06-01 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: ffi
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
requirements:
|
|
18
|
+
- - "~>"
|
|
19
|
+
- !ruby/object:Gem::Version
|
|
20
|
+
version: '1.15'
|
|
21
|
+
type: :runtime
|
|
22
|
+
prerelease: false
|
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
24
|
+
requirements:
|
|
25
|
+
- - "~>"
|
|
26
|
+
- !ruby/object:Gem::Version
|
|
27
|
+
version: '1.15'
|
|
28
|
+
- !ruby/object:Gem::Dependency
|
|
29
|
+
name: json
|
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
|
31
|
+
requirements:
|
|
32
|
+
- - "~>"
|
|
33
|
+
- !ruby/object:Gem::Version
|
|
34
|
+
version: '2.6'
|
|
35
|
+
type: :runtime
|
|
36
|
+
prerelease: false
|
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
38
|
+
requirements:
|
|
39
|
+
- - "~>"
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
version: '2.6'
|
|
42
|
+
- !ruby/object:Gem::Dependency
|
|
43
|
+
name: bundler
|
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
|
45
|
+
requirements:
|
|
46
|
+
- - ">="
|
|
47
|
+
- !ruby/object:Gem::Version
|
|
48
|
+
version: '1.17'
|
|
49
|
+
type: :development
|
|
50
|
+
prerelease: false
|
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
52
|
+
requirements:
|
|
53
|
+
- - ">="
|
|
54
|
+
- !ruby/object:Gem::Version
|
|
55
|
+
version: '1.17'
|
|
56
|
+
- !ruby/object:Gem::Dependency
|
|
57
|
+
name: rake
|
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
|
59
|
+
requirements:
|
|
60
|
+
- - "~>"
|
|
61
|
+
- !ruby/object:Gem::Version
|
|
62
|
+
version: '13.0'
|
|
63
|
+
type: :development
|
|
64
|
+
prerelease: false
|
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - "~>"
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '13.0'
|
|
70
|
+
- !ruby/object:Gem::Dependency
|
|
71
|
+
name: rspec
|
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
|
73
|
+
requirements:
|
|
74
|
+
- - "~>"
|
|
75
|
+
- !ruby/object:Gem::Version
|
|
76
|
+
version: '3.12'
|
|
77
|
+
type: :development
|
|
78
|
+
prerelease: false
|
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
80
|
+
requirements:
|
|
81
|
+
- - "~>"
|
|
82
|
+
- !ruby/object:Gem::Version
|
|
83
|
+
version: '3.12'
|
|
84
|
+
- !ruby/object:Gem::Dependency
|
|
85
|
+
name: rubocop
|
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
|
87
|
+
requirements:
|
|
88
|
+
- - "~>"
|
|
89
|
+
- !ruby/object:Gem::Version
|
|
90
|
+
version: '1.50'
|
|
91
|
+
type: :development
|
|
92
|
+
prerelease: false
|
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
94
|
+
requirements:
|
|
95
|
+
- - "~>"
|
|
96
|
+
- !ruby/object:Gem::Version
|
|
97
|
+
version: '1.50'
|
|
98
|
+
description: Multi-language locale detector with specialized Chinese variant detection.
|
|
99
|
+
Uses FastText for initial language identification and multiple algorithms for Chinese
|
|
100
|
+
variant detection. Ruby gem packaged by JTCG Team.
|
|
101
|
+
email:
|
|
102
|
+
- mark@j-tcg.com
|
|
103
|
+
- enor@j-tcg.com
|
|
104
|
+
executables:
|
|
105
|
+
- locale-detector
|
|
106
|
+
extensions: []
|
|
107
|
+
extra_rdoc_files: []
|
|
108
|
+
files:
|
|
109
|
+
- CHANGELOG.md
|
|
110
|
+
- PACKAGING_SUMMARY.md
|
|
111
|
+
- README.md
|
|
112
|
+
- bin/locale-detector
|
|
113
|
+
- jtcg_locale_detector.gemspec
|
|
114
|
+
- lib/locale_detector.rb
|
|
115
|
+
- lib/locale_detector/client.rb
|
|
116
|
+
- lib/locale_detector/detector.rb
|
|
117
|
+
- lib/locale_detector/version.rb
|
|
118
|
+
- locale_detector.gemspec
|
|
119
|
+
- python/cli.py
|
|
120
|
+
- python/requirements.txt
|
|
121
|
+
- python/src/__init__.py
|
|
122
|
+
- python/src/__pycache__/__init__.cpython-311.pyc
|
|
123
|
+
- python/src/__pycache__/__init__.cpython-313.pyc
|
|
124
|
+
- python/src/__pycache__/locale_data.cpython-311.pyc
|
|
125
|
+
- python/src/__pycache__/locale_data.cpython-313.pyc
|
|
126
|
+
- python/src/__pycache__/locale_detector.cpython-311.pyc
|
|
127
|
+
- python/src/__pycache__/locale_detector.cpython-313.pyc
|
|
128
|
+
- python/src/artifacts/fasttext/lid.176.bin
|
|
129
|
+
- python/src/artifacts/fasttext/lid.176.ftz
|
|
130
|
+
- python/src/download_fasttext.py
|
|
131
|
+
- python/src/locale_data.py
|
|
132
|
+
- python/src/locale_detector.py
|
|
133
|
+
- python/src/locale_detector_c.c
|
|
134
|
+
- python/src/locale_detector_c.h
|
|
135
|
+
- python/src/locale_detector_cy.cpp
|
|
136
|
+
- python/src/locale_detector_cy.cpython-311-darwin.so
|
|
137
|
+
- python/src/locale_detector_cy.cpython-313-darwin.so
|
|
138
|
+
- python/src/locale_detector_cy.html
|
|
139
|
+
- python/src/locale_detector_cy.pyx
|
|
140
|
+
- python/src/utils/__init__.py
|
|
141
|
+
- python/src/utils/__pycache__/__init__.cpython-311.pyc
|
|
142
|
+
- python/src/utils/__pycache__/__init__.cpython-313.pyc
|
|
143
|
+
- python/src/utils/__pycache__/data_utils.cpython-311.pyc
|
|
144
|
+
- python/src/utils/__pycache__/data_utils.cpython-313.pyc
|
|
145
|
+
- python/src/utils/data_utils.py
|
|
146
|
+
- python/src/utils/data_utils_cy.cpp
|
|
147
|
+
- python/src/utils/data_utils_cy.cpython-311-darwin.so
|
|
148
|
+
- python/src/utils/data_utils_cy.cpython-313-darwin.so
|
|
149
|
+
- python/src/utils/data_utils_cy.html
|
|
150
|
+
- python/src/utils/data_utils_cy.pyx
|
|
151
|
+
- python/src/zhon/__init__.py
|
|
152
|
+
- python/src/zhon/__pycache__/__init__.cpython-311.pyc
|
|
153
|
+
- python/src/zhon/__pycache__/hanzi.cpython-311.pyc
|
|
154
|
+
- python/src/zhon/__pycache__/pinyin.cpython-311.pyc
|
|
155
|
+
- python/src/zhon/__pycache__/zhuyin.cpython-311.pyc
|
|
156
|
+
- python/src/zhon/cedict/__init__.py
|
|
157
|
+
- python/src/zhon/cedict/__pycache__/__init__.cpython-311.pyc
|
|
158
|
+
- python/src/zhon/cedict/__pycache__/all.cpython-311.pyc
|
|
159
|
+
- python/src/zhon/cedict/__pycache__/simplified.cpython-311.pyc
|
|
160
|
+
- python/src/zhon/cedict/__pycache__/traditional.cpython-311.pyc
|
|
161
|
+
- python/src/zhon/cedict/all.py
|
|
162
|
+
- python/src/zhon/cedict/simplified.py
|
|
163
|
+
- python/src/zhon/cedict/traditional.py
|
|
164
|
+
- python/src/zhon/hanzi.py
|
|
165
|
+
- python/src/zhon/pinyin.py
|
|
166
|
+
- python/src/zhon/zhuyin.py
|
|
167
|
+
homepage: https://github.com/jtcg/locale-detector
|
|
168
|
+
licenses:
|
|
169
|
+
- MIT
|
|
170
|
+
metadata:
|
|
171
|
+
homepage_uri: https://github.com/jtcg/locale-detector
|
|
172
|
+
source_code_uri: https://github.com/jtcg/locale-detector
|
|
173
|
+
changelog_uri: https://github.com/jtcg/locale-detector/blob/main/CHANGELOG.md
|
|
174
|
+
packaged_by: JTCG Team <enor@j-tcg.com>
|
|
175
|
+
original_author: Original mark <mark@j-tcg.com>
|
|
176
|
+
maintainer: JTCG Team
|
|
177
|
+
funding_uri: https://github.com/sponsors/jtcg
|
|
178
|
+
rubygems_mfa_required: 'true'
|
|
179
|
+
post_install_message:
|
|
180
|
+
rdoc_options: []
|
|
181
|
+
require_paths:
|
|
182
|
+
- lib
|
|
183
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
184
|
+
requirements:
|
|
185
|
+
- - ">="
|
|
186
|
+
- !ruby/object:Gem::Version
|
|
187
|
+
version: 2.6.0
|
|
188
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
|
+
requirements:
|
|
190
|
+
- - ">="
|
|
191
|
+
- !ruby/object:Gem::Version
|
|
192
|
+
version: '0'
|
|
193
|
+
requirements: []
|
|
194
|
+
rubygems_version: 3.0.3.1
|
|
195
|
+
signing_key:
|
|
196
|
+
specification_version: 4
|
|
197
|
+
summary: 多語言地區檢測工具,特別優化中文繁簡體檢測
|
|
198
|
+
test_files: []
|