latinizer 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/latinizer.rb +45 -268
- data/lib/lib/arabic.rb +199 -0
- data/lib/lib/han.rb +18 -0
- data/lib/lib/japanese.rb +61 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21b15ac5fbb50b85ac397a6b1e58bb4e7e63c5ce9800d7c21eddd02a5fa09d7d
|
4
|
+
data.tar.gz: 36e10b84402711854a0345eaf5ccf488a125ce197690daabdff6443526d20522
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c1b6a8eddc5258615cf0017b8209e6ee15ecb4332e0f241457e7a403364597e4ee525a89bcec3a93eb2e89821498ac65cbae893183b54cffffa7e88d3431672
|
7
|
+
data.tar.gz: 3a56c28ccbb182a25ba8e65998d0c95038f613ad4cbdd42d81103e47dc26bcacd0f7eee80549650736d9bb5657e9dd1de9bd824709a1ab9602ca9c77ff9417f1
|
data/lib/latinizer.rb
CHANGED
@@ -1,293 +1,70 @@
|
|
1
1
|
class Latinizer
|
2
|
-
require 'chinese_pinyin'
|
3
|
-
require 'mecab_standalone'
|
4
|
-
require 'romaji'
|
5
2
|
require 'translit'
|
6
3
|
require 'unicode/scripts'
|
7
4
|
require 'babosa'
|
5
|
+
require_relative './lib/arabic.rb'
|
6
|
+
require_relative './lib/han.rb'
|
7
|
+
require_relative './lib/japanese.rb'
|
8
|
+
|
9
|
+
SUPPORTED_SCRIPTS = [
|
10
|
+
'Arabic',
|
11
|
+
'Cyrillic',
|
12
|
+
'Han',
|
13
|
+
'Japanese'
|
14
|
+
]
|
8
15
|
|
9
16
|
def self.t(text, opt = nil)
|
10
|
-
scripts =
|
11
|
-
pinyin_options = {tonemarks: true}
|
17
|
+
scripts = detect_non_latin_scripts(text)
|
12
18
|
|
13
|
-
if
|
14
|
-
|
15
|
-
elsif
|
16
|
-
|
19
|
+
if scripts.size == 0
|
20
|
+
return opt == :ascii ? remove_non_ascii(text) : text
|
21
|
+
elsif scripts.size > 1
|
22
|
+
latinized = latinize_script(text, scripts.first, opt)
|
23
|
+
return t(latinized, opt)
|
17
24
|
end
|
18
25
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
return romanize_arabic(text)
|
23
|
-
when 'Cyrillic'
|
24
|
-
latinized = Translit.convert(text, :english)
|
25
|
-
return opt == :ascii ? latinized.to_slug.to_ascii.to_s : latinized
|
26
|
-
when 'Han'
|
27
|
-
return Pinyin.t(text, pinyin_options)
|
28
|
-
end
|
29
|
-
end
|
26
|
+
latinized = latinize_script(text, scripts.first, opt)
|
27
|
+
opt == :ascii ? remove_non_ascii(latinized) : latinized
|
28
|
+
end
|
30
29
|
|
31
|
-
|
32
|
-
|
30
|
+
def self.latinize_script(text, script, opt = nil)
|
31
|
+
case script
|
32
|
+
when 'Arabic'
|
33
|
+
return Arabic.t(text)
|
34
|
+
when 'Cyrillic'
|
35
|
+
latinized = Translit.convert(text, :english)
|
36
|
+
return opt == :ascii ? remove_diacritics(latinized) : latinized
|
37
|
+
when 'Han'
|
38
|
+
return Han.t(text, opt)
|
39
|
+
when 'Japanese'
|
40
|
+
return Japanese.t(text)
|
33
41
|
end
|
34
|
-
|
35
42
|
text
|
36
43
|
end
|
37
44
|
|
38
|
-
def self.
|
45
|
+
def self.detect_non_latin_scripts(text)
|
39
46
|
scripts = Unicode::Scripts.scripts(text) - ['Common', 'Inherited', 'Latin']
|
40
|
-
|
47
|
+
if is_japanese?(scripts)
|
48
|
+
scripts -= ['Han', 'Hiragana', 'Katakana']
|
49
|
+
scripts += ['Japanese']
|
50
|
+
end
|
51
|
+
scripts.intersection(SUPPORTED_SCRIPTS)
|
41
52
|
end
|
42
53
|
|
43
|
-
def self.
|
44
|
-
|
45
|
-
(scripts.include?('Hiragana') || scripts.include?('Katakana'))
|
54
|
+
def self.remove_diacritics(text)
|
55
|
+
text.to_slug.transliterate.to_s
|
46
56
|
end
|
47
57
|
|
48
|
-
def self.
|
49
|
-
|
50
|
-
.map{|k| k[-1]}
|
51
|
-
.join(' ')
|
52
|
-
.gsub('ー','')
|
53
|
-
.gsub(' 。','.')
|
54
|
-
.gsub(' ・','-')
|
55
|
-
.gsub(' 、',',')
|
56
|
-
)
|
58
|
+
def self.remove_non_ascii(text)
|
59
|
+
text.to_slug.transliterate.to_ascii.to_s
|
57
60
|
end
|
58
61
|
|
59
|
-
def self.
|
60
|
-
|
61
|
-
|
62
|
-
.map{|k| k.split("\t")}.tap(&:pop)
|
63
|
-
.map{|k| [k[0]].concat(k[1].split(','))}
|
64
|
-
.map{|k| [k[0], k[1], k[-2]]}
|
65
|
-
tokenized_kana = []
|
66
|
-
mecab_parsed.each do |token|
|
67
|
-
if token[1] == "助動詞"
|
68
|
-
tokenized_kana[-1][0] += token[0]
|
69
|
-
tokenized_kana[-1][-1] += token[-1]
|
70
|
-
elsif token[-1] == '*'
|
71
|
-
tokenized_kana << [token[0], token[1], token[0]]
|
72
|
-
else
|
73
|
-
tokenized_kana << token
|
74
|
-
end
|
75
|
-
end
|
76
|
-
tokenized_kana
|
62
|
+
def self.has_non_latin?(text)
|
63
|
+
scripts = Unicode::Scripts.scripts(text) - ['Common', 'Inherited', 'Latin']
|
64
|
+
scripts.size > 0 ? true : false
|
77
65
|
end
|
78
66
|
|
79
|
-
def self.
|
80
|
-
|
81
|
-
.gsub('،',',') # ARABIC COMMA
|
82
|
-
.gsub('؛',';') # ARABIC SEMICOLON
|
83
|
-
.gsub('؟','?') # ARABIC QUESTION MARK
|
84
|
-
.gsub('ء',"'") # ARABIC LETTER HAMZA
|
85
|
-
.gsub('آ','a') # ARABIC LETTER ALEF WITH MADDA ABOVE
|
86
|
-
.gsub('أ','a') # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
87
|
-
.gsub('ؤ','w') # ARABIC LETTER WAW WITH HAMZA ABOVE
|
88
|
-
.gsub('إ','i') # ARABIC LETTER ALEF WITH HAMZA BELOW
|
89
|
-
.gsub('ئ','ye') # ARABIC LETTER YEH WITH HAMZA ABOVE
|
90
|
-
.gsub('ا','a') # ARABIC LETTER ALEF
|
91
|
-
.gsub('ب','b') # ARABIC LETTER BEH
|
92
|
-
.gsub('ة','a') # ARABIC LETTER TEH MARBUTA
|
93
|
-
.gsub('ت','t') # ARABIC LETTER TEH
|
94
|
-
.gsub('ث','th') # ARABIC LETTER THEH
|
95
|
-
.gsub('ج','j') # ARABIC LETTER JEEM
|
96
|
-
.gsub('ح','h') # ARABIC LETTER HAH
|
97
|
-
.gsub('خ','kh') # ARABIC LETTER KHAH
|
98
|
-
.gsub('د','d') # ARABIC LETTER DAL
|
99
|
-
.gsub('ذ','th') # ARABIC LETTER THAL
|
100
|
-
.gsub('ر','r') # ARABIC LETTER REH
|
101
|
-
.gsub('ز','z') # ARABIC LETTER ZAIN
|
102
|
-
.gsub('س','s') # ARABIC LETTER SEEN
|
103
|
-
.gsub('ش','sh') # ARABIC LETTER SHEEN
|
104
|
-
.gsub('ص','s') # ARABIC LETTER SAD
|
105
|
-
.gsub('ض','d') # ARABIC LETTER DAD
|
106
|
-
.gsub('ط','t') # ARABIC LETTER TAH
|
107
|
-
.gsub('ظ','z') # ARABIC LETTER ZAH
|
108
|
-
.gsub('ع',"'") # ARABIC LETTER AIN
|
109
|
-
.gsub('غ','gh') # ARABIC LETTER GHAIN
|
110
|
-
.gsub('ـ','-') # ARABIC TATWEEL
|
111
|
-
.gsub('ف','f') # ARABIC LETTER FEH
|
112
|
-
.gsub('ق','q') # ARABIC LETTER QAF
|
113
|
-
.gsub('ك','k') # ARABIC LETTER KAF
|
114
|
-
.gsub('ل','l') # ARABIC LETTER LAM
|
115
|
-
.gsub('م','m') # ARABIC LETTER MEEM
|
116
|
-
.gsub('ن','n') # ARABIC LETTER NOON
|
117
|
-
.gsub('ه','h') # ARABIC LETTER HEH
|
118
|
-
.gsub('و','w') # ARABIC LETTER WAW
|
119
|
-
.gsub('ى','a') # ARABIC LETTER ALEF MAKSURA
|
120
|
-
.gsub('ي','y') # ARABIC LETTER YEH
|
121
|
-
.gsub('َ','a') # ARABIC FATHA
|
122
|
-
.gsub('ُ','u') # ARABIC DAMMA
|
123
|
-
.gsub('ِ','i') # ARABIC KASRA
|
124
|
-
.gsub('ْ','') # ARABIC SUKUN
|
125
|
-
.gsub('ٔ',"'") # ARABIC HAMZA ABOVE
|
126
|
-
.gsub('ٕ',"'") # ARABIC HAMZA BELOW
|
127
|
-
.gsub('٠','0') # ARABIC-INDIC DIGIT ZERO
|
128
|
-
.gsub('١','1') # ARABIC-INDIC DIGIT ONE
|
129
|
-
.gsub('٢','2') # ARABIC-INDIC DIGIT TWO
|
130
|
-
.gsub('٣','3') # ARABIC-INDIC DIGIT THREE
|
131
|
-
.gsub('٤','4') # ARABIC-INDIC DIGIT FOUR
|
132
|
-
.gsub('٥','5') # ARABIC-INDIC DIGIT FIVE
|
133
|
-
.gsub('٦','6') # ARABIC-INDIC DIGIT SIX
|
134
|
-
.gsub('٧','7') # ARABIC-INDIC DIGIT SEVEN
|
135
|
-
.gsub('٨','8') # ARABIC-INDIC DIGIT EIGHT
|
136
|
-
.gsub('٩','9') # ARABIC-INDIC DIGIT NINE
|
137
|
-
.gsub('٪','%') # ARABIC PERCENT SIGN
|
138
|
-
.gsub('٫',',') # ARABIC DECIMAL SEPARATOR
|
139
|
-
.gsub('٬',',') # ARABIC THOUSANDS SEPARATOR
|
140
|
-
.gsub('ٮ','b') # ARABIC LETTER DOTLESS BEH
|
141
|
-
.gsub('ٯ','q') # ARABIC LETTER DOTLESS QAF
|
142
|
-
.gsub('ٰ','a') # ARABIC LETTER SUPERSCRIPT ALEF
|
143
|
-
.gsub('ٱ','a') # ARABIC LETTER ALEF WASLA
|
144
|
-
.gsub('ٲ','a') # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE
|
145
|
-
.gsub('ٳ','a') # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
|
146
|
-
.gsub('ٷ','u') # ARABIC LETTER U WITH HAMZA ABOVE
|
147
|
-
.gsub('ٹ','tt') # ARABIC LETTER TTEH
|
148
|
-
.gsub('ٺ','tt') # ARABIC LETTER TTEHEH
|
149
|
-
.gsub('ٻ','b') # ARABIC LETTER BEEH
|
150
|
-
.gsub('ټ','t') # ARABIC LETTER TEH WITH RING
|
151
|
-
.gsub('ٽ','t') # ARABIC LETTER TEH WITH THREE DOTS ABOVE DOWNWARDS
|
152
|
-
.gsub('پ','p') # ARABIC LETTER PEH
|
153
|
-
.gsub('ٿ','t') # ARABIC LETTER TEHEH
|
154
|
-
.gsub('ڀ','b') # ARABIC LETTER BEHEH
|
155
|
-
.gsub('ځ','h') # ARABIC LETTER HAH WITH HAMZA ABOVE
|
156
|
-
.gsub('ڂ','h') # ARABIC LETTER HAH WITH TWO DOTS VERTICAL ABOVE
|
157
|
-
.gsub('ڃ','ny') # ARABIC LETTER NYEH
|
158
|
-
.gsub('ڄ','dy') # ARABIC LETTER DYEH
|
159
|
-
.gsub('څ','h') # ARABIC LETTER HAH WITH THREE DOTS ABOVE
|
160
|
-
.gsub('چ','tch') # ARABIC LETTER TCHEH
|
161
|
-
.gsub('ڇ','tch') # ARABIC LETTER TCHEHEH
|
162
|
-
.gsub('ڈ','dd') # ARABIC LETTER DDAL
|
163
|
-
.gsub('ډ','d') # ARABIC LETTER DAL WITH RING
|
164
|
-
.gsub('ڊ','d') # ARABIC LETTER DAL WITH DOT BELOW
|
165
|
-
.gsub('ڋ','d') # ARABIC LETTER DAL WITH DOT BELOW AND SMALL TAH
|
166
|
-
.gsub('ڌ','d') # ARABIC LETTER DAHAL
|
167
|
-
.gsub('ڍ','dd') # ARABIC LETTER DDAHAL
|
168
|
-
.gsub('ڎ','d') # ARABIC LETTER DUL
|
169
|
-
.gsub('ڏ','d') # ARABIC LETTER DAL WITH THREE DOTS ABOVE DOWNWARDS
|
170
|
-
.gsub('ڐ','d') # ARABIC LETTER DAL WITH FOUR DOTS ABOVE
|
171
|
-
.gsub('ڑ','rr') # ARABIC LETTER RREH
|
172
|
-
.gsub('ڒ','r') # ARABIC LETTER REH WITH SMALL V
|
173
|
-
.gsub('ړ','r') # ARABIC LETTER REH WITH RING
|
174
|
-
.gsub('ڔ','r') # ARABIC LETTER REH WITH DOT BELOW
|
175
|
-
.gsub('ڕ','r') # ARABIC LETTER REH WITH SMALL V BELOW
|
176
|
-
.gsub('ږ','r') # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE
|
177
|
-
.gsub('ڗ','r') # ARABIC LETTER REH WITH TWO DOTS ABOVE
|
178
|
-
.gsub('ژ','j') # ARABIC LETTER JEH
|
179
|
-
.gsub('ڙ','r') # ARABIC LETTER REH WITH FOUR DOTS ABOVE
|
180
|
-
.gsub('ښ','s') # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
|
181
|
-
.gsub('ڛ','s') # ARABIC LETTER SEEN WITH THREE DOTS BELOW
|
182
|
-
.gsub('ڜ','s') # ARABIC LETTER SEEN WITH THREE DOTS BELOW AND THREE DOTS ABOVE
|
183
|
-
.gsub('ڝ','s') # ARABIC LETTER SAD WITH TWO DOTS BELOW
|
184
|
-
.gsub('ڞ','s') # ARABIC LETTER SAD WITH THREE DOTS ABOVE
|
185
|
-
.gsub('ڟ','t') # ARABIC LETTER TAH WITH THREE DOTS ABOVE
|
186
|
-
.gsub('ڠ','n') # ARABIC LETTER AIN WITH THREE DOTS ABOVE
|
187
|
-
.gsub('ڡ','f') # ARABIC LETTER DOTLESS FEH
|
188
|
-
.gsub('ڢ','f') # ARABIC LETTER FEH WITH DOT MOVED BELOW
|
189
|
-
.gsub('ڣ','f') # ARABIC LETTER FEH WITH DOT BELOW
|
190
|
-
.gsub('ڤ','v') # ARABIC LETTER VEH
|
191
|
-
.gsub('ڥ','f') # ARABIC LETTER FEH WITH THREE DOTS BELOW
|
192
|
-
.gsub('ڦ','p') # ARABIC LETTER PEHEH
|
193
|
-
.gsub('ڧ','q') # ARABIC LETTER QAF WITH DOT ABOVE
|
194
|
-
.gsub('ڨ','q') # ARABIC LETTER QAF WITH THREE DOTS ABOVE
|
195
|
-
.gsub('ک','k') # ARABIC LETTER KEHEH
|
196
|
-
.gsub('ڪ','k') # ARABIC LETTER SWASH KAF
|
197
|
-
.gsub('ګ','k') # ARABIC LETTER KAF WITH RING
|
198
|
-
.gsub('ڬ','k') # ARABIC LETTER KAF WITH DOT ABOVE
|
199
|
-
.gsub('ڭ','ng') # ARABIC LETTER NG
|
200
|
-
.gsub('ڮ','k') # ARABIC LETTER KAF WITH THREE DOTS BELOW
|
201
|
-
.gsub('گ','g') # ARABIC LETTER GAF
|
202
|
-
.gsub('ڰ','g') # ARABIC LETTER GAF WITH RING
|
203
|
-
.gsub('ڱ','ng') # ARABIC LETTER NGOEH
|
204
|
-
.gsub('ڲ','g') # ARABIC LETTER GAF WITH TWO DOTS BELOW
|
205
|
-
.gsub('ڳ','g') # ARABIC LETTER GUEH
|
206
|
-
.gsub('ڴ','g') # ARABIC LETTER GAF WITH THREE DOTS ABOVE
|
207
|
-
.gsub('ڵ','l') # ARABIC LETTER LAM WITH SMALL V
|
208
|
-
.gsub('ڶ','l') # ARABIC LETTER LAM WITH DOT ABOVE
|
209
|
-
.gsub('ڷ','l') # ARABIC LETTER LAM WITH THREE DOTS ABOVE
|
210
|
-
.gsub('ڸ','l') # ARABIC LETTER LAM WITH THREE DOTS BELOW
|
211
|
-
.gsub('ڹ','n') # ARABIC LETTER NOON WITH DOT BELOW
|
212
|
-
.gsub('ں','n') # ARABIC LETTER NOON GHUNNA
|
213
|
-
.gsub('ڻ','rn') # ARABIC LETTER RNOON
|
214
|
-
.gsub('ڼ','n') # ARABIC LETTER NOON WITH RING
|
215
|
-
.gsub('ڽ','n') # ARABIC LETTER NOON WITH THREE DOTS ABOVE
|
216
|
-
.gsub('ھ','h') # ARABIC LETTER HEH DOACHASHMEE
|
217
|
-
.gsub('ڿ','tch') # ARABIC LETTER TCHEH WITH DOT ABOVE
|
218
|
-
.gsub('ۀ','h') # ARABIC LETTER HEH WITH YEH ABOVE
|
219
|
-
.gsub('ہ','h') # ARABIC LETTER HEH GOAL
|
220
|
-
.gsub('ۂ','h') # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
|
221
|
-
.gsub('ۃ','a') # ARABIC LETTER TEH MARBUTA GOAL
|
222
|
-
.gsub('ۄ','w') # ARABIC LETTER WAW WITH RING
|
223
|
-
.gsub('ۅ','oe') # ARABIC LETTER KIRGHIZ OE
|
224
|
-
.gsub('ۆ','oe') # ARABIC LETTER OE
|
225
|
-
.gsub('ۇ','u') # ARABIC LETTER U
|
226
|
-
.gsub('ۈ','yu') # ARABIC LETTER YU
|
227
|
-
.gsub('ۉ','yu') # ARABIC LETTER KIRGHIZ YU
|
228
|
-
.gsub('ۊ','w') # ARABIC LETTER WAW WITH TWO DOTS ABOVE
|
229
|
-
.gsub('ۋ','v') # ARABIC LETTER VE
|
230
|
-
.gsub('ی','y') # ARABIC LETTER FARSI YEH
|
231
|
-
.gsub('ۍ','y') # ARABIC LETTER YEH WITH TAIL
|
232
|
-
.gsub('ێ','y') # ARABIC LETTER YEH WITH SMALL V
|
233
|
-
.gsub('ۏ','w') # ARABIC LETTER WAW WITH DOT ABOVE
|
234
|
-
.gsub('ې','e') # ARABIC LETTER E
|
235
|
-
.gsub('ۑ','y') # ARABIC LETTER YEH WITH THREE DOTS BELOW
|
236
|
-
.gsub('ے','y') # ARABIC LETTER YEH BARREE
|
237
|
-
.gsub('ۓ','y') # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
|
238
|
-
.gsub('۔','.') # ARABIC FULL STOP
|
239
|
-
.gsub('ە','ae') # ARABIC LETTER AE
|
240
|
-
.gsub('ۮ','d') # ARABIC LETTER DAL WITH INVERTED V
|
241
|
-
.gsub('ۯ','r') # ARABIC LETTER REH WITH INVERTED V
|
242
|
-
.gsub('۰','0') # EXTENDED ARABIC-INDIC DIGIT ZERO
|
243
|
-
.gsub('۱','1') # EXTENDED ARABIC-INDIC DIGIT ONE
|
244
|
-
.gsub('۲','2') # EXTENDED ARABIC-INDIC DIGIT TWO
|
245
|
-
.gsub('۳','3') # EXTENDED ARABIC-INDIC DIGIT THREE
|
246
|
-
.gsub('۴','4') # EXTENDED ARABIC-INDIC DIGIT FOUR
|
247
|
-
.gsub('۵','5') # EXTENDED ARABIC-INDIC DIGIT FIVE
|
248
|
-
.gsub('۶','6') # EXTENDED ARABIC-INDIC DIGIT SIX
|
249
|
-
.gsub('۷','7') # EXTENDED ARABIC-INDIC DIGIT SEVEN
|
250
|
-
.gsub('۸','8') # EXTENDED ARABIC-INDIC DIGIT EIGHT
|
251
|
-
.gsub('۹','9') # EXTENDED ARABIC-INDIC DIGIT NINE
|
252
|
-
.gsub('ۺ','sh') # ARABIC LETTER SHEEN WITH DOT BELOW
|
253
|
-
.gsub('ۻ','d') # ARABIC LETTER DAD WITH DOT BELOW
|
254
|
-
.gsub('ۼ','gh') # ARABIC LETTER GHAIN WITH DOT BELOW
|
255
|
-
.gsub('۽','&') # ARABIC SIGN SINDHI AMPERSAND
|
256
|
-
.gsub('ﷲ','Allah') # ARABIC LIGATURE ALLAH ISOLATED FORM
|
257
|
-
.gsub('و','w') # Arabic letter waw
|
258
|
-
.gsub('ء',"'") # hamza
|
259
|
-
.gsub('ٔ',"'") # hamza above
|
260
|
-
.gsub('ٕ',"'") # hamza below
|
261
|
-
.gsub('ع',"'") # ain
|
262
|
-
.gsub('آ','a') # alef madda
|
263
|
-
.gsub('إ','i') # alef with hamza below
|
264
|
-
.gsub('ٱ','a') # alef wasla
|
265
|
-
.gsub('ة','a') # teh marbuta
|
266
|
-
.gsub('ۃ','a') # teh marbuta goal
|
267
|
-
.gsub('ي','y') # Arabic yeh
|
268
|
-
.gsub('ى','a') # alef maksura
|
269
|
-
.gsub('ﻯ','a') # alef maksura isolated form
|
270
|
-
.gsub('ﻰ','a') # alef maksura final form
|
271
|
-
.gsub('ﯨ','a') # Uighur Kazach Kirghiz alef maksura initial form
|
272
|
-
.gsub('ﯩ','a') # Uighur Kazach Kirghiz alef maksura medial form
|
273
|
-
.gsub('ٰ','a ') # Arabic letter superscript alef
|
274
|
-
.gsub('ـ','') # tatweel (filler)
|
275
|
-
.gsub('َ','a') # fatha ("-a")
|
276
|
-
.gsub('ُ','u') # damma ("-u")
|
277
|
-
.gsub('ِ','i') # kasra ("-i")
|
278
|
-
.gsub('ْ','') # sukun (no vowel)
|
279
|
-
.gsub('ۡ','') # comment small high dotless head of khah; like sukun (no vowel); used in Kashmiri, Assamese
|
280
|
-
.gsub('اً','an') # alef + fathatan
|
281
|
-
.gsub('ً','') # fathatan ("-an")
|
282
|
-
.gsub('ٌ','') # dammatan ("-un")
|
283
|
-
.gsub('ٍ','') # kasratan ("-in")
|
284
|
-
.gsub('ّ','') # shadda (consonant doubler)
|
285
|
-
.gsub('ڃ','ny') # Arabic letter nyeh U+0683 (used in Sindhi (snd))
|
286
|
-
.gsub('ڄ','dy') # Arabic letter dyeh U+0684 (used in Sindhi (snd))
|
287
|
-
.gsub('۾','men') # Sindhi postposition men
|
288
|
-
.gsub('ؑ','alayhe wasallam') # "upon him be peace"
|
289
|
-
.gsub('ﷴ','Mohammad') # "Mohammad"
|
290
|
-
.gsub('ﷸ','wasallam') # "and peace"
|
291
|
-
.gsub('ﷺ','sallallahou alayhe wasallam') # "prayer of God be upon him and his family and peace"
|
67
|
+
def self.is_japanese?(scripts)
|
68
|
+
scripts.include?('Hiragana') || scripts.include?('Katakana')
|
292
69
|
end
|
293
70
|
end
|
data/lib/lib/arabic.rb
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
# Transliterates Arabic-script characters (letters, digits, punctuation,
# diacritics and a few ligatures) to Latin/ASCII text by table lookup.
class Arabic

  # Source character(s) => Latin replacement. An empty replacement drops the
  # character from the output.
  ARABIC = {
    '،' => ',', # ARABIC COMMA
    '؛' => ';', # ARABIC SEMICOLON
    '؟' => '?', # ARABIC QUESTION MARK
    'ء' => "'", # ARABIC LETTER HAMZA
    'آ' => 'a', # ARABIC LETTER ALEF WITH MADDA ABOVE
    'أ' => 'a', # ARABIC LETTER ALEF WITH HAMZA ABOVE
    'ؤ' => 'w', # ARABIC LETTER WAW WITH HAMZA ABOVE
    'إ' => 'i', # ARABIC LETTER ALEF WITH HAMZA BELOW
    'ئ' => 'ye', # ARABIC LETTER YEH WITH HAMZA ABOVE
    'ا' => 'a', # ARABIC LETTER ALEF
    'ب' => 'b', # ARABIC LETTER BEH
    'ة' => 'a', # ARABIC LETTER TEH MARBUTA
    'ت' => 't', # ARABIC LETTER TEH
    'ث' => 'th', # ARABIC LETTER THEH
    'ج' => 'j', # ARABIC LETTER JEEM
    'ح' => 'h', # ARABIC LETTER HAH
    'خ' => 'kh', # ARABIC LETTER KHAH
    'د' => 'd', # ARABIC LETTER DAL
    'ذ' => 'th', # ARABIC LETTER THAL
    'ر' => 'r', # ARABIC LETTER REH
    'ز' => 'z', # ARABIC LETTER ZAIN
    'س' => 's', # ARABIC LETTER SEEN
    'ش' => 'sh', # ARABIC LETTER SHEEN
    'ص' => 's', # ARABIC LETTER SAD
    'ض' => 'd', # ARABIC LETTER DAD
    'ط' => 't', # ARABIC LETTER TAH
    'ظ' => 'z', # ARABIC LETTER ZAH
    'ع' => "'", # ARABIC LETTER AIN
    'غ' => 'gh', # ARABIC LETTER GHAIN
    'ـ' => '-', # ARABIC TATWEEL
    'ف' => 'f', # ARABIC LETTER FEH
    'ق' => 'q', # ARABIC LETTER QAF
    'ك' => 'k', # ARABIC LETTER KAF
    'ل' => 'l', # ARABIC LETTER LAM
    'م' => 'm', # ARABIC LETTER MEEM
    'ن' => 'n', # ARABIC LETTER NOON
    'ه' => 'h', # ARABIC LETTER HEH
    'و' => 'w', # ARABIC LETTER WAW
    'ى' => 'a', # ARABIC LETTER ALEF MAKSURA
    'ي' => 'y', # ARABIC LETTER YEH
    'َ' => 'a', # ARABIC FATHA
    'ُ' => 'u', # ARABIC DAMMA
    'ِ' => 'i', # ARABIC KASRA
    'ْ' => '', # ARABIC SUKUN
    'ٔ' => "'", # ARABIC HAMZA ABOVE
    'ٕ' => "'", # ARABIC HAMZA BELOW
    '٠' => '0', # ARABIC-INDIC DIGIT ZERO
    '١' => '1', # ARABIC-INDIC DIGIT ONE
    '٢' => '2', # ARABIC-INDIC DIGIT TWO
    '٣' => '3', # ARABIC-INDIC DIGIT THREE
    '٤' => '4', # ARABIC-INDIC DIGIT FOUR
    '٥' => '5', # ARABIC-INDIC DIGIT FIVE
    '٦' => '6', # ARABIC-INDIC DIGIT SIX
    '٧' => '7', # ARABIC-INDIC DIGIT SEVEN
    '٨' => '8', # ARABIC-INDIC DIGIT EIGHT
    '٩' => '9', # ARABIC-INDIC DIGIT NINE
    '٪' => '%', # ARABIC PERCENT SIGN
    '٫' => ',', # ARABIC DECIMAL SEPARATOR
    '٬' => ',', # ARABIC THOUSANDS SEPARATOR
    'ٮ' => 'b', # ARABIC LETTER DOTLESS BEH
    'ٯ' => 'q', # ARABIC LETTER DOTLESS QAF
    'ٰ' => 'a', # ARABIC LETTER SUPERSCRIPT ALEF
    'ٱ' => 'a', # ARABIC LETTER ALEF WASLA
    'ٲ' => 'a', # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE
    'ٳ' => 'a', # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
    'ٷ' => 'u', # ARABIC LETTER U WITH HAMZA ABOVE
    'ٹ' => 'tt', # ARABIC LETTER TTEH
    'ٺ' => 'tt', # ARABIC LETTER TTEHEH
    'ٻ' => 'b', # ARABIC LETTER BEEH
    'ټ' => 't', # ARABIC LETTER TEH WITH RING
    'ٽ' => 't', # ARABIC LETTER TEH WITH THREE DOTS ABOVE DOWNWARDS
    'پ' => 'p', # ARABIC LETTER PEH
    'ٿ' => 't', # ARABIC LETTER TEHEH
    'ڀ' => 'b', # ARABIC LETTER BEHEH
    'ځ' => 'h', # ARABIC LETTER HAH WITH HAMZA ABOVE
    'ڂ' => 'h', # ARABIC LETTER HAH WITH TWO DOTS VERTICAL ABOVE
    'ڃ' => 'ny', # ARABIC LETTER NYEH
    'ڄ' => 'dy', # ARABIC LETTER DYEH
    'څ' => 'h', # ARABIC LETTER HAH WITH THREE DOTS ABOVE
    'چ' => 'tch', # ARABIC LETTER TCHEH
    'ڇ' => 'tch', # ARABIC LETTER TCHEHEH
    'ڈ' => 'dd', # ARABIC LETTER DDAL
    'ډ' => 'd', # ARABIC LETTER DAL WITH RING
    'ڊ' => 'd', # ARABIC LETTER DAL WITH DOT BELOW
    'ڋ' => 'd', # ARABIC LETTER DAL WITH DOT BELOW AND SMALL TAH
    'ڌ' => 'd', # ARABIC LETTER DAHAL
    'ڍ' => 'dd', # ARABIC LETTER DDAHAL
    'ڎ' => 'd', # ARABIC LETTER DUL
    'ڏ' => 'd', # ARABIC LETTER DAL WITH THREE DOTS ABOVE DOWNWARDS
    'ڐ' => 'd', # ARABIC LETTER DAL WITH FOUR DOTS ABOVE
    'ڑ' => 'rr', # ARABIC LETTER RREH
    'ڒ' => 'r', # ARABIC LETTER REH WITH SMALL V
    'ړ' => 'r', # ARABIC LETTER REH WITH RING
    'ڔ' => 'r', # ARABIC LETTER REH WITH DOT BELOW
    'ڕ' => 'r', # ARABIC LETTER REH WITH SMALL V BELOW
    'ږ' => 'r', # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE
    'ڗ' => 'r', # ARABIC LETTER REH WITH TWO DOTS ABOVE
    'ژ' => 'j', # ARABIC LETTER JEH
    'ڙ' => 'r', # ARABIC LETTER REH WITH FOUR DOTS ABOVE
    'ښ' => 's', # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
    'ڛ' => 's', # ARABIC LETTER SEEN WITH THREE DOTS BELOW
    'ڜ' => 's', # ARABIC LETTER SEEN WITH THREE DOTS BELOW AND THREE DOTS ABOVE
    'ڝ' => 's', # ARABIC LETTER SAD WITH TWO DOTS BELOW
    'ڞ' => 's', # ARABIC LETTER SAD WITH THREE DOTS ABOVE
    'ڟ' => 't', # ARABIC LETTER TAH WITH THREE DOTS ABOVE
    'ڠ' => 'n', # ARABIC LETTER AIN WITH THREE DOTS ABOVE
    'ڡ' => 'f', # ARABIC LETTER DOTLESS FEH
    'ڢ' => 'f', # ARABIC LETTER FEH WITH DOT MOVED BELOW
    'ڣ' => 'f', # ARABIC LETTER FEH WITH DOT BELOW
    'ڤ' => 'v', # ARABIC LETTER VEH
    'ڥ' => 'f', # ARABIC LETTER FEH WITH THREE DOTS BELOW
    'ڦ' => 'p', # ARABIC LETTER PEHEH
    'ڧ' => 'q', # ARABIC LETTER QAF WITH DOT ABOVE
    'ڨ' => 'q', # ARABIC LETTER QAF WITH THREE DOTS ABOVE
    'ک' => 'k', # ARABIC LETTER KEHEH
    'ڪ' => 'k', # ARABIC LETTER SWASH KAF
    'ګ' => 'k', # ARABIC LETTER KAF WITH RING
    'ڬ' => 'k', # ARABIC LETTER KAF WITH DOT ABOVE
    'ڭ' => 'ng', # ARABIC LETTER NG
    'ڮ' => 'k', # ARABIC LETTER KAF WITH THREE DOTS BELOW
    'گ' => 'g', # ARABIC LETTER GAF
    'ڰ' => 'g', # ARABIC LETTER GAF WITH RING
    'ڱ' => 'ng', # ARABIC LETTER NGOEH
    'ڲ' => 'g', # ARABIC LETTER GAF WITH TWO DOTS BELOW
    'ڳ' => 'g', # ARABIC LETTER GUEH
    'ڴ' => 'g', # ARABIC LETTER GAF WITH THREE DOTS ABOVE
    'ڵ' => 'l', # ARABIC LETTER LAM WITH SMALL V
    'ڶ' => 'l', # ARABIC LETTER LAM WITH DOT ABOVE
    'ڷ' => 'l', # ARABIC LETTER LAM WITH THREE DOTS ABOVE
    'ڸ' => 'l', # ARABIC LETTER LAM WITH THREE DOTS BELOW
    'ڹ' => 'n', # ARABIC LETTER NOON WITH DOT BELOW
    'ں' => 'n', # ARABIC LETTER NOON GHUNNA
    'ڻ' => 'rn', # ARABIC LETTER RNOON
    'ڼ' => 'n', # ARABIC LETTER NOON WITH RING
    'ڽ' => 'n', # ARABIC LETTER NOON WITH THREE DOTS ABOVE
    'ھ' => 'h', # ARABIC LETTER HEH DOACHASHMEE
    'ڿ' => 'tch', # ARABIC LETTER TCHEH WITH DOT ABOVE
    'ۀ' => 'h', # ARABIC LETTER HEH WITH YEH ABOVE
    'ہ' => 'h', # ARABIC LETTER HEH GOAL
    'ۂ' => 'h', # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
    'ۃ' => 'a', # ARABIC LETTER TEH MARBUTA GOAL
    'ۄ' => 'w', # ARABIC LETTER WAW WITH RING
    'ۅ' => 'oe', # ARABIC LETTER KIRGHIZ OE
    'ۆ' => 'oe', # ARABIC LETTER OE
    'ۇ' => 'u', # ARABIC LETTER U
    'ۈ' => 'yu', # ARABIC LETTER YU
    'ۉ' => 'yu', # ARABIC LETTER KIRGHIZ YU
    'ۊ' => 'w', # ARABIC LETTER WAW WITH TWO DOTS ABOVE
    'ۋ' => 'v', # ARABIC LETTER VE
    'ی' => 'y', # ARABIC LETTER FARSI YEH
    'ۍ' => 'y', # ARABIC LETTER YEH WITH TAIL
    'ێ' => 'y', # ARABIC LETTER YEH WITH SMALL V
    'ۏ' => 'w', # ARABIC LETTER WAW WITH DOT ABOVE
    'ې' => 'e', # ARABIC LETTER E
    'ۑ' => 'y', # ARABIC LETTER YEH WITH THREE DOTS BELOW
    'ے' => 'y', # ARABIC LETTER YEH BARREE
    'ۓ' => 'y', # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
    '۔' => '.', # ARABIC FULL STOP
    'ە' => 'ae', # ARABIC LETTER AE
    'ۮ' => 'd', # ARABIC LETTER DAL WITH INVERTED V
    'ۯ' => 'r', # ARABIC LETTER REH WITH INVERTED V
    '۰' => '0', # EXTENDED ARABIC-INDIC DIGIT ZERO
    '۱' => '1', # EXTENDED ARABIC-INDIC DIGIT ONE
    '۲' => '2', # EXTENDED ARABIC-INDIC DIGIT TWO
    '۳' => '3', # EXTENDED ARABIC-INDIC DIGIT THREE
    '۴' => '4', # EXTENDED ARABIC-INDIC DIGIT FOUR
    '۵' => '5', # EXTENDED ARABIC-INDIC DIGIT FIVE
    '۶' => '6', # EXTENDED ARABIC-INDIC DIGIT SIX
    '۷' => '7', # EXTENDED ARABIC-INDIC DIGIT SEVEN
    '۸' => '8', # EXTENDED ARABIC-INDIC DIGIT EIGHT
    '۹' => '9', # EXTENDED ARABIC-INDIC DIGIT NINE
    'ۺ' => 'sh', # ARABIC LETTER SHEEN WITH DOT BELOW
    'ۻ' => 'd', # ARABIC LETTER DAD WITH DOT BELOW
    'ۼ' => 'gh', # ARABIC LETTER GHAIN WITH DOT BELOW
    '۽' => '&', # ARABIC SIGN SINDHI AMPERSAND
    'ﷲ' => 'Allah', # ARABIC LIGATURE ALLAH ISOLATED FORM
    'ۡ' => '', # small high dotless head of khah; like sukun (no vowel); used in Kashmiri, Assamese
    'اً' => 'an', # alef + fathatan
    'ً' => '', # fathatan ("-an")
    'ٌ' => '', # dammatan ("-un")
    'ٍ' => '', # kasratan ("-in")
    'ّ' => '', # shadda (consonant doubler)
    '۾' => 'men', # Sindhi postposition men
    'ؑ' => 'alayhe wasallam', # "upon him be peace"
    'ﷴ' => 'Mohammad', # "Mohammad"
    'ﷸ' => 'wasallam', # "and peace"
    'ﷺ' => 'sallallahou alayhe wasallam', # "prayer of God be upon him and his family and peace"
  }.freeze

  # Alternation of every table key, longest first, so that the two-character
  # key (alef + fathatan) is tried before the single alef it starts with.
  ARABIC_PATTERN = Regexp.union(ARABIC.keys.sort_by { |key| -key.length }).freeze

  # Returns a new string in which every occurrence of an ARABIC key is replaced
  # by its Latin value. Characters not in the table are left untouched and the
  # argument itself is not modified.
  #
  # The substitution is a single pass over the text: replacing key by key in
  # table order would rewrite the bare alef first and make the alef + fathatan
  # entry unreachable.
  def self.t(text)
    text.gsub(ARABIC_PATTERN, ARABIC)
  end

end
|
data/lib/lib/han.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Converts Han (Chinese) characters to pinyin, leaving every other character
# of the text as it is.
class Han
  require 'chinese_pinyin'

  # Returns +text+ with each Han character replaced by its pinyin.
  #
  # text - String to convert.
  # opt  - pass :ascii to call Pinyin.t with no options; any other value
  #        requests tone marks (tonemarks: true).
  #
  # Every converted character is prefixed with a space so that consecutive
  # syllables stay separated; runs of spaces in the joined result are then
  # collapsed to a single space. A text that starts with a Han character
  # therefore still begins with one space.
  def self.t(text, opt = nil)
    pinyin_options = opt == :ascii ? {} : { tonemarks: true }

    pieces = text.each_char.map do |char|
      char.match?(/\p{Han}/) ? " #{Pinyin.t(char, pinyin_options)}" : char
    end

    pieces.join.squeeze(' ')
  end
end
|
data/lib/lib/japanese.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Romanizes Japanese text: the text is tokenized with MeCab, every token that
# has a katakana reading is replaced by its romaji, and Japanese punctuation is
# then mapped to Latin punctuation.
class Japanese
  require 'mecab_standalone'
  require 'romaji'

  # Punctuation replacements applied after romanization. Opening brackets are
  # keyed together with the space that romanization inserts after them, so the
  # space ends up in front of the Latin bracket instead.
  # NOTE(review): several entries map a character to itself as written here
  # (' ', ':', '!', '?', ')', '}'); presumably the keys were meant to be the
  # full-width forms - verify against the intended table.
  JAPANESE_PUNCTUATION = {
    ' ' => ' ',
    '、' => ',',
    '。' => '.',
    ':' => ':',
    '!' => '!',
    '?' => '?',
    '〜' => '~',
    '…' => '...',
    '‥' => '..',
    '「 ' => ' \'',
    '」' => '\'',
    '『 ' => ' "',
    '』' => '"',
    '〝 ' => ' "',
    '〟' => '"',
    '( ' => ' (',
    ')' => ')',
    '【 ' => ' [',
    '】' => ']',
    '{ ' => ' {',
    '}' => '}',
  }.freeze

  # Original (misspelled) constant name, kept so existing references still work.
  JAPANESE_PONCTUATION = JAPANESE_PUNCTUATION

  # Returns a romanized copy of +text+; the argument is not modified.
  #
  # For each parsed token whose reading contains katakana, the first remaining
  # occurrence of the token's surface form is replaced by a space followed by
  # the romaji of that reading. Tokens without a katakana reading are left in
  # place. Punctuation is converted last.
  def self.t(text)
    latin = text.dup
    parse(text).each do |surface, _pos, reading|
      next unless /\p{Katakana}/.match?(reading)

      # Block form: the replacement is inserted literally, with no backslash
      # interpretation.
      latin.sub!(surface) { " #{Romaji.kana2romaji(reading)}" }
    end
    JAPANESE_PUNCTUATION.each { |from, to| latin.gsub!(from, to) }
    latin
  end

  # Tokenizes +text+ with MeCab and returns an array of
  # [surface, part_of_speech, reading] triples.
  #
  # Each output line is expected to be "surface<TAB>comma,separated,features".
  # The part of speech is the first feature; the reading is taken from the
  # second-to-last field (presumably the katakana reading of the dictionary
  # layout in use - confirm). The last output line is discarded (presumably
  # MeCab's end-of-sentence marker - confirm).
  #
  # A reading of '*' (none available) is replaced by the surface form. An
  # auxiliary verb ("助動詞") is merged into the preceding token so that the
  # pair is romanized as one word; when there is no preceding token it is kept
  # as a token of its own instead of raising.
  def self.parse(text)
    rows = MecabStandalone.parse(text).split("\n").map { |line| line.split("\t") }
    rows.pop

    rows.each_with_object([]) do |(surface, features), tokens|
      fields = [surface].concat(features.split(','))
      pos = fields[1]
      reading = fields[-2]
      reading = surface if reading == '*'

      if pos == '助動詞' && tokens.any?
        tokens.last[0] += surface
        tokens.last[-1] += reading
      else
        tokens << [surface, pos, reading]
      end
    end
  end

end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: latinizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- William Yugue
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chinese_pinyin
|
@@ -108,6 +108,9 @@ extensions: []
|
|
108
108
|
extra_rdoc_files: []
|
109
109
|
files:
|
110
110
|
- lib/latinizer.rb
|
111
|
+
- lib/lib/arabic.rb
|
112
|
+
- lib/lib/han.rb
|
113
|
+
- lib/lib/japanese.rb
|
111
114
|
homepage: https://github.com/wyugue/latinizer
|
112
115
|
licenses:
|
113
116
|
- MIT
|