latinizer 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/latinizer.rb +288 -0
- metadata +134 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 86639550eb5eec0ca450691fe31e7c539c4612eec514c69d56f9efdaf8877d76
|
4
|
+
data.tar.gz: d9ba4e3f3336de06cae13f895d530d307d978d83c699bc7c3a5a20cf2280db9f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ad98e9f27e9f06f7c1630aadc71df6aa0ebb0954e9679a13a542376ef63c559371306bdab1d1eccfaf16dadd33de68bd2e517af3f9cc49f99e2b310049dde4c2
|
7
|
+
data.tar.gz: 5c23300c6e2bab82ca96824c6b693ba6800866143b8fc1f8525f51b8e80909a8b7edeb7fad0648273e270729ea6d146d3981af5f3785e7dfd1d1ec2fcb293aaf
|
data/lib/latinizer.rb
ADDED
@@ -0,0 +1,288 @@
|
|
1
|
+
class Latinizer
|
2
|
+
require 'chinese_pinyin'
|
3
|
+
require 'mecab_standalone'
|
4
|
+
require 'romaji'
|
5
|
+
require 'translit'
|
6
|
+
require 'unicode/scripts'
|
7
|
+
require 'babosa'
|
8
|
+
|
9
|
+
# Romanizes +text+, choosing a romanizer from the Unicode scripts found in it.
#
# text - the String to romanize.
# opt  - optional mode Symbol:
#        :ja    - skip script-based dispatch and romanize as Japanese.
#        :ascii - call Pinyin without the tonemarks option and pass Cyrillic
#                 output through to_slug.to_ascii.to_s.
#        Any other value (including nil) selects the default behaviour.
#
# Returns the romanized String. When the text is neither a single supported
# script (Arabic, Cyrillic, Han) nor detected as Japanese, +text+ is returned
# unchanged.
def self.t(text, opt = nil)
  # Script detection runs before the :ja shortcut; 'Common', 'Inherited' and
  # 'Latin' are removed so they do not count as scripts to romanize.
  detected = Unicode::Scripts.scripts(text) - %w[Common Inherited Latin]
  return romanize_japanese(text) if opt == :ja

  ascii_only = opt == :ascii

  # A one-element array pattern matches only when exactly that single script
  # is left after the subtraction above.
  case detected
  when ['Arabic']
    return romanize_arabic(text)
  when ['Cyrillic']
    converted = Translit.convert(text, :english)
    return ascii_only ? converted.to_slug.to_ascii.to_s : converted
  when ['Han']
    tone_settings = ascii_only ? {} : { tonemarks: true }
    return Pinyin.t(text, tone_settings)
  end

  # Anything else is romanized as Japanese when kana is present, otherwise
  # handed back untouched.
  is_japanese?(detected) ? romanize_japanese(text) : text
end
|
37
|
+
|
38
|
+
# Reports whether a list of Unicode script names indicates Japanese text.
#
# scripts - Array of script-name Strings (e.g. ['Han', 'Hiragana']).
#
# Returns true when the list contains 'Hiragana' or 'Katakana', false otherwise.
# 'Han' on its own is not enough to return true.
#
# The previous condition also tested "Han together with kana", but that case is
# already covered by the kana test alone, so the redundant clause was dropped;
# the result is the same for every input.
def self.is_japanese?(scripts)
  scripts.include?('Hiragana') || scripts.include?('Katakana')
end
|
42
|
+
|
43
|
+
# Romanizes Japanese text.
#
# text - the String to romanize; it is tokenized by parse_japanese.
#
# The last element of every token returned by parse_japanese is joined with
# single spaces, a few Japanese marks are rewritten, and the result is passed
# to Romaji.kana2romaji.
#
# Returns whatever Romaji.kana2romaji returns for the prepared string.
def self.romanize_japanese(text)
  readings = parse_japanese(text).map { |token| token[-1] }
  prepared = readings.join(' ')

  # Applied strictly in this order: the long-vowel mark is removed first, then
  # punctuation is attached to the preceding word by consuming the space that
  # the join put in front of it.
  [
    ['ー', ''],
    [' 。', '.'],
    [' ・', '-'],
    [' 、', ',']
  ].each do |mark, replacement|
    prepared = prepared.gsub(mark, replacement)
  end

  Romaji.kana2romaji(prepared)
end
|
53
|
+
|
54
|
+
# Tokenizes Japanese text with MecabStandalone and returns one
# [surface, part_of_speech, reading] triple per token.
#
# text - the String passed unchanged to MecabStandalone.parse.
#
# Each output line is expected to look like "surface<TAB>feature,feature,...".
# The first feature is kept as the part of speech and the second-to-last
# element of the row as the reading (presumably the reading field of MeCab's
# feature layout; confirm against the dictionary in use).
#
# Post-processing:
# * a reading of '*' is replaced by the surface form;
# * an auxiliary verb (助動詞) is merged into the token before it by appending
#   its surface and reading.
#
# Fixes over the previous version:
# * an auxiliary verb with no preceding token is kept as its own token instead
#   of raising NoMethodError on nil;
# * output lines without a tab-separated feature column are skipped instead of
#   raising NoMethodError on nil;
# * a merged auxiliary verb whose reading is '*' contributes its surface form
#   rather than a literal '*'.
#
# Returns an Array of three-element Arrays (empty when there are no tokens).
def self.parse_japanese(text)
  rows = MecabStandalone.parse(text)
    .split("\n")
    .map { |line| line.split("\t") }
    .tap(&:pop) # the final output line is always discarded (presumably the "EOS" terminator)
    .select { |columns| columns.size >= 2 } # ignore lines that carry no feature column

  triples = rows.map do |columns|
    fields = [columns[0]].concat(columns[1].split(','))
    [fields[0], fields[1], fields[-2]]
  end

  triples.each_with_object([]) do |(surface, part_of_speech, reading), merged|
    reading = surface if reading == '*'
    previous = merged.last

    if part_of_speech == "助動詞" && previous
      # Glue the auxiliary verb onto the preceding token.
      previous[0] = "#{previous[0]}#{surface}"
      previous[2] = "#{previous[2]}#{reading}"
    else
      merged << [surface, part_of_speech, reading]
    end
  end
end
|
73
|
+
|
74
|
+
# Transliterates Arabic-script text into Latin characters.
#
# text - the String to transliterate.
#
# Every character (or sequence) listed in the table below is replaced by its
# Latin value in a single pass over the text; anything not listed is left
# unchanged. The table is built once and memoized on the receiver.
#
# Changes from the previous chain of roughly 230 consecutive gsub calls:
# * ALEF followed by FATHATAN now yields "an". The old rule for that pair sat
#   after the rule that had already turned every ALEF into "a", so it could
#   never match and the pair came out as "a".
# * Rules that repeated an earlier character were removed; they were dead
#   code. Where a later rule disagreed with an earlier one (TATWEEL => "",
#   SUPERSCRIPT ALEF => "a "), the earlier rule is kept because it is the one
#   that always took effect.
#
# Returns a new String; +text+ itself is not modified.
def self.romanize_arabic(text)
  @arabic_romanization ||= begin
    table = {
      # Multi-character sequence; matched before its individual characters.
      "\u0627\u064B" => 'an', # ALEF + FATHATAN

      # Punctuation
      "\u060C" => ',',   # ARABIC COMMA
      "\u061B" => ';',   # ARABIC SEMICOLON
      "\u061F" => '?',   # ARABIC QUESTION MARK
      "\u066A" => '%',   # ARABIC PERCENT SIGN
      "\u066B" => ',',   # ARABIC DECIMAL SEPARATOR
      "\u066C" => ',',   # ARABIC THOUSANDS SEPARATOR
      "\u06D4" => '.',   # ARABIC FULL STOP
      "\u06FD" => '&',   # ARABIC SIGN SINDHI AMPERSAND
      "\u0640" => '-',   # ARABIC TATWEEL

      # Basic letters
      "\u0621" => "'",   # ARABIC LETTER HAMZA
      "\u0622" => 'a',   # ARABIC LETTER ALEF WITH MADDA ABOVE
      "\u0623" => 'a',   # ARABIC LETTER ALEF WITH HAMZA ABOVE
      "\u0624" => 'w',   # ARABIC LETTER WAW WITH HAMZA ABOVE
      "\u0625" => 'i',   # ARABIC LETTER ALEF WITH HAMZA BELOW
      "\u0626" => 'ye',  # ARABIC LETTER YEH WITH HAMZA ABOVE
      "\u0627" => 'a',   # ARABIC LETTER ALEF
      "\u0628" => 'b',   # ARABIC LETTER BEH
      "\u0629" => 'a',   # ARABIC LETTER TEH MARBUTA
      "\u062A" => 't',   # ARABIC LETTER TEH
      "\u062B" => 'th',  # ARABIC LETTER THEH
      "\u062C" => 'j',   # ARABIC LETTER JEEM
      "\u062D" => 'h',   # ARABIC LETTER HAH
      "\u062E" => 'kh',  # ARABIC LETTER KHAH
      "\u062F" => 'd',   # ARABIC LETTER DAL
      "\u0630" => 'th',  # ARABIC LETTER THAL
      "\u0631" => 'r',   # ARABIC LETTER REH
      "\u0632" => 'z',   # ARABIC LETTER ZAIN
      "\u0633" => 's',   # ARABIC LETTER SEEN
      "\u0634" => 'sh',  # ARABIC LETTER SHEEN
      "\u0635" => 's',   # ARABIC LETTER SAD
      "\u0636" => 'd',   # ARABIC LETTER DAD
      "\u0637" => 't',   # ARABIC LETTER TAH
      "\u0638" => 'z',   # ARABIC LETTER ZAH
      "\u0639" => "'",   # ARABIC LETTER AIN
      "\u063A" => 'gh',  # ARABIC LETTER GHAIN
      "\u0641" => 'f',   # ARABIC LETTER FEH
      "\u0642" => 'q',   # ARABIC LETTER QAF
      "\u0643" => 'k',   # ARABIC LETTER KAF
      "\u0644" => 'l',   # ARABIC LETTER LAM
      "\u0645" => 'm',   # ARABIC LETTER MEEM
      "\u0646" => 'n',   # ARABIC LETTER NOON
      "\u0647" => 'h',   # ARABIC LETTER HEH
      "\u0648" => 'w',   # ARABIC LETTER WAW
      "\u0649" => 'a',   # ARABIC LETTER ALEF MAKSURA
      "\u064A" => 'y',   # ARABIC LETTER YEH

      # Combining marks
      "\u064B" => '',    # ARABIC FATHATAN (when not preceded by ALEF)
      "\u064C" => '',    # ARABIC DAMMATAN
      "\u064D" => '',    # ARABIC KASRATAN
      "\u064E" => 'a',   # ARABIC FATHA
      "\u064F" => 'u',   # ARABIC DAMMA
      "\u0650" => 'i',   # ARABIC KASRA
      "\u0651" => '',    # ARABIC SHADDA
      "\u0652" => '',    # ARABIC SUKUN
      "\u0654" => "'",   # ARABIC HAMZA ABOVE
      "\u0655" => "'",   # ARABIC HAMZA BELOW
      "\u0670" => 'a',   # ARABIC LETTER SUPERSCRIPT ALEF
      "\u06E1" => '',    # ARABIC SMALL HIGH DOTLESS HEAD OF KHAH

      # Extended letters
      "\u066E" => 'b',   # ARABIC LETTER DOTLESS BEH
      "\u066F" => 'q',   # ARABIC LETTER DOTLESS QAF
      "\u0671" => 'a',   # ARABIC LETTER ALEF WASLA
      "\u0672" => 'a',   # ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE
      "\u0673" => 'a',   # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
      "\u0677" => 'u',   # ARABIC LETTER U WITH HAMZA ABOVE
      "\u0679" => 'tt',  # ARABIC LETTER TTEH
      "\u067A" => 'tt',  # ARABIC LETTER TTEHEH
      "\u067B" => 'b',   # ARABIC LETTER BEEH
      "\u067C" => 't',   # ARABIC LETTER TEH WITH RING
      "\u067D" => 't',   # ARABIC LETTER TEH WITH THREE DOTS ABOVE DOWNWARDS
      "\u067E" => 'p',   # ARABIC LETTER PEH
      "\u067F" => 't',   # ARABIC LETTER TEHEH
      "\u0680" => 'b',   # ARABIC LETTER BEHEH
      "\u0681" => 'h',   # ARABIC LETTER HAH WITH HAMZA ABOVE
      "\u0682" => 'h',   # ARABIC LETTER HAH WITH TWO DOTS VERTICAL ABOVE
      "\u0683" => 'ny',  # ARABIC LETTER NYEH
      "\u0684" => 'dy',  # ARABIC LETTER DYEH
      "\u0685" => 'h',   # ARABIC LETTER HAH WITH THREE DOTS ABOVE
      "\u0686" => 'tch', # ARABIC LETTER TCHEH
      "\u0687" => 'tch', # ARABIC LETTER TCHEHEH
      "\u0688" => 'dd',  # ARABIC LETTER DDAL
      "\u0689" => 'd',   # ARABIC LETTER DAL WITH RING
      "\u068A" => 'd',   # ARABIC LETTER DAL WITH DOT BELOW
      "\u068B" => 'd',   # ARABIC LETTER DAL WITH DOT BELOW AND SMALL TAH
      "\u068C" => 'd',   # ARABIC LETTER DAHAL
      "\u068D" => 'dd',  # ARABIC LETTER DDAHAL
      "\u068E" => 'd',   # ARABIC LETTER DUL
      "\u068F" => 'd',   # ARABIC LETTER DAL WITH THREE DOTS ABOVE DOWNWARDS
      "\u0690" => 'd',   # ARABIC LETTER DAL WITH FOUR DOTS ABOVE
      "\u0691" => 'rr',  # ARABIC LETTER RREH
      "\u0692" => 'r',   # ARABIC LETTER REH WITH SMALL V
      "\u0693" => 'r',   # ARABIC LETTER REH WITH RING
      "\u0694" => 'r',   # ARABIC LETTER REH WITH DOT BELOW
      "\u0695" => 'r',   # ARABIC LETTER REH WITH SMALL V BELOW
      "\u0696" => 'r',   # ARABIC LETTER REH WITH DOT BELOW AND DOT ABOVE
      "\u0697" => 'r',   # ARABIC LETTER REH WITH TWO DOTS ABOVE
      "\u0698" => 'j',   # ARABIC LETTER JEH
      "\u0699" => 'r',   # ARABIC LETTER REH WITH FOUR DOTS ABOVE
      "\u069A" => 's',   # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
      "\u069B" => 's',   # ARABIC LETTER SEEN WITH THREE DOTS BELOW
      "\u069C" => 's',   # ARABIC LETTER SEEN WITH THREE DOTS BELOW AND THREE DOTS ABOVE
      "\u069D" => 's',   # ARABIC LETTER SAD WITH TWO DOTS BELOW
      "\u069E" => 's',   # ARABIC LETTER SAD WITH THREE DOTS ABOVE
      "\u069F" => 't',   # ARABIC LETTER TAH WITH THREE DOTS ABOVE
      "\u06A0" => 'n',   # ARABIC LETTER AIN WITH THREE DOTS ABOVE
      "\u06A1" => 'f',   # ARABIC LETTER DOTLESS FEH
      "\u06A2" => 'f',   # ARABIC LETTER FEH WITH DOT MOVED BELOW
      "\u06A3" => 'f',   # ARABIC LETTER FEH WITH DOT BELOW
      "\u06A4" => 'v',   # ARABIC LETTER VEH
      "\u06A5" => 'f',   # ARABIC LETTER FEH WITH THREE DOTS BELOW
      "\u06A6" => 'p',   # ARABIC LETTER PEHEH
      "\u06A7" => 'q',   # ARABIC LETTER QAF WITH DOT ABOVE
      "\u06A8" => 'q',   # ARABIC LETTER QAF WITH THREE DOTS ABOVE
      "\u06A9" => 'k',   # ARABIC LETTER KEHEH
      "\u06AA" => 'k',   # ARABIC LETTER SWASH KAF
      "\u06AB" => 'k',   # ARABIC LETTER KAF WITH RING
      "\u06AC" => 'k',   # ARABIC LETTER KAF WITH DOT ABOVE
      "\u06AD" => 'ng',  # ARABIC LETTER NG
      "\u06AE" => 'k',   # ARABIC LETTER KAF WITH THREE DOTS BELOW
      "\u06AF" => 'g',   # ARABIC LETTER GAF
      "\u06B0" => 'g',   # ARABIC LETTER GAF WITH RING
      "\u06B1" => 'ng',  # ARABIC LETTER NGOEH
      "\u06B2" => 'g',   # ARABIC LETTER GAF WITH TWO DOTS BELOW
      "\u06B3" => 'g',   # ARABIC LETTER GUEH
      "\u06B4" => 'g',   # ARABIC LETTER GAF WITH THREE DOTS ABOVE
      "\u06B5" => 'l',   # ARABIC LETTER LAM WITH SMALL V
      "\u06B6" => 'l',   # ARABIC LETTER LAM WITH DOT ABOVE
      "\u06B7" => 'l',   # ARABIC LETTER LAM WITH THREE DOTS ABOVE
      "\u06B8" => 'l',   # ARABIC LETTER LAM WITH THREE DOTS BELOW
      "\u06B9" => 'n',   # ARABIC LETTER NOON WITH DOT BELOW
      "\u06BA" => 'n',   # ARABIC LETTER NOON GHUNNA
      "\u06BB" => 'rn',  # ARABIC LETTER RNOON
      "\u06BC" => 'n',   # ARABIC LETTER NOON WITH RING
      "\u06BD" => 'n',   # ARABIC LETTER NOON WITH THREE DOTS ABOVE
      "\u06BE" => 'h',   # ARABIC LETTER HEH DOACHASHMEE
      "\u06BF" => 'tch', # ARABIC LETTER TCHEH WITH DOT ABOVE
      "\u06C0" => 'h',   # ARABIC LETTER HEH WITH YEH ABOVE
      "\u06C1" => 'h',   # ARABIC LETTER HEH GOAL
      "\u06C2" => 'h',   # ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
      "\u06C3" => 'a',   # ARABIC LETTER TEH MARBUTA GOAL
      "\u06C4" => 'w',   # ARABIC LETTER WAW WITH RING
      "\u06C5" => 'oe',  # ARABIC LETTER KIRGHIZ OE
      "\u06C6" => 'oe',  # ARABIC LETTER OE
      "\u06C7" => 'u',   # ARABIC LETTER U
      "\u06C8" => 'yu',  # ARABIC LETTER YU
      "\u06C9" => 'yu',  # ARABIC LETTER KIRGHIZ YU
      "\u06CA" => 'w',   # ARABIC LETTER WAW WITH TWO DOTS ABOVE
      "\u06CB" => 'v',   # ARABIC LETTER VE
      "\u06CC" => 'y',   # ARABIC LETTER FARSI YEH
      "\u06CD" => 'y',   # ARABIC LETTER YEH WITH TAIL
      "\u06CE" => 'y',   # ARABIC LETTER YEH WITH SMALL V
      "\u06CF" => 'w',   # ARABIC LETTER WAW WITH DOT ABOVE
      "\u06D0" => 'e',   # ARABIC LETTER E
      "\u06D1" => 'y',   # ARABIC LETTER YEH WITH THREE DOTS BELOW
      "\u06D2" => 'y',   # ARABIC LETTER YEH BARREE
      "\u06D3" => 'y',   # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
      "\u06D5" => 'ae',  # ARABIC LETTER AE
      "\u06EE" => 'd',   # ARABIC LETTER DAL WITH INVERTED V
      "\u06EF" => 'r',   # ARABIC LETTER REH WITH INVERTED V
      "\u06FA" => 'sh',  # ARABIC LETTER SHEEN WITH DOT BELOW
      "\u06FB" => 'd',   # ARABIC LETTER DAD WITH DOT BELOW
      "\u06FC" => 'gh',  # ARABIC LETTER GHAIN WITH DOT BELOW
      "\u06FE" => 'men', # ARABIC SIGN SINDHI POSTPOSITION MEN

      # Presentation forms
      "\uFBE8" => 'a',   # UIGHUR KAZAKH KIRGHIZ ALEF MAKSURA INITIAL FORM
      "\uFBE9" => 'a',   # UIGHUR KAZAKH KIRGHIZ ALEF MAKSURA MEDIAL FORM
      "\uFEEF" => 'a',   # ALEF MAKSURA ISOLATED FORM
      "\uFEF0" => 'a',   # ALEF MAKSURA FINAL FORM

      # Honorific signs and word ligatures
      "\u0611" => 'alayhe wasallam',             # ARABIC SIGN ALAYHE ASSALLAM
      "\uFDF2" => 'Allah',                       # ARABIC LIGATURE ALLAH ISOLATED FORM
      "\uFDF4" => 'Mohammad',                    # ARABIC LIGATURE MOHAMMAD ISOLATED FORM
      "\uFDF8" => 'wasallam',                    # ARABIC LIGATURE WASALLAM ISOLATED FORM
      "\uFDFA" => 'sallallahou alayhe wasallam'  # ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM
    }

    # ARABIC-INDIC (U+0660..U+0669) and EXTENDED ARABIC-INDIC (U+06F0..U+06F9)
    # digits map to ASCII 0-9.
    10.times do |digit|
      table[[0x0660 + digit].pack('U')] = digit.to_s
      table[[0x06F0 + digit].pack('U')] = digit.to_s
    end

    table.freeze
  end

  # Longest keys go first so the two-character sequence wins over plain ALEF.
  @arabic_pattern ||= Regexp.union(@arabic_romanization.keys.sort_by { |key| -key.length })

  text.gsub(@arabic_pattern, @arabic_romanization)
end
|
288
|
+
end
|
metadata
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: latinizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- William Yugue
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-09-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: chinese_pinyin
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mecab_standalone
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 0.1.2
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0.1'
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.1.2
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: romaji
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0.2'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - "~>"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0.2'
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: translit
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0.1'
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0.1'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: unicode-scripts
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '1.6'
|
82
|
+
type: :runtime
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '1.6'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: babosa
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '1.0'
|
96
|
+
type: :runtime
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '1.0'
|
103
|
+
description: A simple general latinization / romanization / transliteration gem wrapping
|
104
|
+
Mecab, Chinese Pinyin and other more specific romanization gems
|
105
|
+
email: wyugue@gmail.com
|
106
|
+
executables: []
|
107
|
+
extensions: []
|
108
|
+
extra_rdoc_files: []
|
109
|
+
files:
|
110
|
+
- lib/latinizer.rb
|
111
|
+
homepage: https://github.com/wyugue/latinizer
|
112
|
+
licenses:
|
113
|
+
- MIT
|
114
|
+
metadata: {}
|
115
|
+
post_install_message:
|
116
|
+
rdoc_options: []
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
requirements: []
|
130
|
+
rubygems_version: 3.1.4
|
131
|
+
signing_key:
|
132
|
+
specification_version: 4
|
133
|
+
summary: latinizer
|
134
|
+
test_files: []
|