pinyin 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +22 -0
- data/examples/cgiform/cgiform.rb +24 -0
- data/examples/cgiform/template.rhtml +69 -0
- data/examples/hello.rb +12 -0
- data/lib/conversions.rb +74 -0
- data/lib/data/comparison.csv +410 -0
- data/lib/data/final.csv +10 -0
- data/lib/data/initial.csv +7 -0
- data/lib/data/paladiy.txt +421 -0
- data/lib/data/rules.yaml +24 -0
- data/lib/data/valid_pinyin.yaml +455 -0
- data/lib/exception.rb +14 -0
- data/lib/groundwork.rb +148 -0
- data/lib/pinyin.rb +71 -0
- data/lib/support.rb +16 -0
- data/lib/tones/accents.rb +59 -0
- data/lib/tones/marks.rb +25 -0
- data/lib/tones/no_tones.rb +16 -0
- data/lib/tones/numbers.rb +24 -0
- data/lib/tones.rb +19 -0
- data/rakefile +39 -0
- data/test/comparison_test.rb +35 -0
- data/test/hanyu_coverage.rb +33 -0
- metadata +74 -0
@@ -0,0 +1,455 @@
|
|
1
|
+
# Valid pinyin syllables by final and initial
|
2
|
+
# Hpricoted from http://en.wikipedia.org/wiki/Pinyin_table
|
3
|
+
# Hand-edited for Ueng/weng which is under Ong in the table
|
4
|
+
#
|
5
|
+
# Note that, in order to only use 7-bit ASCII characters,
|
6
|
+
# the u with two dots (umlaut) is written v, and the e with
|
7
|
+
# a little hat on top (circumflex) is written Ee (plain e is written E).
|
8
|
+
---
|
9
|
+
V:
|
10
|
+
Ne: nü
|
11
|
+
Qi: qu
|
12
|
+
Empty: yu
|
13
|
+
Le: lü
|
14
|
+
Xi: xu
|
15
|
+
Ji: ju
|
16
|
+
Ian:
|
17
|
+
Ne: nian
|
18
|
+
De: dian
|
19
|
+
Qi: qian
|
20
|
+
Mo: mian
|
21
|
+
Bo: bian
|
22
|
+
Empty: yan
|
23
|
+
Le: lian
|
24
|
+
Xi: xian
|
25
|
+
Te: tian
|
26
|
+
Ji: jian
|
27
|
+
Po: pian
|
28
|
+
Ee:
|
29
|
+
Empty: ê
|
30
|
+
Ua:
|
31
|
+
Empty: wa
|
32
|
+
Chi: chua
|
33
|
+
Ke: kua
|
34
|
+
Zhi: zhua
|
35
|
+
He: hua
|
36
|
+
Ge: gua
|
37
|
+
Shi: shua
|
38
|
+
Iu:
|
39
|
+
Ne: niu
|
40
|
+
De: diu
|
41
|
+
Qi: qiu
|
42
|
+
Mo: miu
|
43
|
+
Empty: you
|
44
|
+
Le: liu
|
45
|
+
Xi: xiu
|
46
|
+
Ji: jiu
|
47
|
+
Iao:
|
48
|
+
Ne: niao
|
49
|
+
De: diao
|
50
|
+
Qi: qiao
|
51
|
+
Mo: miao
|
52
|
+
Bo: biao
|
53
|
+
Empty: yao
|
54
|
+
Le: liao
|
55
|
+
Xi: xiao
|
56
|
+
Te: tiao
|
57
|
+
Ji: jiao
|
58
|
+
Po: piao
|
59
|
+
A:
|
60
|
+
Si: sa
|
61
|
+
Ne: na
|
62
|
+
De: da
|
63
|
+
Mo: ma
|
64
|
+
Bo: ba
|
65
|
+
Empty: a
|
66
|
+
Le: la
|
67
|
+
Zi: za
|
68
|
+
Chi: cha
|
69
|
+
Ke: ka
|
70
|
+
Zhi: zha
|
71
|
+
Ci: ca
|
72
|
+
Te: ta
|
73
|
+
He: ha
|
74
|
+
Ge: ga
|
75
|
+
Shi: sha
|
76
|
+
Fo: fa
|
77
|
+
Po: pa
|
78
|
+
Vn:
|
79
|
+
Qi: qun
|
80
|
+
Empty: yun
|
81
|
+
Xi: xun
|
82
|
+
Ji: jun
|
83
|
+
Uan:
|
84
|
+
Si: suan
|
85
|
+
Ri: ruan
|
86
|
+
Ne: nuan
|
87
|
+
De: duan
|
88
|
+
Empty: wan
|
89
|
+
Le: luan
|
90
|
+
Zi: zuan
|
91
|
+
Chi: chuan
|
92
|
+
Ke: kuan
|
93
|
+
Zhi: zhuan
|
94
|
+
Ci: cuan
|
95
|
+
Te: tuan
|
96
|
+
He: huan
|
97
|
+
Ge: guan
|
98
|
+
Shi: shuan
|
99
|
+
Ing:
|
100
|
+
Ne: ning
|
101
|
+
De: ding
|
102
|
+
Qi: qing
|
103
|
+
Mo: ming
|
104
|
+
Bo: bing
|
105
|
+
Empty: ying
|
106
|
+
Le: ling
|
107
|
+
Xi: xing
|
108
|
+
Te: ting
|
109
|
+
Ji: jing
|
110
|
+
Po: ping
|
111
|
+
Ia:
|
112
|
+
Qi: qia
|
113
|
+
Empty: ya
|
114
|
+
Le: lia
|
115
|
+
Xi: xia
|
116
|
+
Ji: jia
|
117
|
+
Er:
|
118
|
+
Empty: er
|
119
|
+
An:
|
120
|
+
Si: san
|
121
|
+
Ri: ran
|
122
|
+
Ne: nan
|
123
|
+
De: dan
|
124
|
+
Mo: man
|
125
|
+
Bo: ban
|
126
|
+
Empty: an
|
127
|
+
Le: lan
|
128
|
+
Zi: zan
|
129
|
+
Chi: chan
|
130
|
+
Ke: kan
|
131
|
+
Zhi: zhan
|
132
|
+
Ci: can
|
133
|
+
Te: tan
|
134
|
+
He: han
|
135
|
+
Ge: gan
|
136
|
+
Shi: shan
|
137
|
+
Fo: fan
|
138
|
+
Po: pan
|
139
|
+
Empty:
|
140
|
+
Si: si
|
141
|
+
Ri: ri
|
142
|
+
Zi: zi
|
143
|
+
Chi: chi
|
144
|
+
Zhi: zhi
|
145
|
+
Ci: ci
|
146
|
+
Shi: shi
|
147
|
+
Van:
|
148
|
+
Qi: quan
|
149
|
+
Empty: yuan
|
150
|
+
Xi: xuan
|
151
|
+
Ji: juan
|
152
|
+
Un:
|
153
|
+
Si: sun
|
154
|
+
Ri: run
|
155
|
+
De: dun
|
156
|
+
Empty: wen
|
157
|
+
Le: lun
|
158
|
+
Zi: zun
|
159
|
+
Chi: chun
|
160
|
+
Ke: kun
|
161
|
+
Zhi: zhun
|
162
|
+
Ci: cun
|
163
|
+
Te: tun
|
164
|
+
He: hun
|
165
|
+
Ge: gun
|
166
|
+
Shi: shun
|
167
|
+
Ao:
|
168
|
+
Si: sao
|
169
|
+
Ri: rao
|
170
|
+
Ne: nao
|
171
|
+
De: dao
|
172
|
+
Mo: mao
|
173
|
+
Bo: bao
|
174
|
+
Empty: ao
|
175
|
+
Le: lao
|
176
|
+
Zi: zao
|
177
|
+
Chi: chao
|
178
|
+
Ke: kao
|
179
|
+
Zhi: zhao
|
180
|
+
Ci: cao
|
181
|
+
Te: tao
|
182
|
+
He: hao
|
183
|
+
Ge: gao
|
184
|
+
Shi: shao
|
185
|
+
Po: pao
|
186
|
+
Uo:
|
187
|
+
Si: suo
|
188
|
+
Ri: ruo
|
189
|
+
Ne: nuo
|
190
|
+
De: duo
|
191
|
+
Empty: wo
|
192
|
+
Le: luo
|
193
|
+
Zi: zuo
|
194
|
+
Chi: chuo
|
195
|
+
Ke: kuo
|
196
|
+
Zhi: zhuo
|
197
|
+
Ci: cuo
|
198
|
+
Te: tuo
|
199
|
+
He: huo
|
200
|
+
Ge: guo
|
201
|
+
Shi: shuo
|
202
|
+
Ang:
|
203
|
+
Si: sang
|
204
|
+
Ri: rang
|
205
|
+
Ne: nang
|
206
|
+
De: dang
|
207
|
+
Mo: mang
|
208
|
+
Bo: bang
|
209
|
+
Empty: ang
|
210
|
+
Le: lang
|
211
|
+
Zi: zang
|
212
|
+
Chi: chang
|
213
|
+
Ke: kang
|
214
|
+
Zhi: zhang
|
215
|
+
Ci: cang
|
216
|
+
Te: tang
|
217
|
+
He: hang
|
218
|
+
Ge: gang
|
219
|
+
Shi: shang
|
220
|
+
Fo: fang
|
221
|
+
Po: pang
|
222
|
+
Ei:
|
223
|
+
Ne: nei
|
224
|
+
De: dei
|
225
|
+
Mo: mei
|
226
|
+
Bo: bei
|
227
|
+
Empty: ei
|
228
|
+
Le: lei
|
229
|
+
Zi: zei
|
230
|
+
Zhi: zhei
|
231
|
+
He: hei
|
232
|
+
Ge: gei
|
233
|
+
Shi: shei
|
234
|
+
Fo: fei
|
235
|
+
Po: pei
|
236
|
+
O:
|
237
|
+
Mo: mo
|
238
|
+
Bo: bo
|
239
|
+
Empty: o
|
240
|
+
Fo: fo
|
241
|
+
Po: po
|
242
|
+
Ue:
|
243
|
+
Ne: nüe
|
244
|
+
Qi: que
|
245
|
+
Empty: yue
|
246
|
+
Le: lüe
|
247
|
+
Xi: xue
|
248
|
+
Ji: jue
|
249
|
+
In:
|
250
|
+
Ne: nin
|
251
|
+
Qi: qin
|
252
|
+
Mo: min
|
253
|
+
Bo: bin
|
254
|
+
Empty: yin
|
255
|
+
Le: lin
|
256
|
+
Xi: xin
|
257
|
+
Ji: jin
|
258
|
+
Po: pin
|
259
|
+
E:
|
260
|
+
Si: se
|
261
|
+
Ri: re
|
262
|
+
Ne: ne
|
263
|
+
De: de
|
264
|
+
Mo: me
|
265
|
+
Empty: e
|
266
|
+
Le: le
|
267
|
+
Zi: ze
|
268
|
+
Chi: che
|
269
|
+
Ke: ke
|
270
|
+
Zhi: zhe
|
271
|
+
Ci: ce
|
272
|
+
Te: te
|
273
|
+
He: he
|
274
|
+
Ge: ge
|
275
|
+
Shi: she
|
276
|
+
Iang:
|
277
|
+
Ne: niang
|
278
|
+
Qi: qiang
|
279
|
+
Empty: yang
|
280
|
+
Le: liang
|
281
|
+
Xi: xiang
|
282
|
+
Ji: jiang
|
283
|
+
Iai:
|
284
|
+
Empty: yai
|
285
|
+
Ie:
|
286
|
+
Ne: nie
|
287
|
+
De: die
|
288
|
+
Qi: qie
|
289
|
+
Mo: mie
|
290
|
+
Bo: bie
|
291
|
+
Empty: ye
|
292
|
+
Le: lie
|
293
|
+
Xi: xie
|
294
|
+
Te: tie
|
295
|
+
Ji: jie
|
296
|
+
Po: pie
|
297
|
+
Io:
|
298
|
+
Empty: yo
|
299
|
+
Ou:
|
300
|
+
Si: sou
|
301
|
+
Ri: rou
|
302
|
+
Ne: nou
|
303
|
+
De: dou
|
304
|
+
Mo: mou
|
305
|
+
Empty: ou
|
306
|
+
Le: lou
|
307
|
+
Zi: zou
|
308
|
+
Chi: chou
|
309
|
+
Ke: kou
|
310
|
+
Zhi: zhou
|
311
|
+
Ci: cou
|
312
|
+
Te: tou
|
313
|
+
He: hou
|
314
|
+
Ge: gou
|
315
|
+
Shi: shou
|
316
|
+
Fo: fou
|
317
|
+
Po: pou
|
318
|
+
Uai:
|
319
|
+
Empty: wai
|
320
|
+
Chi: chuai
|
321
|
+
Ke: kuai
|
322
|
+
Zhi: zhuai
|
323
|
+
He: huai
|
324
|
+
Ge: guai
|
325
|
+
Shi: shuai
|
326
|
+
Ueng:
|
327
|
+
Empty: weng
|
328
|
+
Ong:
|
329
|
+
Si: song
|
330
|
+
Ri: rong
|
331
|
+
Ne: nong
|
332
|
+
De: dong
|
333
|
+
Le: long
|
334
|
+
Zi: zong
|
335
|
+
Chi: chong
|
336
|
+
Ke: kong
|
337
|
+
Zhi: zhong
|
338
|
+
Ci: cong
|
339
|
+
Te: tong
|
340
|
+
He: hong
|
341
|
+
Ge: gong
|
342
|
+
Eng:
|
343
|
+
Si: seng
|
344
|
+
Ri: reng
|
345
|
+
Ne: neng
|
346
|
+
De: deng
|
347
|
+
Mo: meng
|
348
|
+
Bo: beng
|
349
|
+
Empty: eng
|
350
|
+
Le: leng
|
351
|
+
Zi: zeng
|
352
|
+
Chi: cheng
|
353
|
+
Ke: keng
|
354
|
+
Zhi: zheng
|
355
|
+
Ci: ceng
|
356
|
+
Te: teng
|
357
|
+
He: heng
|
358
|
+
Ge: geng
|
359
|
+
Shi: sheng
|
360
|
+
Fo: feng
|
361
|
+
Po: peng
|
362
|
+
Ai:
|
363
|
+
Si: sai
|
364
|
+
Ne: nai
|
365
|
+
De: dai
|
366
|
+
Mo: mai
|
367
|
+
Bo: bai
|
368
|
+
Empty: ai
|
369
|
+
Le: lai
|
370
|
+
Zi: zai
|
371
|
+
Chi: chai
|
372
|
+
Ke: kai
|
373
|
+
Zhi: zhai
|
374
|
+
Ci: cai
|
375
|
+
Te: tai
|
376
|
+
He: hai
|
377
|
+
Ge: gai
|
378
|
+
Shi: shai
|
379
|
+
Po: pai
|
380
|
+
Iong:
|
381
|
+
Qi: qiong
|
382
|
+
Empty: yong
|
383
|
+
Xi: xiong
|
384
|
+
Ji: jiong
|
385
|
+
Uang:
|
386
|
+
Empty: wang
|
387
|
+
Chi: chuang
|
388
|
+
Ke: kuang
|
389
|
+
Zhi: zhuang
|
390
|
+
He: huang
|
391
|
+
Ge: guang
|
392
|
+
Shi: shuang
|
393
|
+
Ui:
|
394
|
+
Si: sui
|
395
|
+
Ri: rui
|
396
|
+
De: dui
|
397
|
+
Empty: wei
|
398
|
+
Zi: zui
|
399
|
+
Chi: chui
|
400
|
+
Ke: kui
|
401
|
+
Zhi: zhui
|
402
|
+
Ci: cui
|
403
|
+
Te: tui
|
404
|
+
He: hui
|
405
|
+
Ge: gui
|
406
|
+
Shi: shui
|
407
|
+
I:
|
408
|
+
Ne: ni
|
409
|
+
De: di
|
410
|
+
Qi: qi
|
411
|
+
Mo: mi
|
412
|
+
Bo: bi
|
413
|
+
Empty: yi
|
414
|
+
Le: li
|
415
|
+
Xi: xi
|
416
|
+
Te: ti
|
417
|
+
Ji: ji
|
418
|
+
Po: pi
|
419
|
+
En:
|
420
|
+
Si: sen
|
421
|
+
Ri: ren
|
422
|
+
Ne: nen
|
423
|
+
Mo: men
|
424
|
+
Bo: ben
|
425
|
+
Empty: en
|
426
|
+
Zi: zen
|
427
|
+
Chi: chen
|
428
|
+
Ke: ken
|
429
|
+
Zhi: zhen
|
430
|
+
Ci: cen
|
431
|
+
He: hen
|
432
|
+
Ge: gen
|
433
|
+
Shi: shen
|
434
|
+
Fo: fen
|
435
|
+
Po: pen
|
436
|
+
U:
|
437
|
+
Si: su
|
438
|
+
Ri: ru
|
439
|
+
Ne: nu
|
440
|
+
De: du
|
441
|
+
Mo: mu
|
442
|
+
Bo: bu
|
443
|
+
Empty: wu
|
444
|
+
Le: lu
|
445
|
+
Zi: zu
|
446
|
+
Chi: chu
|
447
|
+
Ke: ku
|
448
|
+
Zhi: zhu
|
449
|
+
Ci: cu
|
450
|
+
Te: tu
|
451
|
+
He: hu
|
452
|
+
Ge: gu
|
453
|
+
Shi: shu
|
454
|
+
Fo: fu
|
455
|
+
Po: pu
|
data/lib/exception.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Pinyin
  # All exceptions arising from this module inherit from Pinyin::Error,
  # so callers can rescue that single class.
  class Error < StandardError; end

  # Raised when a piece of input cannot be interpreted as a syllable.
  #
  # input    - the offending piece of text
  # position - the position of that text, as reported by whoever raises
  class ParseError < Error
    attr_reader :input, :position

    # message is optional. When it is omitted a descriptive default is
    # used instead of the bare class name. A message passed to +raise+
    # (raise ParseError.new(input, pos), "text") still takes precedence,
    # because +raise+ replaces the message on the exception it throws.
    def initialize(input, position, message = nil)
      super(message || "Illegal syllable <#{input}> at position #{position}.")
      @input = input
      @position = position
    end
  end
end
|
14
|
+
|
data/lib/groundwork.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
# Classes and constants used throughout the module
|
2
|
+
# * Initial
|
3
|
+
# * Final
|
4
|
+
# * TonelessSyllable
|
5
|
+
# * Syllable
|
6
|
+
# * ILLEGAL_COMBINATIONS
|
7
|
+
|
8
|
+
module Pinyin
|
9
|
+
# A Chinese initial (start of a syllable).
#
# The only instances are the ones built below; each is reachable both
# through Initial::All and as a constant named after it (Initial::Bo, ...).
class Initial
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # Every initial, in declaration order. Building the list also defines
  # one constant per entry.
  All = %w(
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ).map { |c| const_set(c, new(c)) }.freeze

  # Closed only after All has been built, so no further initials can be
  # created from outside.
  private_class_method :new

  Group_0 = [Empty].freeze
  Group_1 = [Bo, Po, Mo, Fo].freeze     # Bilabial and Labio-dental
  Group_2 = [De, Te, Ne, Le].freeze     # Plosive, nasal and lateral approximant alveolar
  Group_3 = [Ge, Ke, He].freeze         # Velar
  Group_4 = [Ji, Qi, Xi].freeze         # Alveolo-palatal
  Group_5 = [Zhi, Chi, Shi, Ri].freeze  # Retroflex
  Group_6 = [Zi, Ci, Si].freeze         # Fricative and affricate alveolar

  Groups = [Group_0, Group_1, Group_2, Group_3, Group_4, Group_5, Group_6].freeze

  # initial + final gives the TonelessSyllable made of the two.
  def +(f)
    TonelessSyllable.new(self, f)
  end

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
|
43
|
+
|
44
|
+
|
45
|
+
# A Chinese final (end of a syllable).
#
# The only instances are the ones built below; each is reachable both
# through Final::All and as a constant named after it (Final::Ang, ...).
class Final
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # Every final, in declaration order. Building the list also defines
  # one constant per entry.
  All = %w(
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ).map { |c| const_set(c, new(c)) }.freeze

  # Closed only after All has been built, so no further finals can be
  # created from outside.
  private_class_method :new

  Group_0 = [Empty].freeze
  Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er].freeze
  Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing].freeze
  Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng].freeze
  Group_V = [V, Ue, Van, Vn, Iong].freeze

  Groups = [Group_0, Group_A, Group_I, Group_U, Group_V].freeze

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
|
73
|
+
|
74
|
+
|
75
|
+
# Combination of an initial and a final.
# Not to be confused with a syllable that has the neutral tone.
class TonelessSyllable
  attr_accessor :initial, :final

  def initialize(initial, final)
    self.initial = initial
    self.final = final
  end

  # toneless syllable + tone gives a full Syllable.
  def +(tone)
    Syllable.new(initial, final, tone)
  end

  # Never raises: a missing initial or final is shown as an empty value
  # instead of failing on nil.name.
  def inspect
    "<#{self.class.name} <initial=#{initial && initial.name}, final=#{final && final.name}>>"
  end

  # True when initial i and final f fall in a pair of groups listed in
  # ILLEGAL_COMBINATIONS.
  def self.illegal?(i, f)
    ILLEGAL_COMBINATIONS.any? { |in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f) }
  end

  alias :to_s :inspect
end
|
99
|
+
|
100
|
+
|
101
|
+
# Syllable : initial, final and tone
class Syllable < TonelessSyllable
  attr_accessor :tone

  def initialize(initial, final, tone)
    super(initial, final)
    self.tone = tone
  end

  # Never raises: a missing initial or final is shown as an empty value
  # instead of failing on nil.name.
  def inspect
    "<#{self.class.name} <initial=#{initial && initial.name}, final=#{final && final.name}, tone=#{tone}>>"
  end

  # Re-aliased here so that to_s follows this class's inspect rather than
  # the one captured by the alias in the superclass.
  alias :to_s :inspect
end
|
116
|
+
|
117
|
+
|
118
|
+
# Some groups of initials and finals may not be combined.
# This list is not exhaustive but is sufficient to resolve ambiguity.
#
# Each entry is a pair [initials, finals]: no initial from the first list
# may be combined with any final from the second.
ILLEGAL_COMBINATIONS = [
  [Initial::Group_0, Final::Group_0],
  [Initial::Group_1, Final::Group_0],
  [Initial::Group_2, Final::Group_0],
  [Initial::Group_3, Final::Group_0],
  [Initial::Group_4, Final::Group_0],

  [Initial::Group_4, Final::Group_U],
  [Initial::Group_4, Final::Group_A],

  [Initial::Group_3, Final::Group_I],
  [Initial::Group_5, Final::Group_I],
  [Initial::Group_6, Final::Group_I],

  [Initial::Group_1, Final::Group_V],
  [Initial::Group_3, Final::Group_V],

  # Only bo, po, mo and fo are valid -o combinations
  [Initial::Group_2, [Final::O]],
  [Initial::Group_3, [Final::O]],
  [Initial::Group_4, [Final::O]],
  [Initial::Group_5, [Final::O]],
  [Initial::Group_6, [Final::O]],

  # Some say ong and ueng are actually the same final (zhuyin uses the
  # same representation), but ueng only has the standalone form weng.
  [[Initial::Empty], [Final::Ong]]
].freeze
|
147
|
+
|
148
|
+
end
|
data/lib/pinyin.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
# Handle several romanization systems for Mandarin Chinese
|
2
|
+
#
|
3
|
+
# Author:: Arne Brasseur (pinyin@arnebrasseur.net)
|
4
|
+
# Copyright:: Copyright (c) 2007, Arne Brasseur
|
5
|
+
# Licence:: GNU General Public License, latest version
|
6
|
+
|
7
|
+
$: << File.dirname(__FILE__)
|
8
|
+
|
9
|
+
require 'support'
|
10
|
+
require 'groundwork'
|
11
|
+
require 'exception'
|
12
|
+
|
13
|
+
require 'tones'
|
14
|
+
Pinyin::Tones::All.each{|m| require 'tones/'+m}
|
15
|
+
|
16
|
+
require 'conversions'
|
17
|
+
|
18
|
+
|
19
|
+
module Pinyin
|
20
|
+
# Turns romanized text into a list of Syllable objects.
class Reader
  # conv - name of the romanization, handed as a string to Conversions
  # tone - name of the tone notation; its camelized form must name a
  #        constant under Tones
  def initialize(conv, tone)
    @conv = conv.to_s
    @tone = Tones.const_get(tone.to_s.camelize)
  end

  # Parses str into an array of Syllable objects, one per token returned
  # by Conversions.tokenize.
  #
  # Raises ParseError (carrying the token and its position) when the
  # tone, initial or final of a token cannot be determined.
  def parse(str)
    Conversions.tokenize(str).map do |s, pos|
      tone, syll = @tone.pop_tone(s)
      tsyll = Conversions.parse(@conv, syll)
      # NOTE(review): guards against Conversions.parse returning nil for an
      # unknown syllable, so the failure surfaces as ParseError rather than
      # NoMethodError - confirm against Conversions.parse.
      ini = tsyll && tsyll.initial
      fin = tsyll && tsyll.final
      unless tone && fin && ini
        raise ParseError.new(s, pos), "Illegal syllable <#{s}> in input <#{str}> at position #{pos}."
      end
      Syllable.new(ini, fin, tone)
    end
  end

  alias :<< :parse
end
|
38
|
+
|
39
|
+
# Turns Syllable objects back into romanized text.
class Writer
  # conv - name of the romanization, handed as a string to Conversions
  # tone - name of the tone notation; its camelized form must name a
  #        constant under Tones
  def initialize(conv, tone)
    @conv = conv.to_s
    @tone = Tones.const_get(tone.to_s.camelize)
  end

  # Writes out py, which is either a single syllable or a collection of
  # them (anything responding to +map+). A collection is rendered
  # syllable by syllable and joined with single spaces.
  def unparse(py)
    return unparse_syllable(py) unless py.respond_to?(:map)

    py.map { |syll| unparse_syllable(syll) }.join(' ')
  end

  alias :<< :unparse

  private

  # Renders one syllable in the target romanization, then applies its tone.
  def unparse_syllable(syll)
    @tone.add_tone(Conversions.unparse(@conv, syll), syll.tone)
  end
end
|
56
|
+
|
57
|
+
# Converts text from one romanization and tone notation to another by
# chaining a Reader and a Writer.
class Converter
  # from, from_tone - passed to Reader.new (the source notation)
  # to, to_tone     - passed to Writer.new (the target notation)
  def initialize(from, from_tone, to, to_tone)
    @reader, @writer = Reader.new(from, from_tone), Writer.new(to, to_tone)
  end

  # Parses str with the reader and writes the result out with the writer.
  def convert(str)
    syllables = @reader.parse(str)
    @writer.unparse(syllables)
  end

  alias_method :<<, :convert
end
|
69
|
+
end
|
70
|
+
|
71
|
+
|