pinyin 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +22 -0
- data/examples/cgiform/cgiform.rb +24 -0
- data/examples/cgiform/template.rhtml +69 -0
- data/examples/hello.rb +12 -0
- data/lib/conversions.rb +74 -0
- data/lib/data/comparison.csv +410 -0
- data/lib/data/final.csv +10 -0
- data/lib/data/initial.csv +7 -0
- data/lib/data/paladiy.txt +421 -0
- data/lib/data/rules.yaml +24 -0
- data/lib/data/valid_pinyin.yaml +455 -0
- data/lib/exception.rb +14 -0
- data/lib/groundwork.rb +148 -0
- data/lib/pinyin.rb +71 -0
- data/lib/support.rb +16 -0
- data/lib/tones/accents.rb +59 -0
- data/lib/tones/marks.rb +25 -0
- data/lib/tones/no_tones.rb +16 -0
- data/lib/tones/numbers.rb +24 -0
- data/lib/tones.rb +19 -0
- data/rakefile +39 -0
- data/test/comparison_test.rb +35 -0
- data/test/hanyu_coverage.rb +33 -0
- metadata +74 -0
@@ -0,0 +1,455 @@
|
|
1
|
+
# Valid pinyin syllables by final and initial
|
2
|
+
# Scraped with Hpricot from http://en.wikipedia.org/wiki/Pinyin_table
|
3
|
+
# Hand-edited for Ueng/weng which is under Ong in the table
|
4
|
+
#
|
5
|
+
# Note that, in order to only use 7-bit ASCII characters,
|
6
|
+
# the u with two dots (umlaut) is written v, and the e with
|
7
|
+
# a little hat on top (circumflex) is written E.
|
8
|
+
---
|
9
|
+
V:
|
10
|
+
Ne: nü
|
11
|
+
Qi: qu
|
12
|
+
Empty: yu
|
13
|
+
Le: lü
|
14
|
+
Xi: xu
|
15
|
+
Ji: ju
|
16
|
+
Ian:
|
17
|
+
Ne: nian
|
18
|
+
De: dian
|
19
|
+
Qi: qian
|
20
|
+
Mo: mian
|
21
|
+
Bo: bian
|
22
|
+
Empty: yan
|
23
|
+
Le: lian
|
24
|
+
Xi: xian
|
25
|
+
Te: tian
|
26
|
+
Ji: jian
|
27
|
+
Po: pian
|
28
|
+
Ee:
|
29
|
+
Empty: ê
|
30
|
+
Ua:
|
31
|
+
Empty: wa
|
32
|
+
Chi: chua
|
33
|
+
Ke: kua
|
34
|
+
Zhi: zhua
|
35
|
+
He: hua
|
36
|
+
Ge: gua
|
37
|
+
Shi: shua
|
38
|
+
Iu:
|
39
|
+
Ne: niu
|
40
|
+
De: diu
|
41
|
+
Qi: qiu
|
42
|
+
Mo: miu
|
43
|
+
Empty: you
|
44
|
+
Le: liu
|
45
|
+
Xi: xiu
|
46
|
+
Ji: jiu
|
47
|
+
Iao:
|
48
|
+
Ne: niao
|
49
|
+
De: diao
|
50
|
+
Qi: qiao
|
51
|
+
Mo: miao
|
52
|
+
Bo: biao
|
53
|
+
Empty: yao
|
54
|
+
Le: liao
|
55
|
+
Xi: xiao
|
56
|
+
Te: tiao
|
57
|
+
Ji: jiao
|
58
|
+
Po: piao
|
59
|
+
A:
|
60
|
+
Si: sa
|
61
|
+
Ne: na
|
62
|
+
De: da
|
63
|
+
Mo: ma
|
64
|
+
Bo: ba
|
65
|
+
Empty: a
|
66
|
+
Le: la
|
67
|
+
Zi: za
|
68
|
+
Chi: cha
|
69
|
+
Ke: ka
|
70
|
+
Zhi: zha
|
71
|
+
Ci: ca
|
72
|
+
Te: ta
|
73
|
+
He: ha
|
74
|
+
Ge: ga
|
75
|
+
Shi: sha
|
76
|
+
Fo: fa
|
77
|
+
Po: pa
|
78
|
+
Vn:
|
79
|
+
Qi: qun
|
80
|
+
Empty: yun
|
81
|
+
Xi: xun
|
82
|
+
Ji: jun
|
83
|
+
Uan:
|
84
|
+
Si: suan
|
85
|
+
Ri: ruan
|
86
|
+
Ne: nuan
|
87
|
+
De: duan
|
88
|
+
Empty: wan
|
89
|
+
Le: luan
|
90
|
+
Zi: zuan
|
91
|
+
Chi: chuan
|
92
|
+
Ke: kuan
|
93
|
+
Zhi: zhuan
|
94
|
+
Ci: cuan
|
95
|
+
Te: tuan
|
96
|
+
He: huan
|
97
|
+
Ge: guan
|
98
|
+
Shi: shuan
|
99
|
+
Ing:
|
100
|
+
Ne: ning
|
101
|
+
De: ding
|
102
|
+
Qi: qing
|
103
|
+
Mo: ming
|
104
|
+
Bo: bing
|
105
|
+
Empty: ying
|
106
|
+
Le: ling
|
107
|
+
Xi: xing
|
108
|
+
Te: ting
|
109
|
+
Ji: jing
|
110
|
+
Po: ping
|
111
|
+
Ia:
|
112
|
+
Qi: qia
|
113
|
+
Empty: ya
|
114
|
+
Le: lia
|
115
|
+
Xi: xia
|
116
|
+
Ji: jia
|
117
|
+
Er:
|
118
|
+
Empty: er
|
119
|
+
An:
|
120
|
+
Si: san
|
121
|
+
Ri: ran
|
122
|
+
Ne: nan
|
123
|
+
De: dan
|
124
|
+
Mo: man
|
125
|
+
Bo: ban
|
126
|
+
Empty: an
|
127
|
+
Le: lan
|
128
|
+
Zi: zan
|
129
|
+
Chi: chan
|
130
|
+
Ke: kan
|
131
|
+
Zhi: zhan
|
132
|
+
Ci: can
|
133
|
+
Te: tan
|
134
|
+
He: han
|
135
|
+
Ge: gan
|
136
|
+
Shi: shan
|
137
|
+
Fo: fan
|
138
|
+
Po: pan
|
139
|
+
Empty:
|
140
|
+
Si: si
|
141
|
+
Ri: ri
|
142
|
+
Zi: zi
|
143
|
+
Chi: chi
|
144
|
+
Zhi: zhi
|
145
|
+
Ci: ci
|
146
|
+
Shi: shi
|
147
|
+
Van:
|
148
|
+
Qi: quan
|
149
|
+
Empty: yuan
|
150
|
+
Xi: xuan
|
151
|
+
Ji: juan
|
152
|
+
Un:
|
153
|
+
Si: sun
|
154
|
+
Ri: run
|
155
|
+
De: dun
|
156
|
+
Empty: wen
|
157
|
+
Le: lun
|
158
|
+
Zi: zun
|
159
|
+
Chi: chun
|
160
|
+
Ke: kun
|
161
|
+
Zhi: zhun
|
162
|
+
Ci: cun
|
163
|
+
Te: tun
|
164
|
+
He: hun
|
165
|
+
Ge: gun
|
166
|
+
Shi: shun
|
167
|
+
Ao:
|
168
|
+
Si: sao
|
169
|
+
Ri: rao
|
170
|
+
Ne: nao
|
171
|
+
De: dao
|
172
|
+
Mo: mao
|
173
|
+
Bo: bao
|
174
|
+
Empty: ao
|
175
|
+
Le: lao
|
176
|
+
Zi: zao
|
177
|
+
Chi: chao
|
178
|
+
Ke: kao
|
179
|
+
Zhi: zhao
|
180
|
+
Ci: cao
|
181
|
+
Te: tao
|
182
|
+
He: hao
|
183
|
+
Ge: gao
|
184
|
+
Shi: shao
|
185
|
+
Po: pao
|
186
|
+
Uo:
|
187
|
+
Si: suo
|
188
|
+
Ri: ruo
|
189
|
+
Ne: nuo
|
190
|
+
De: duo
|
191
|
+
Empty: wo
|
192
|
+
Le: luo
|
193
|
+
Zi: zuo
|
194
|
+
Chi: chuo
|
195
|
+
Ke: kuo
|
196
|
+
Zhi: zhuo
|
197
|
+
Ci: cuo
|
198
|
+
Te: tuo
|
199
|
+
He: huo
|
200
|
+
Ge: guo
|
201
|
+
Shi: shuo
|
202
|
+
Ang:
|
203
|
+
Si: sang
|
204
|
+
Ri: rang
|
205
|
+
Ne: nang
|
206
|
+
De: dang
|
207
|
+
Mo: mang
|
208
|
+
Bo: bang
|
209
|
+
Empty: ang
|
210
|
+
Le: lang
|
211
|
+
Zi: zang
|
212
|
+
Chi: chang
|
213
|
+
Ke: kang
|
214
|
+
Zhi: zhang
|
215
|
+
Ci: cang
|
216
|
+
Te: tang
|
217
|
+
He: hang
|
218
|
+
Ge: gang
|
219
|
+
Shi: shang
|
220
|
+
Fo: fang
|
221
|
+
Po: pang
|
222
|
+
Ei:
|
223
|
+
Ne: nei
|
224
|
+
De: dei
|
225
|
+
Mo: mei
|
226
|
+
Bo: bei
|
227
|
+
Empty: ei
|
228
|
+
Le: lei
|
229
|
+
Zi: zei
|
230
|
+
Zhi: zhei
|
231
|
+
He: hei
|
232
|
+
Ge: gei
|
233
|
+
Shi: shei
|
234
|
+
Fo: fei
|
235
|
+
Po: pei
|
236
|
+
O:
|
237
|
+
Mo: mo
|
238
|
+
Bo: bo
|
239
|
+
Empty: o
|
240
|
+
Fo: fo
|
241
|
+
Po: po
|
242
|
+
Ue:
|
243
|
+
Ne: nüe
|
244
|
+
Qi: que
|
245
|
+
Empty: yue
|
246
|
+
Le: lüe
|
247
|
+
Xi: xue
|
248
|
+
Ji: jue
|
249
|
+
In:
|
250
|
+
Ne: nin
|
251
|
+
Qi: qin
|
252
|
+
Mo: min
|
253
|
+
Bo: bin
|
254
|
+
Empty: yin
|
255
|
+
Le: lin
|
256
|
+
Xi: xin
|
257
|
+
Ji: jin
|
258
|
+
Po: pin
|
259
|
+
E:
|
260
|
+
Si: se
|
261
|
+
Ri: re
|
262
|
+
Ne: ne
|
263
|
+
De: de
|
264
|
+
Mo: me
|
265
|
+
Empty: e
|
266
|
+
Le: le
|
267
|
+
Zi: ze
|
268
|
+
Chi: che
|
269
|
+
Ke: ke
|
270
|
+
Zhi: zhe
|
271
|
+
Ci: ce
|
272
|
+
Te: te
|
273
|
+
He: he
|
274
|
+
Ge: ge
|
275
|
+
Shi: she
|
276
|
+
Iang:
|
277
|
+
Ne: niang
|
278
|
+
Qi: qiang
|
279
|
+
Empty: yang
|
280
|
+
Le: liang
|
281
|
+
Xi: xiang
|
282
|
+
Ji: jiang
|
283
|
+
Iai:
|
284
|
+
Empty: yai
|
285
|
+
Ie:
|
286
|
+
Ne: nie
|
287
|
+
De: die
|
288
|
+
Qi: qie
|
289
|
+
Mo: mie
|
290
|
+
Bo: bie
|
291
|
+
Empty: ye
|
292
|
+
Le: lie
|
293
|
+
Xi: xie
|
294
|
+
Te: tie
|
295
|
+
Ji: jie
|
296
|
+
Po: pie
|
297
|
+
Io:
|
298
|
+
Empty: yo
|
299
|
+
Ou:
|
300
|
+
Si: sou
|
301
|
+
Ri: rou
|
302
|
+
Ne: nou
|
303
|
+
De: dou
|
304
|
+
Mo: mou
|
305
|
+
Empty: ou
|
306
|
+
Le: lou
|
307
|
+
Zi: zou
|
308
|
+
Chi: chou
|
309
|
+
Ke: kou
|
310
|
+
Zhi: zhou
|
311
|
+
Ci: cou
|
312
|
+
Te: tou
|
313
|
+
He: hou
|
314
|
+
Ge: gou
|
315
|
+
Shi: shou
|
316
|
+
Fo: fou
|
317
|
+
Po: pou
|
318
|
+
Uai:
|
319
|
+
Empty: wai
|
320
|
+
Chi: chuai
|
321
|
+
Ke: kuai
|
322
|
+
Zhi: zhuai
|
323
|
+
He: huai
|
324
|
+
Ge: guai
|
325
|
+
Shi: shuai
|
326
|
+
Ueng:
|
327
|
+
Empty: weng
|
328
|
+
Ong:
|
329
|
+
Si: song
|
330
|
+
Ri: rong
|
331
|
+
Ne: nong
|
332
|
+
De: dong
|
333
|
+
Le: long
|
334
|
+
Zi: zong
|
335
|
+
Chi: chong
|
336
|
+
Ke: kong
|
337
|
+
Zhi: zhong
|
338
|
+
Ci: cong
|
339
|
+
Te: tong
|
340
|
+
He: hong
|
341
|
+
Ge: gong
|
342
|
+
Eng:
|
343
|
+
Si: seng
|
344
|
+
Ri: reng
|
345
|
+
Ne: neng
|
346
|
+
De: deng
|
347
|
+
Mo: meng
|
348
|
+
Bo: beng
|
349
|
+
Empty: eng
|
350
|
+
Le: leng
|
351
|
+
Zi: zeng
|
352
|
+
Chi: cheng
|
353
|
+
Ke: keng
|
354
|
+
Zhi: zheng
|
355
|
+
Ci: ceng
|
356
|
+
Te: teng
|
357
|
+
He: heng
|
358
|
+
Ge: geng
|
359
|
+
Shi: sheng
|
360
|
+
Fo: feng
|
361
|
+
Po: peng
|
362
|
+
Ai:
|
363
|
+
Si: sai
|
364
|
+
Ne: nai
|
365
|
+
De: dai
|
366
|
+
Mo: mai
|
367
|
+
Bo: bai
|
368
|
+
Empty: ai
|
369
|
+
Le: lai
|
370
|
+
Zi: zai
|
371
|
+
Chi: chai
|
372
|
+
Ke: kai
|
373
|
+
Zhi: zhai
|
374
|
+
Ci: cai
|
375
|
+
Te: tai
|
376
|
+
He: hai
|
377
|
+
Ge: gai
|
378
|
+
Shi: shai
|
379
|
+
Po: pai
|
380
|
+
Iong:
|
381
|
+
Qi: qiong
|
382
|
+
Empty: yong
|
383
|
+
Xi: xiong
|
384
|
+
Ji: jiong
|
385
|
+
Uang:
|
386
|
+
Empty: wang
|
387
|
+
Chi: chuang
|
388
|
+
Ke: kuang
|
389
|
+
Zhi: zhuang
|
390
|
+
He: huang
|
391
|
+
Ge: guang
|
392
|
+
Shi: shuang
|
393
|
+
Ui:
|
394
|
+
Si: sui
|
395
|
+
Ri: rui
|
396
|
+
De: dui
|
397
|
+
Empty: wei
|
398
|
+
Zi: zui
|
399
|
+
Chi: chui
|
400
|
+
Ke: kui
|
401
|
+
Zhi: zhui
|
402
|
+
Ci: cui
|
403
|
+
Te: tui
|
404
|
+
He: hui
|
405
|
+
Ge: gui
|
406
|
+
Shi: shui
|
407
|
+
I:
|
408
|
+
Ne: ni
|
409
|
+
De: di
|
410
|
+
Qi: qi
|
411
|
+
Mo: mi
|
412
|
+
Bo: bi
|
413
|
+
Empty: yi
|
414
|
+
Le: li
|
415
|
+
Xi: xi
|
416
|
+
Te: ti
|
417
|
+
Ji: ji
|
418
|
+
Po: pi
|
419
|
+
En:
|
420
|
+
Si: sen
|
421
|
+
Ri: ren
|
422
|
+
Ne: nen
|
423
|
+
Mo: men
|
424
|
+
Bo: ben
|
425
|
+
Empty: en
|
426
|
+
Zi: zen
|
427
|
+
Chi: chen
|
428
|
+
Ke: ken
|
429
|
+
Zhi: zhen
|
430
|
+
Ci: cen
|
431
|
+
He: hen
|
432
|
+
Ge: gen
|
433
|
+
Shi: shen
|
434
|
+
Fo: fen
|
435
|
+
Po: pen
|
436
|
+
U:
|
437
|
+
Si: su
|
438
|
+
Ri: ru
|
439
|
+
Ne: nu
|
440
|
+
De: du
|
441
|
+
Mo: mu
|
442
|
+
Bo: bu
|
443
|
+
Empty: wu
|
444
|
+
Le: lu
|
445
|
+
Zi: zu
|
446
|
+
Chi: chu
|
447
|
+
Ke: ku
|
448
|
+
Zhi: zhu
|
449
|
+
Ci: cu
|
450
|
+
Te: tu
|
451
|
+
He: hu
|
452
|
+
Ge: gu
|
453
|
+
Shi: shu
|
454
|
+
Fo: fu
|
455
|
+
Po: pu
|
data/lib/exception.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Pinyin
  # All exceptions arising from this module inherit from Pinyin::Error,
  # so callers can rescue that single class.
  Error = Class.new StandardError

  # Raised when a piece of input cannot be parsed.
  #
  # input    - the offending piece of input, exactly as handed in by the raiser
  # position - where that piece was found, exactly as handed in by the raiser
  class ParseError < Error
    attr_reader :input, :position

    # message is optional. When omitted, a default is built from input and
    # position so that the exception is informative even when it is raised
    # without an explicit message. `raise ParseError.new(i, p), "text"` still
    # replaces the message with "text" while keeping input and position.
    def initialize(input, position, message = nil)
      super(message || "Parse error at position #{position}: #{input.inspect}")
      @input = input
      @position = position
    end
  end
end
|
14
|
+
|
data/lib/groundwork.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
# Classes and constants used throughout the module
|
2
|
+
# * Initial
|
3
|
+
# * Final
|
4
|
+
# * TonelessSyllable
|
5
|
+
# * Syllable
|
6
|
+
# * ILLEGAL_COMBINATIONS
|
7
|
+
|
8
|
+
module Pinyin
  # A Chinese initial (start of a syllable).
  #
  # One instance is created per name in All and is reachable as a constant on
  # this class (Initial::Bo, Initial::Zhi, ...). Initial.new is made private
  # once those instances exist.
  class Initial
    attr_reader :name

    def initialize(n)
      @name = n
    end

    # Every initial, in declaration order. Building the list also defines one
    # constant per name on this class. Frozen: it is a fixed lookup table.
    All = %w(
      Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
      Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
    ).map { |c| const_set c, Initial.new(c) }.freeze

    class << self
      private :new
    end

    # The initials partitioned into groups; each group is also a constant.
    Groups = [
      Group_0 = [Empty].freeze,
      Group_1 = [Bo, Po, Mo, Fo].freeze,      # Bilabial and Labio-dental
      Group_2 = [De, Te, Ne, Le].freeze,      # Plosive, nasal and lateral approximant alveolar
      Group_3 = [Ge, Ke, He].freeze,          # Velar
      Group_4 = [Ji, Qi, Xi].freeze,          # Alveolo-palatal
      Group_5 = [Zhi, Chi, Shi, Ri].freeze,   # Retroflex
      Group_6 = [Zi, Ci, Si].freeze           # Fricative and affricate alveolar
    ].freeze

    # Combine this initial with the final f into a TonelessSyllable.
    def +(f)
      TonelessSyllable.new(self, f)
    end

    def inspect
      "<#{self.class.name}::#{name}>"
    end
  end

  # A Chinese final (end of a syllable).
  #
  # One instance is created per name in All and is reachable as a constant on
  # this class (Final::A, Final::Iong, ...). Final.new is made private once
  # those instances exist.
  class Final
    attr_reader :name

    def initialize(n)
      @name = n
    end

    # Every final, in declaration order. Building the list also defines one
    # constant per name on this class. Frozen: it is a fixed lookup table.
    All = %w(
      Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
      I Ia Io Ie Iai Iao Iu Ian In Iang Ing
      U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
    ).map { |c| const_set c, Final.new(c) }.freeze

    class << self
      private :new
    end

    # The finals partitioned into groups; each group is also a constant.
    Groups = [
      Group_0 = [Empty].freeze,
      Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er].freeze,
      Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing].freeze,
      Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng].freeze,
      Group_V = [V, Ue, Van, Vn, Iong].freeze
    ].freeze

    def inspect
      "<#{self.class.name}::#{name}>"
    end
  end

  # Combination of an initial and a final.
  # Not to be confused with a syllable that has the neutral tone.
  class TonelessSyllable
    attr_accessor :initial, :final

    def initialize(initial, final)
      self.initial = initial
      self.final = final
    end

    # Attach a tone, producing a Syllable with the same initial and final.
    def +(tone)
      Syllable.new(initial, final, tone)
    end

    # Debug representation. Safe to call even when initial or final is nil.
    def inspect
      "<#{self.class.name} <initial=#{part_name(initial)}, final=#{part_name(final)}>>"
    end

    # True when initial i and final f fall into a pair of groups listed in
    # ILLEGAL_COMBINATIONS.
    def self.illegal?(i, f)
      ILLEGAL_COMBINATIONS.any? { |in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f) }
    end

    alias :to_s :inspect

    private

    # Name of an initial/final for display, or "nil" when the part is missing.
    def part_name(part)
      part ? part.name : 'nil'
    end
  end

  # Syllable : initial, final and tone
  class Syllable < TonelessSyllable
    attr_accessor :tone

    def initialize(initial, final, tone)
      super(initial, final)
      self.tone = tone
    end

    # Debug representation. Safe to call even when initial or final is nil.
    def inspect
      "<#{self.class.name} <initial=#{part_name(initial)}, final=#{part_name(final)}, tone=#{tone}>>"
    end

    # alias binds to the method body at definition time, so to_s has to be
    # re-aliased here to pick up this class's inspect.
    alias :to_s :inspect
  end

  # Some groups of initials and finals may not be combined.
  # This list is not exhaustive but is sufficient to resolve ambiguity.
  # Each entry is a pair [initials, finals]; see TonelessSyllable.illegal?.
  ILLEGAL_COMBINATIONS =
    [
      [Initial::Group_0, Final::Group_0],
      [Initial::Group_1, Final::Group_0],
      [Initial::Group_2, Final::Group_0],
      [Initial::Group_3, Final::Group_0],
      [Initial::Group_4, Final::Group_0],

      [Initial::Group_4, Final::Group_U],
      [Initial::Group_4, Final::Group_A],

      [Initial::Group_3, Final::Group_I],
      [Initial::Group_5, Final::Group_I],
      [Initial::Group_6, Final::Group_I],

      [Initial::Group_1, Final::Group_V],
      [Initial::Group_3, Final::Group_V],

      # Among the non-empty initials, only bo, po, mo and fo are valid -o combinations
      [Initial::Group_2, [Final::O]],
      [Initial::Group_3, [Final::O]],
      [Initial::Group_4, [Final::O]],
      [Initial::Group_5, [Final::O]],
      [Initial::Group_6, [Final::O]],

      # Some say ong and ueng is actually the same final, zhuyin uses the same
      # representation, but ueng only has standalone form weng
      [[Initial::Empty], [Final::Ong]]
    ].map { |pair| pair.freeze }.freeze
end
|
data/lib/pinyin.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
# Handle several romanization systems for Mandarin Chinese
|
2
|
+
#
|
3
|
+
# Author:: Arne Brasseur (pinyin@arnebrasseur.net)
|
4
|
+
# Copyright:: Copyright (c) 2007, Arne Brasseur
|
5
|
+
# Licence:: GNU General Public License, latest version
|
6
|
+
|
7
|
+
$: << File.dirname(__FILE__)
|
8
|
+
|
9
|
+
require 'support'
|
10
|
+
require 'groundwork'
|
11
|
+
require 'exception'
|
12
|
+
|
13
|
+
require 'tones'
|
14
|
+
Pinyin::Tones::All.each{|m| require 'tones/'+m}
|
15
|
+
|
16
|
+
require 'conversions'
|
17
|
+
|
18
|
+
|
19
|
+
module Pinyin
  # Turns romanized text into Syllable objects.
  class Reader
    # conv - name of the romanization system; kept as a plain string and
    #        handed to Conversions.parse as-is
    # tone - name of the tone notation; camelized and looked up as a constant
    #        under Tones
    def initialize(conv, tone)
      @conv = conv.to_s
      @tone = Tones.const_get tone.to_s.camelize
    end

    # Parse str into an array of Syllable objects, one per token yielded by
    # Conversions.tokenize.
    #
    # Raises ParseError when a token yields no tone, no initial or no final.
    def parse(str)
      Conversions.tokenize(str).map do |s, pos|
        tone, syll = @tone.pop_tone(s)
        tsyll = Conversions.parse(@conv, syll)
        # Guard against a nil result so that an unrecognised token surfaces
        # as a ParseError rather than a NoMethodError on nil.
        ini = tsyll && tsyll.initial
        fin = tsyll && tsyll.final
        unless tone && fin && ini
          raise ParseError.new(s, pos), "Illegal syllable <#{s}> in input <#{str}> at position #{pos}."
        end
        Syllable.new(ini, fin, tone)
      end
    end

    alias :<< :parse
  end

  # Turns Syllable objects back into romanized text.
  class Writer
    # conv - name of the romanization system; kept as a plain string and
    #        handed to Conversions.unparse as-is
    # tone - name of the tone notation; camelized and looked up as a constant
    #        under Tones
    def initialize(conv, tone)
      @conv = conv.to_s
      @tone = Tones.const_get tone.to_s.camelize
    end

    # Render py as text. py may be a single syllable or anything responding
    # to #map; in the latter case the rendered syllables are joined with a
    # single space.
    def unparse(py)
      conv = lambda { |syll| @tone.add_tone(Conversions.unparse(@conv, syll), syll.tone) }
      if py.respond_to? :map
        py.map(&conv).join(' ')
      else
        conv.call(py)
      end
    end

    alias :<< :unparse
  end

  # Converts text from one romanization/tone notation to another by chaining
  # a Reader and a Writer.
  class Converter
    def initialize(from, from_tone, to, to_tone)
      @reader = Reader.new(from, from_tone)
      @writer = Writer.new(to, to_tone)
    end

    # Parse str with the source settings and write it with the target ones.
    def convert(str)
      @writer.unparse @reader.parse(str)
    end

    alias :<< :convert
  end
end
|
70
|
+
|
71
|
+
|