ting 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +16 -0
- data/README.rdoc +94 -0
- data/Rakefile +15 -0
- data/TODO +15 -0
- data/examples/cgiform/cgiform.rb +24 -0
- data/examples/cgiform/template.rhtml +69 -0
- data/examples/hello.rb +12 -0
- data/lib/ting.rb +93 -0
- data/lib/ting/conversion.rb +51 -0
- data/lib/ting/conversions.rb +75 -0
- data/lib/ting/conversions/hanyu.rb +77 -0
- data/lib/ting/data/comparison.csv +410 -0
- data/lib/ting/data/final.csv +10 -0
- data/lib/ting/data/initial.csv +7 -0
- data/lib/ting/data/paladiy.txt +421 -0
- data/lib/ting/data/rules.yaml +24 -0
- data/lib/ting/data/valid_pinyin.yaml +454 -0
- data/lib/ting/exception.rb +17 -0
- data/lib/ting/groundwork.rb +177 -0
- data/lib/ting/string.rb +17 -0
- data/lib/ting/support.rb +19 -0
- data/lib/ting/tones.rb +65 -0
- data/lib/ting/tones/accents.rb +62 -0
- data/lib/ting/tones/ipa.rb +24 -0
- data/lib/ting/tones/marks.rb +30 -0
- data/lib/ting/tones/no_tones.rb +7 -0
- data/lib/ting/tones/numbers.rb +25 -0
- data/lib/ting/tones/supernum.rb +24 -0
- data/test/test_comparison.rb +35 -0
- data/test/test_hanyu_coverage.rb +35 -0
- metadata +95 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ting

  # All exceptions arising from this module inherit from Ting::Error
  class Error < StandardError ; end

  # Raised when input cannot be parsed.
  #
  # input    :: the value that was being parsed
  # position :: where in the input the problem was detected
  class ParseError < Error
    attr_reader :input, :position

    # Stores both values and hands a descriptive message to StandardError,
    # so that #message says more than just the class name.
    def initialize(input, position)
      @input = input
      @position = position
      super("Parse error at position #{position.inspect} in #{input.inspect}")
    end
  end

end
|
17
|
+
|
@@ -0,0 +1,177 @@
|
|
1
|
+
# Classes and constants used throughout the module
|
2
|
+
# * Initial
|
3
|
+
# * Final
|
4
|
+
# * TonelessSyllable
|
5
|
+
# * Syllable
|
6
|
+
# * ILLEGAL_COMBINATIONS
|
7
|
+
|
8
|
+
module Ting
|
9
|
+
|
10
|
+
#
|
11
|
+
# A Chinese initial (start of a syllable)
|
12
|
+
#
|
13
|
+
|
14
|
+
class Initial
  # Name of this initial, identical to the name of its constant
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # Every initial, in table order. Building the list also defines one
  # constant per entry (Initial::Bo, Initial::Po, ...).
  All = %w[
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ].map { |label| const_set(label, new(label)) }

  # The set of initials is closed: no instances beyond the ones above.
  private_class_method :new

  Groups = [
    Group_0 = [Empty],
    Group_1 = [Bo, Po, Mo, Fo],       # Bilabial and Labio-dental
    Group_2 = [De, Te, Ne, Le],       # Plosive, nasal and lateral approximant alveolar
    Group_3 = [Ge, Ke, He],           # Velar
    Group_4 = [Ji, Qi, Xi],           # Alveolo-palatal
    Group_5 = [Zhi, Chi, Shi, Ri],    # Retroflex
    Group_6 = [Zi, Ci, Si],           # Fricative and affricate alveolar
  ]

  # initial + final => TonelessSyllable
  def +(f)
    TonelessSyllable.new(self, f)
  end

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
|
44
|
+
|
45
|
+
|
46
|
+
#
|
47
|
+
# A Chinese final (end of a syllable)
|
48
|
+
#
|
49
|
+
|
50
|
+
class Final
  # Name of this final, identical to the name of its constant
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # Every final, in table order. Building the list also defines one
  # constant per entry (Final::A, Final::Ang, ...).
  All = %w[
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ].map { |label| const_set(label, new(label)) }

  # The set of finals is closed: no instances beyond the ones above.
  private_class_method :new

  Groups = [
    Group_0 = [Empty],
    Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er],
    Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing],
    Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng],
    Group_V = [V, Ue, Van, Vn, Iong]
  ]

  def inspect
    "<#{self.class.name}::#{@name}>"
  end
end
|
73
|
+
|
74
|
+
|
75
|
+
#
|
76
|
+
# Combination of an initial and a final
|
77
|
+
# Not to be confused with a syllable that has the neutral tone
|
78
|
+
#
|
79
|
+
|
80
|
+
class TonelessSyllable
  attr_accessor :initial, :final

  # True when the pair (i, f) falls into any entry of ILLEGAL_COMBINATIONS,
  # i.e. i is in the entry's initials and f is in the entry's finals.
  def self.illegal?(i, f)
    ILLEGAL_COMBINATIONS.any? do |initials, finals|
      initials.include?(i) && finals.include?(f)
    end
  end

  def initialize(initial, final)
    self.initial, self.final = initial, final
  end

  # toneless syllable + tone => Syllable
  def +(tone)
    Syllable.new(initial, final, tone)
  end

  def inspect
    format('<%s <initial=%s, final=%s>>', self.class.name, initial.name, final.name)
  end

  alias_method :to_s, :inspect
end
|
102
|
+
|
103
|
+
|
104
|
+
#
|
105
|
+
# Syllable : initial, final and tone
|
106
|
+
#
|
107
|
+
|
108
|
+
class Syllable < TonelessSyllable
  attr_accessor :tone

  def initialize(initial, final, tone)
    super(initial, final)
    self.tone = tone
  end

  def inspect
    format('<%s <initial=%s, final=%s, tone=%s>>',
           self.class.name, initial.name, final.name, tone)
  end

  # Re-alias here: the alias inherited from TonelessSyllable still points
  # at the parent's inspect.
  alias_method :to_s, :inspect
end
|
122
|
+
|
123
|
+
|
124
|
+
#
|
125
|
+
# Some groups of initials and finals may not be combined
|
126
|
+
# This list is not exhaustive but is sufficient to resolve ambiguity
|
127
|
+
#
|
128
|
+
|
129
|
+
ILLEGAL_COMBINATIONS =
  # Initial groups 0 through 4 paired with the empty final
  Initial::Groups[0, 5].map { |initials| [initials, Final::Group_0] } +
  # Alveolo-palatal initials paired with the U and A finals
  [Final::Group_U, Final::Group_A].map { |finals| [Initial::Group_4, finals] } +
  # Initial groups paired with the I finals
  [Initial::Group_3, Initial::Group_5, Initial::Group_6].map { |initials| [initials, Final::Group_I] } +
  # Initial groups paired with the V finals
  [Initial::Group_1, Initial::Group_3].map { |initials| [initials, Final::Group_V] } +
  # 2008.05.26 lo is also valid, so Group_2 is deliberately not paired with -o
  # (bo, po, mo, fo and lo are the valid -o combinations).
  Initial::Groups[3, 4].map { |initials| [initials, [Final::O]] } +
  # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
  [[[Initial::Empty], [Final::Ong]]]
|
157
|
+
|
158
|
+
class << self

  #
  # Yields each initial/final pair listed in data/valid_pinyin.yaml
  # as (Initial constant, Final constant). The YAML maps a final's name
  # to a collection keyed by initial name.
  #
  def valid_combinations
    require 'yaml'
    yaml_path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    table = YAML::load(IO.read(yaml_path))
    table.each do |final_name, initials|
      final = Final.const_get(final_name)
      initials.each do |initial_name, _pinyin|
        yield(Initial.const_get(initial_name), final)
      end
    end
  end

end
|
177
|
+
end
|
data/lib/ting/string.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
class String
  # Memoizes converted syllables, keyed by the downcased numbered syllable
  PINYIN_CACHE={}

  # Replaces every numbered pinyin syllable in the string (letters followed
  # by one digit, e.g. "hao3") with what the hanyu/accents writer produces
  # for it. "u:" is accepted as a spelling of "ü". Text that does not look
  # like a numbered syllable is left untouched.
  #
  # Up to six letters are allowed per syllable, because the longest
  # Hanyu Pinyin syllables (zhuang, chuang, shuang) have six.
  def pretty_tones
    gsub('u:','ü').gsub(/[A-Za-züÜ]{1,6}\d/) do |m|
      syllable = m.downcase
      PINYIN_CACHE[syllable] ||=
        (Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(syllable))
    end
  end

  # Converts every numbered pinyin syllable found in the string with the
  # zhuyin/marks writer and joins the results with single spaces.
  # Anything between the syllables is dropped.
  def bpmf
    gsub('u:','ü').scan(/[A-Za-züÜ]{1,6}\d/).map do |m|
      Ting.writer(:zhuyin, :marks) <<
        (Ting.reader(:hanyu, :numbers) << m.downcase)
    end.join(' ')
  end
end
|
data/lib/ting/support.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
class String
  # Splits the string into an array of one-character strings, decoding
  # and re-encoding each character as UTF-8.
  def chars
    codepoints = unpack('U*')
    codepoints.map { |codepoint| [codepoint].pack('U') }
  end

  # Returns a CamelCased copy; the receiver is left untouched.
  # A lowercase letter after underscores or dashes is upcased and the
  # separators are dropped; a lowercase letter at the start of the string
  # or after whitespace is upcased as well.
  def camelcase
    result = dup
    result.gsub!(/(?:_+|-+)([a-z])/) { Regexp.last_match(1).upcase }
    result.gsub!(/(\A|\s)([a-z])/) { Regexp.last_match(1) + Regexp.last_match(2).upcase }
    result
  end
end
|
13
|
+
|
14
|
+
module Kernel
  # Passes +s+ to the block, discards the block's result
  # and returns +s+ itself.
  def returning(s)
    yield s
    s
  end
end
|
data/lib/ting/tones.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
module Ting
  #
  # Base class for Tone classes
  #
  # The class methods here are the defaults: they leave the syllable
  # untouched and report the neutral tone.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class <<self
      # Add a tone to a syllable (default: syllable returned unchanged)
      def add_tone(s,t)
        s
      end

      # Determine the tone of a syllable (default: always NEUTRAL_TONE)
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Remove the tone from a syllable.
      # Returns [tone, syllable]; by default [NEUTRAL_TONE, s].
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private
      # Make sure the tone number is in the valid range.
      # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
      #
      # Values outside VALID_TONES are wrapped modulo MAX_TONE. The result
      # is always returned explicitly, so e.g. 7 gives 2 and 0 gives 5.
      def normalize(t)
        return t if VALID_TONES === t
        t %= MAX_TONE
        t == 0 ? NEUTRAL_TONE : t
      end

    end
  end
end
|
40
|
+
|
41
|
+
# Tone marks as a separate glyph, e.g. for Bopomofo
|
42
|
+
require "ting/tones/marks"
|
43
|
+
|
44
|
+
# Tone numbers added after the syllable
|
45
|
+
require "ting/tones/numbers"
|
46
|
+
|
47
|
+
# Tone accents, for Hanyu pinyin
|
48
|
+
require "ting/tones/accents"
|
49
|
+
|
50
|
+
# Superscript numerals, for Wade-Giles
|
51
|
+
require "ting/tones/supernum"
|
52
|
+
|
53
|
+
# IPA tone symbols
|
54
|
+
require "ting/tones/ipa"
|
55
|
+
|
56
|
+
# No tones
|
57
|
+
require "ting/tones/no_tones"
|
58
|
+
|
59
|
+
module Ting
  module Tones
    # Tone numbers run from 1 to 5; 5 is the neutral tone and also the maximum.
    VALID_TONES = (1..5)
    NEUTRAL_TONE = 5
    MAX_TONE = NEUTRAL_TONE

    # Tone classes collected in this list
    All = [Numbers, Marks, Accents, NoTones]
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as an accent on one vowel of the syllable (ā á ǎ à).
    class Accents < Tone
      class <<self

        # Unicode code points per vowel, indexed by tone % MAX_TONE:
        # index 0 is the unaccented letter (neutral tone), 1..4 are the
        # accented forms. The ü is stored under the key :v.
        UNICODE_TONE_GLYPHS={
          :a=>[97, 257, 225, 462, 224],
          :e=>[101, 275, 233, 283, 232],
          :i=>[105, 299, 237, 464, 236],
          :o=>[111, 333, 243, 466, 242],
          :u=>[117, 363, 250, 468, 249],
          :v=>[252, 470, 472, 474, 476]
        }

        # UTF-8 string for +letter+ (a String or Symbol naming a key of
        # UNICODE_TONE_GLYPHS) carrying +tone+; nil when the table has no
        # entry at that index.
        def tone_glyph(letter,tone)
          if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
            [u].pack('U')
          end
        end

        # Returns a copy of +syll+ with the accent for +tone+ on one vowel.
        # The argument itself is not modified.
        #
        # Every ü is first rewritten as v. The accent then goes on the
        # first a, else the first e, else the first o, else on the LAST of
        # i/u/v, so that "liu" becomes "liù" and "gui" becomes "guì".
        # A syllable without any of these letters is returned as is.
        def add_tone(syll, tone)
          syll = syll.gsub('ü','v')
          tone %= MAX_TONE
          case syll
          when /a/ then syll.sub(/a/, tone_glyph(:a,tone))
          when /e/ then syll.sub(/e/, tone_glyph(:e,tone))
          when /o/ then syll.sub(/o/, tone_glyph(:o,tone))
          when /[iuv]/
            # last i/u/v: nothing but non-i/u/v characters may follow it
            syll.sub(/[iuv](?=[^iuv]*\z)/) {|vowel| tone_glyph(vowel,tone)}
          else syll
          end
        end

        # Tone (1..5) encoded by the accent in +syll+.
        # NEUTRAL_TONE when the syllable carries no accent at all.
        def peek_tone(syll)
          found = locate_tone_glyph(syll.unpack('U*'))
          found ? normalize(found[1]) : NEUTRAL_TONE
        end

        # Strips the accent from +syll+.
        # Returns [tone, syllable], where the glyph that determined the
        # tone is replaced by its plain vowel letter (the ü row is written
        # back as "v"). A syllable in which no vowel is found is returned
        # unchanged together with NEUTRAL_TONE.
        def pop_tone(syll)
          unpacked = syll.unpack('U*')
          vowel, index, position = locate_tone_glyph(unpacked)
          return [NEUTRAL_TONE, syll] unless vowel
          # unpack gives the code point on every Ruby version,
          # whereas String#[] only returns an Integer on Ruby 1.8
          unpacked[position] = vowel.to_s.unpack('U').first
          [normalize(index), unpacked.pack('U*')]
        end

        private
        # Yields every vowel symbol together with its row of code points
        def each_tone_glyph
          [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
            vowel, tones = v, UNICODE_TONE_GLYPHS[v]
            yield vowel,tones
          end
        end

        # Finds the code point in +unpacked+ that determines the tone.
        # Returns [vowel, index in the vowel's row, position in unpacked],
        # or nil when no vowel from the table is present.
        #
        # An accented glyph (index > 0) always wins. Only when there is
        # none does the first plain vowel, in table order, count; this is
        # why the plain "i" in "xióng" does not hide the accented "ó".
        def locate_tone_glyph(unpacked)
          plain = nil
          each_tone_glyph do |vowel, tones|
            unpacked.each_with_index do |codepoint, position|
              index = tones.index(codepoint)
              next unless index
              return [vowel, index, position] if index > 0
              plain ||= [vowel, index, position]
            end
          end
          plain
        end

      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as IPA tone letters appended to the syllable.
    class Ipa < Tone
      class <<self

        # Indexed by tone % 5: index 0 (neutral tone) has no glyph,
        # 1..4 are the tone letter sequences.
        GLYPHS=['', '˥˥', '˧˥', '˧˩˧', '˥˩',] #http://wapedia.mobi/en/Wikipedia:IPA_for_Mandarin

        # Returns +syll+ followed by the glyph for +tone+
        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # Tone whose glyph +syll+ ends with, or NEUTRAL_TONE if it ends
        # with none of them. The whole multi-character glyph is compared,
        # not just the last character of the syllable.
        def peek_tone(syll)
          GLYPHS.each_with_index do |glyph, tone|
            next if glyph.empty? # the neutral tone has nothing to match on
            return tone if syll.end_with?(glyph)
          end
          NEUTRAL_TONE
        end

        # Returns [tone, syllable up to the first tone letter]
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as a separate mark glyph appended to the syllable.
    class Marks < Tone
      class <<self

        # Indexed by tone % 5: index 0 is the neutral tone mark,
        # the first tone (index 1) is written without any mark.
        GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

        # Returns +syll+ followed by the mark for +tone+
        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # Tone indicated by the mark found in +syll+;
        # a syllable without any mark is in the first tone.
        def peek_tone(syll)
          # "then" rather than the colon form, which only Ruby 1.8 accepts
          case syll
          when /ˊ/ then 2
          when /ˇ/ then 3
          when /ˋ/ then 4
          when /˙/ then NEUTRAL_TONE
          else
            1
          end
        end

        # Returns [tone, syllable up to the first mark]
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as a digit after the syllable, e.g. "hao3".
    class Numbers < Tone
      class <<self

        # Returns +syll+ followed by the normalized tone number
        def add_tone(syll, tone)
          suffix = normalize(tone).to_s
          syll + suffix
        end

        # Normalized value of the trailing digit of +syll+,
        # or NEUTRAL_TONE when it does not end in a digit.
        def peek_tone(syll)
          case syll
          when /(\d)\Z/ then normalize(Integer($1))
          else NEUTRAL_TONE
          end
        end

        # Returns [tone, leading run of non-digit characters of syll]
        def pop_tone(syll)
          tone = peek_tone(syll)
          [ tone, syll[/\A\D+/] ]
        end

      end
    end
  end
end
|