ting 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +16 -0
- data/README.rdoc +94 -0
- data/Rakefile +15 -0
- data/TODO +15 -0
- data/examples/cgiform/cgiform.rb +24 -0
- data/examples/cgiform/template.rhtml +69 -0
- data/examples/hello.rb +12 -0
- data/lib/ting.rb +93 -0
- data/lib/ting/conversion.rb +51 -0
- data/lib/ting/conversions.rb +75 -0
- data/lib/ting/conversions/hanyu.rb +77 -0
- data/lib/ting/data/comparison.csv +410 -0
- data/lib/ting/data/final.csv +10 -0
- data/lib/ting/data/initial.csv +7 -0
- data/lib/ting/data/paladiy.txt +421 -0
- data/lib/ting/data/rules.yaml +24 -0
- data/lib/ting/data/valid_pinyin.yaml +454 -0
- data/lib/ting/exception.rb +17 -0
- data/lib/ting/groundwork.rb +177 -0
- data/lib/ting/string.rb +17 -0
- data/lib/ting/support.rb +19 -0
- data/lib/ting/tones.rb +65 -0
- data/lib/ting/tones/accents.rb +62 -0
- data/lib/ting/tones/ipa.rb +24 -0
- data/lib/ting/tones/marks.rb +30 -0
- data/lib/ting/tones/no_tones.rb +7 -0
- data/lib/ting/tones/numbers.rb +25 -0
- data/lib/ting/tones/supernum.rb +24 -0
- data/test/test_comparison.rb +35 -0
- data/test/test_hanyu_coverage.rb +35 -0
- metadata +95 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ting

  # All exceptions arising from this module inherit from Ting::Error
  class Error < StandardError; end

  # Raised when a piece of input cannot be parsed.
  #
  # input    - the value that was being parsed
  # position - the position within +input+ reported by the raiser
  class ParseError < Error
    attr_reader :input, :position

    def initialize(input, position)
      @input = input
      @position = position
      # Hand a descriptive message to StandardError; without this call the
      # message would merely be the class name.
      super("parse error at position #{position.inspect} in #{input.inspect}")
    end
  end

end
|
17
|
+
|
@@ -0,0 +1,177 @@
|
|
1
|
+
# Classes and constants used throughout the module
|
2
|
+
# * Initial
|
3
|
+
# * Final
|
4
|
+
# * TonelessSyllable
|
5
|
+
# * Syllable
|
6
|
+
# * ILLEGAL_COMBINATIONS
|
7
|
+
|
8
|
+
module Ting
|
9
|
+
|
10
|
+
#
|
11
|
+
# A Chinese initial (start of a syllable)
|
12
|
+
#
|
13
|
+
|
14
|
+
class Initial
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # Build one instance per name and register it as a constant of this
  # class (Initial::Bo, Initial::Zhi, ...). All keeps them in listed order.
  All = %w(
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ).map { |label| const_set(label, new(label)) }

  # The instances above are the only ones; no further construction from outside.
  private_class_method :new

  Group_0 = [Empty]
  Group_1 = [Bo, Po, Mo, Fo]      # Bilabial and Labio-dental
  Group_2 = [De, Te, Ne, Le]      # Plosive, nasal and lateral approximant alveolar
  Group_3 = [Ge, Ke, He]          # Velar
  Group_4 = [Ji, Qi, Xi]          # Alveolo-palatal
  Group_5 = [Zhi, Chi, Shi, Ri]   # Retroflex
  Group_6 = [Zi, Ci, Si]          # Fricative and affricate alveolar

  Groups = [Group_0, Group_1, Group_2, Group_3, Group_4, Group_5, Group_6]

  # initial + final => TonelessSyllable
  def +(f)
    TonelessSyllable.new(self, f)
  end

  def inspect
    "<#{self.class.name}::#{@name}>"
  end
end
|
44
|
+
|
45
|
+
|
46
|
+
#
|
47
|
+
# A Chinese final (end of a syllable)
|
48
|
+
#
|
49
|
+
|
50
|
+
class Final
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # Build one instance per name and register it as a constant of this
  # class (Final::A, Final::Iong, ...). All keeps them in listed order.
  All = %w(
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ).map { |label| const_set(label, new(label)) }

  # The instances above are the only ones; no further construction from outside.
  private_class_method :new

  Group_0 = [Empty]
  Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er]
  Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing]
  Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng]
  Group_V = [V, Ue, Van, Vn, Iong]

  Groups = [Group_0, Group_A, Group_I, Group_U, Group_V]

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
|
73
|
+
|
74
|
+
|
75
|
+
#
|
76
|
+
# Combination of an initial and a final
|
77
|
+
# Not to be confused with a syllable that has the neutral tone
|
78
|
+
#
|
79
|
+
|
80
|
+
class TonelessSyllable
  attr_accessor :initial, :final

  class << self
    # True when some ILLEGAL_COMBINATIONS entry lists +i+ among its initials
    # and +f+ among its finals.
    def illegal?(i, f)
      ILLEGAL_COMBINATIONS.any? do |initials, finals|
        initials.include?(i) && finals.include?(f)
      end
    end
  end

  def initialize(initial, final)
    @initial = initial
    @final = final
  end

  # toneless syllable + tone => Syllable
  def +(tone)
    Syllable.new(initial, final, tone)
  end

  def inspect
    kind = self.class.name
    "<#{kind} <initial=#{initial.name}, final=#{final.name}>>"
  end

  alias_method :to_s, :inspect
end
|
102
|
+
|
103
|
+
|
104
|
+
#
|
105
|
+
# Syllable : initial, final and tone
|
106
|
+
#
|
107
|
+
|
108
|
+
class Syllable < TonelessSyllable
  attr_accessor :tone

  def initialize(initial, final, tone)
    super(initial, final)
    @tone = tone
  end

  def inspect
    kind = self.class.name
    "<#{kind} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
  end

  # Re-alias so that to_s follows this class's inspect rather than the
  # superclass's version.
  alias_method :to_s, :inspect
end
|
122
|
+
|
123
|
+
|
124
|
+
#
|
125
|
+
# Some groups of initials and finals may not be combined
|
126
|
+
# This list is not exhaustive but is sufficient to resolve ambiguity
|
127
|
+
#
|
128
|
+
|
129
|
+
ILLEGAL_COMBINATIONS =
  # Initial groups 0 through 4 may not be paired with the empty final
  [Initial::Group_0, Initial::Group_1, Initial::Group_2,
   Initial::Group_3, Initial::Group_4].map { |initials| [initials, Final::Group_0] } +
  # Alveolo-palatal initials (Group_4) take neither the U nor the A finals
  [Final::Group_U, Final::Group_A].map { |finals| [Initial::Group_4, finals] } +
  # Initial groups that never take a final from Group_I
  [Initial::Group_3, Initial::Group_5, Initial::Group_6].map { |initials| [initials, Final::Group_I] } +
  # Initial groups that never take a final from Group_V
  [Initial::Group_1, Initial::Group_3].map { |initials| [initials, Final::Group_V] } +
  # Final::O: Group_1 (bo, po, mo, fo) is allowed, and since 2008.05.26
  # Group_2 is no longer listed either because "lo" is also valid.
  [Initial::Group_3, Initial::Group_4,
   Initial::Group_5, Initial::Group_6].map { |initials| [initials, [Final::O]] } +
  # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
  [[[Initial::Empty], [Final::Ong]]]
|
157
|
+
|
158
|
+
class << self

  #
  # Yields an (Initial, Final) pair of constants for every combination
  # listed in data/valid_pinyin.yaml, located next to this file.
  #
  # The two nested loops imply the YAML maps each final name to a collection
  # of initial-name/pinyin pairs; the pinyin value itself is not used here.
  #
  def valid_combinations
    require 'yaml'
    path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    table = YAML.load(IO.read(path))
    table.each do |final_name, by_initial|
      final = Final.const_get(final_name)
      by_initial.each do |initial_name, _pinyin|
        yield(Initial.const_get(initial_name), final)
      end
    end
  end

end
|
177
|
+
end
|
data/lib/ting/string.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
class String
  # Memoizes numbered syllable => accented syllable for pretty_tones
  PINYIN_CACHE = {}

  # Replaces every numbered pinyin syllable in the string ("zhuang1") with
  # its accented Hanyu Pinyin form; text that does not look like
  # <letters><digit> is left alone. "u:" is accepted as a spelling of "ü".
  #
  # A syllable may be up to six letters long (zhuang, chuang, shuang); a
  # shorter limit would split those and convert only the tail.
  def pretty_tones
    gsub('u:', 'ü').gsub(/[A-Za-züÜ]{1,6}\d/) do |m|
      syllable = m.downcase
      PINYIN_CACHE[syllable] ||=
        (Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(syllable))
    end
  end

  # Extracts the numbered pinyin syllables from the string and returns them
  # converted by the :zhuyin/:marks writer, separated by single spaces.
  # Anything that is not a numbered syllable is dropped.
  def bpmf
    gsub('u:', 'ü').scan(/[A-Za-züÜ]{1,6}\d/).map do |m|
      Ting.writer(:zhuyin, :marks) <<
        (Ting.reader(:hanyu, :numbers) << m.downcase)
    end.join(' ')
  end
end
|
data/lib/ting/support.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
class String
  # Array of the string's characters.
  #
  # Only installed when the interpreter's own String#chars does not already
  # return an Array: replacing the built-in unconditionally would make every
  # caller in the process go through unpack('U*'), which raises on strings
  # that are not valid UTF-8.
  unless ''.respond_to?(:chars) && ''.chars.is_a?(Array)
    def chars
      unpack('U*').map { |c| [c].pack('U') }
    end
  end

  # Returns a copy in which a lowercase letter following a run of "_" or "-"
  # is upcased (the separators are removed), and a lowercase letter at the
  # start of the string or after whitespace is upcased.
  #
  #   "wade_giles".camelcase  # => "WadeGiles"
  def camelcase
    gsub(/(?:_+|-+)([a-z])/) { $1.upcase }.
      gsub(/(\A|\s)([a-z])/) { $1 + $2.upcase }
  end
end
|
13
|
+
|
14
|
+
module Kernel
  # Passes +s+ to the given block, ignores the block's result and
  # returns +s+ itself.
  def returning(s)
    s.tap { |value| yield(value) }
  end
end
|
data/lib/ting/tones.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
module Ting
  #
  # Base class for Tone classes
  #
  # The class-level methods here are the toneless defaults: add_tone leaves
  # the syllable untouched and peek_tone/pop_tone always report the neutral
  # tone. Subclasses override them for a particular notation.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class <<self
      # Add a tone to a syllable
      def add_tone(s, t)
        s
      end

      # Determine the tone of a syllable
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Remove the tone from a syllable; returns [tone, syllable]
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private

      # Make sure the tone number is in the valid range.
      # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
      #
      # Values outside VALID_TONES are wrapped modulo MAX_TONE, so 0 and 10
      # become 5 and 6 becomes 1. The wrapped value is returned explicitly;
      # leaving a modifier-if as the last expression would yield nil
      # whenever the remainder is non-zero.
      def normalize(t)
        return t if VALID_TONES === t

        wrapped = t % MAX_TONE
        wrapped == 0 ? NEUTRAL_TONE : wrapped
      end

    end
  end
end
|
40
|
+
|
41
|
+
# Tone marks as a separate glyph, e.g. for Bopomofo
|
42
|
+
require "ting/tones/marks"
|
43
|
+
|
44
|
+
# Tone numbers added after the syllable
|
45
|
+
require "ting/tones/numbers"
|
46
|
+
|
47
|
+
# Tone accents, for Hanyu pinyin
|
48
|
+
require "ting/tones/accents"
|
49
|
+
|
50
|
+
# Superscript numerals, for Wade-Giles
|
51
|
+
require "ting/tones/supernum"
|
52
|
+
|
53
|
+
# IPA tone symbols
|
54
|
+
require "ting/tones/ipa"
|
55
|
+
|
56
|
+
# No tones
|
57
|
+
require "ting/tones/no_tones"
|
58
|
+
|
59
|
+
module Ting
  module Tones
    # Tone classes gathered in one list. Supernum and Ipa are required above
    # but are not part of it.
    All = [Numbers, Marks, Accents, NoTones]

    # Same values as the constants on Ting::Tone, repeated in this namespace.
    NEUTRAL_TONE = 5
    MAX_TONE = NEUTRAL_TONE
    VALID_TONES = 1..5
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as accents on a vowel (Hanyu Pinyin style).
    class Accents < Tone
      class <<self

        # Unicode code points per base vowel, indexed by tone modulo
        # MAX_TONE: index 0 is the unaccented letter (neutral tone), indexes
        # 1..4 are the accented forms. The :v row holds the forms of ü.
        UNICODE_TONE_GLYPHS={
          :a=>[97, 257, 225, 462, 224],
          :e=>[101, 275, 233, 283, 232],
          :i=>[105, 299, 237, 464, 236],
          :o=>[111, 333, 243, 466, 242],
          :u=>[117, 363, 250, 468, 249],
          :v=>[252, 470, 472, 474, 476]
        }

        # The character for +letter+ (one of a e i o u v) carrying +tone+,
        # or nil when the table has no entry at that index.
        def tone_glyph(letter, tone)
          codepoint = UNICODE_TONE_GLYPHS[letter.to_sym][tone % MAX_TONE]
          [codepoint].pack('U') if codepoint
        end

        # Returns a copy of +syll+ with +tone+ marked on one vowel; the
        # argument itself is not modified. "ü" is treated as "v" internally.
        #
        # Placement: "a" wins, then "e", then "o"; otherwise the last of
        # i/u/v carries the mark, so "liu" => "liù" and "gui" => "guì".
        def add_tone(syll, tone)
          syll = syll.gsub('ü', 'v') # work on a copy, leave the caller's string alone
          tone %= MAX_TONE
          case syll
          when /a/ then syll.sub(/a/, tone_glyph(:a, tone))
          when /e/ then syll.sub(/e/, tone_glyph(:e, tone))
          when /o/ then syll.sub(/o/, tone_glyph(:o, tone))
          when /[iuv](?=[^iuv]*\z)/
            syll.sub(/[iuv](?=[^iuv]*\z)/) { |letter| tone_glyph(letter, tone) }
          else syll
          end
        end

        # Tone number (1..5) of +syll+; NEUTRAL_TONE when it holds no
        # accented vowel.
        def peek_tone(syll)
          found = find_tone_glyph(syll.unpack('U*'))
          found ? normalize(found[1]) : NEUTRAL_TONE
        end

        # Returns [tone, syllable with the tone-bearing vowel replaced by its
        # base letter]. When the tone-bearing vowel is a form of ü, the base
        # letter written back is "v". A syllable without any vowel comes
        # back unchanged with NEUTRAL_TONE.
        def pop_tone(syll)
          codepoints = syll.unpack('U*')
          vowel, tone, glyph = find_tone_glyph(codepoints)
          return [NEUTRAL_TONE, syll] unless glyph

          # unpack rather than String#[] so an Integer code point is obtained
          # on every Ruby version
          codepoints[codepoints.index(glyph)] = vowel.to_s.unpack('U*').first
          [normalize(tone), codepoints.pack('U*')]
        end

        private

        # Yields each base vowel together with its row of code points.
        def each_tone_glyph
          [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
            yield v, UNICODE_TONE_GLYPHS[v]
          end
        end

        # Locates the tone-bearing code point among +codepoints+.
        # Returns [vowel, index in that vowel's row, code point] or nil.
        #
        # Accented forms (index >= 1) are searched first, across all vowels,
        # so that a plain vowel earlier in the order cannot hide the accent
        # (the plain "i" in "xióng", for example). Only when no accent is
        # present are plain vowels (index 0) considered.
        def find_tone_glyph(codepoints)
          [1..-1, 0..0].each do |slots|
            each_tone_glyph do |vowel, tones|
              candidates = tones[slots]
              glyph = codepoints.find { |c| candidates.include?(c) }
              return [vowel, tones.index(glyph), glyph] if glyph
            end
          end
          nil
        end

      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as IPA tone letters appended to the syllable.
    class Ipa < Tone
      class <<self

        # Indexed by tone modulo 5; index 0 (neutral tone) adds nothing.
        GLYPHS=['', '˥˥', '˧˥', '˧˩˧', '˥˩',] #http://wapedia.mobi/en/Wikipedia:IPA_for_Mandarin

        # Appends the tone letters for +tone+ to +syll+.
        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # Tone number of +syll+, judged by the tone letters it ends with;
        # NEUTRAL_TONE when it ends with none of them.
        #
        # The whole glyph is compared against the end of the syllable: every
        # glyph spans several characters, so looking at the last character
        # alone can never match. Index 0 (the empty string) is skipped
        # because every string ends with it.
        def peek_tone(syll)
          tone = (1...GLYPHS.size).find { |i| syll.end_with?(GLYPHS[i]) }
          tone || NEUTRAL_TONE
        end

        # Returns [tone, leading part of +syll+ that contains no tone letter].
        # The second element is nil when +syll+ begins with a tone letter.
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as a separate mark glyph appended to the syllable.
    class Marks < Tone
      class <<self

        # Indexed by tone modulo 5: index 0 is the neutral-tone dot and
        # index 1 (first tone) is written with no mark at all.
        GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

        # Appends the mark for +tone+ to +syll+.
        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # Tone number of +syll+ according to the mark it contains; a syllable
        # without any mark is first tone.
        #
        # Written with `then`: the `when ... :` form is only accepted by
        # Ruby 1.8 and is a syntax error on later versions.
        def peek_tone(syll)
          case syll
          when /ˊ/ then 2
          when /ˇ/ then 3
          when /ˋ/ then 4
          when /˙/ then NEUTRAL_TONE
          else
            1
          end
        end

        # Returns [tone, leading part of +syll+ that contains no mark].
        # The second element is nil when +syll+ begins with a mark.
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Ting
  module Tones
    # Tones written as a digit appended to the syllable.
    class Numbers < Tone
      class <<self

        # Appends the normalized tone number to +syll+.
        def add_tone(syll, tone)
          suffix = normalize(tone).to_s
          syll + suffix
        end

        # Normalized value of the trailing digit of +syll+, or NEUTRAL_TONE
        # when +syll+ does not end in a digit.
        def peek_tone(syll)
          digit = syll[/(\d)\Z/, 1]
          digit ? normalize(Integer(digit)) : NEUTRAL_TONE
        end

        # Returns [tone, leading run of non-digit characters of +syll+].
        def pop_tone(syll)
          tone = peek_tone(syll)
          [ tone, syll[/\A\D+/] ]
        end

      end
    end
  end
end
|