ting 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
module Ting

  # All exceptions arising from this module inherit from Ting::Error
  class Error < StandardError ; end

  #
  # Error that records an input and a position within it.
  # NOTE(review): the raise sites are not in this file; presumably the
  # readers raise this when +input+ cannot be parsed at +position+.
  #
  class ParseError < Error
    # The values handed to the constructor, unchanged.
    attr_reader :input, :position

    # input::    the value that was being processed
    # position:: where in +input+ the problem was detected
    #
    # Both values are embedded in the exception message; without the call
    # to +super+ the message would only be the class name.
    def initialize(input, position)
      @input = input
      @position = position
      super("parse error in #{input.inspect} at position #{position}")
    end
  end

end
17
+
@@ -0,0 +1,177 @@
1
+ # Classes and constants used throughout the module
2
+ # * Initial
3
+ # * Final
4
+ # * TonelessSyllable
5
+ # * Syllable
6
+ # * ILLEGAL_COMBINATIONS
7
+
8
module Ting

  #
  # A Chinese initial (start of a syllable)
  #
  # The only instances are the constants created below (Initial::Bo,
  # Initial::Po, ...); +new+ is made private right after they are built.
  #
  class Initial
    attr :name

    def initialize(n) ; @name = n ; end

    # Every initial, in declaration order. Each name also becomes a
    # constant of this class holding the corresponding instance.
    All = %w(
      Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
      Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
    ).map { |c| const_set c, Initial.new(c) }

    # Must come after All has been built, since building it calls +new+.
    class <<self
      private :new
    end

    # The initials partitioned into groups; ILLEGAL_COMBINATIONS refers to
    # these groups.
    Groups = [
      Group_0 = [ Empty ],
      Group_1 = [ Bo, Po, Mo, Fo ],     #Bilabial and Labio-dental
      Group_2 = [ De, Te, Ne, Le ],     #Plosive, nasal and lateral approximant alveolar
      Group_3 = [ Ge, Ke, He ],         #Velar
      Group_4 = [ Ji, Qi, Xi ],         #Alveolo-palatal
      Group_5 = [ Zhi, Chi, Shi, Ri ],  #Retroflex
      Group_6 = [ Zi, Ci, Si ],         #Fricative and affricate alveolar
    ]

    # Initial + Final => TonelessSyllable
    def +(f)
      TonelessSyllable.new(self, f)
    end

    def inspect ; "<#{self.class.name}::#{name}>" ; end
  end


  #
  # A Chinese final (end of a syllable)
  #
  # As with Initial, the only instances are the constants created below.
  #
  class Final
    attr :name

    def initialize(n) ; @name = n ; end

    # Every final, in declaration order. Each name also becomes a constant
    # of this class holding the corresponding instance.
    All = %w(
      Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
      I Ia Io Ie Iai Iao Iu Ian In Iang Ing
      U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
    ).map { |c| const_set c, Final.new(c) }

    # Must come after All has been built, since building it calls +new+.
    class <<self ; private :new ; end

    # The finals partitioned into groups; ILLEGAL_COMBINATIONS refers to
    # these groups.
    Groups = [
      Group_0 = [ Empty ],
      Group_A = [ A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er ],
      Group_I = [ I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing ],
      Group_U = [ U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng ],
      Group_V = [ V, Ue, Van, Vn, Iong ]
    ]

    def inspect ; "<#{self.class.name}::#{name}>" ; end
  end


  #
  # Combination of an initial and a final
  # Not to be confused with a syllable that has the neutral tone
  #
  class TonelessSyllable
    attr_accessor :initial, :final

    def initialize(initial, final)
      self.initial = initial
      self.final = final
    end

    # TonelessSyllable + tone => Syllable
    def +(tone)
      Syllable.new(initial, final, tone)
    end

    def inspect
      "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}>>"
    end

    # True when the pair (i, f) is covered by one of the entries of
    # ILLEGAL_COMBINATIONS, i.e. +i+ is in the entry's list of initials
    # and +f+ is in its list of finals.
    def self.illegal?(i, f)
      ILLEGAL_COMBINATIONS.any? do |initials, finals|
        initials.include?(i) && finals.include?(f)
      end
    end

    alias :to_s :inspect
  end


  #
  # Syllable : initial, final and tone
  #
  class Syllable < TonelessSyllable
    attr_accessor :tone

    def initialize(initial, final, tone)
      super(initial, final)
      self.tone = tone
    end

    def inspect
      "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
    end

    # Re-aliased on purpose: the alias inherited from TonelessSyllable is
    # bound to the parent's +inspect+, not to the one defined above.
    alias :to_s :inspect
  end


  #
  # Some groups of initials and finals may not be combined
  # This list is not exhaustive but is sufficient to resolve ambiguity
  #
  # Each entry is a pair [list of initials, list of finals]; every initial
  # of the first list is illegal with every final of the second.
  #
  ILLEGAL_COMBINATIONS =
    [
      [Initial::Group_0, Final::Group_0],
      [Initial::Group_1, Final::Group_0],
      [Initial::Group_2, Final::Group_0],
      [Initial::Group_3, Final::Group_0],
      [Initial::Group_4, Final::Group_0],

      [Initial::Group_4, Final::Group_U],
      [Initial::Group_4, Final::Group_A],

      [Initial::Group_3, Final::Group_I],
      [Initial::Group_5, Final::Group_I],
      [Initial::Group_6, Final::Group_I],

      [Initial::Group_1, Final::Group_V],
      [Initial::Group_3, Final::Group_V],

      #2008.05.26 lo is also valid!
      #[Initial::Group_2, [Final::O]], #Only bo, po, mo and fo are valid -o combinations
      [Initial::Group_3, [Final::O]],
      [Initial::Group_4, [Final::O]],
      [Initial::Group_5, [Final::O]],
      [Initial::Group_6, [Final::O]],

      [[Initial::Empty], [Final::Ong]]
      # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
    ]

  class <<self

    #
    # Yields a block for any valid initial/final pair
    #
    # The pairs are read from data/valid_pinyin.yaml next to this file.
    # The code iterates it as a mapping from a Final constant name to a
    # collection of (Initial constant name, value) pairs; the value is not
    # used here.
    #
    # Without a block an enumerator over the same pairs is returned, so
    # the data file is only read once the enumerator is consumed.
    #
    def valid_combinations
      return to_enum(:valid_combinations) unless block_given?

      require 'yaml' # loaded lazily: only this method needs it
      path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
      YAML::load(IO.read(path)).each do |final_name, initials|
        final = Final.const_get(final_name)
        initials.each do |initial_name, _pinyin|
          yield(Initial.const_get(initial_name), final)
        end
      end
    end

  end
end
@@ -0,0 +1,17 @@
1
class String
  # Memoizes pretty_tones conversions, keyed by the lower-cased numbered
  # syllable (e.g. "hao3").
  PINYIN_CACHE={}

  # One pinyin syllable followed by its tone digit. Up to six letters are
  # allowed because the longest syllables (zhuang, chuang, shuang) have
  # six; with a limit of five only their tail ("huang1") was matched.
  PINYIN_SYLLABLE = /[A-Za-züÜ]{1,6}\d/

  # Returns a copy of the string in which every numbered pinyin syllable
  # ("hao3") is replaced by the output of the hanyu/accents writer, and
  # "u:" is replaced by "ü". Text that does not look like a numbered
  # syllable is left alone. Conversions are cached in PINYIN_CACHE.
  #
  # Ting.reader and Ting.writer are defined outside this file.
  def pretty_tones
    gsub('u:', 'ü').gsub(PINYIN_SYLLABLE) do |m|
      key = m.downcase
      PINYIN_CACHE[key] ||= (Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(key))
    end
  end

  # Extracts the numbered pinyin syllables from the string (after turning
  # "u:" into "ü"), feeds each one through the hanyu/numbers reader and the
  # zhuyin/marks writer, and joins the results with single spaces.
  # Anything that is not a numbered syllable is dropped.
  def bpmf
    gsub('u:', 'ü').scan(PINYIN_SYLLABLE).map do |m|
      Ting.writer(:zhuyin, :marks) << (Ting.reader(:hanyu, :numbers) << m.downcase)
    end.join(' ')
  end
end
@@ -0,0 +1,19 @@
1
class String
  # Fallback for Rubies whose String#chars is missing or does not return an
  # Array. When the native method already returns an Array it is left
  # alone: replacing it would make #chars raise on any string that is not
  # valid UTF-8, for every user of String in the process.
  unless "".respond_to?(:chars) && "".chars.is_a?(Array)
    # Returns the characters of the (UTF-8) string as an array of
    # one-character strings.
    def chars
      unpack('U*').map { |c| [c].pack('U') }
    end
  end

  # Returns a copy in which a lower-case ASCII letter following a run of
  # underscores or dashes is upper-cased (the run itself is removed), and a
  # lower-case ASCII letter at the very start or after whitespace is
  # upper-cased as well: "foo_bar" => "FooBar".
  def camelcase
    str = dup
    # gsub! returns nil when nothing matched, so the calls are not chained
    str.gsub!(/(?:_+|-+)([a-z])/) { $1.upcase }
    str.gsub!(/(\A|\s)([a-z])/) { $1 + $2.upcase }
    str
  end
end
13
+
14
module Kernel
  # Hands +s+ to the block, ignores whatever the block evaluates to and
  # gives back +s+ itself:
  #
  #   returning([]) { |list| list << 1 }   # => [1]
  #
  def returning(s)
    yield s
    s
  end
end
@@ -0,0 +1,65 @@
1
module Ting
  #
  # Base class for Tone classes
  #
  # The class methods defined here are the tone-less defaults: nothing is
  # added to a syllable and every syllable is reported as neutral.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class <<self
      # Add a tone to a syllable
      # (default: the syllable is returned as it is)
      def add_tone(s,t)
        s
      end

      # Determine the tone of a syllable
      # (default: always NEUTRAL_TONE)
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Remove the tone from a syllable
      # Returns the pair [tone, syllable]
      # (default: NEUTRAL_TONE and the syllable as it is)
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private
      # Make sure the tone number is in the valid range.
      # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
      #
      # Numbers outside VALID_TONES are wrapped modulo MAX_TONE, so 0 and
      # 10 become 5, 6 becomes 1, 7 becomes 2. The wrapped value is
      # returned explicitly; ending the method on a modifier-if assignment
      # would yield nil whenever its condition is false.
      def normalize(t)
        return t if VALID_TONES.include?(t)

        t %= MAX_TONE
        t == 0 ? NEUTRAL_TONE : t
      end

    end
  end
end
40
+
41
+ # Tone marks as a separate glyph, e.g. for Bopomofo
42
+ require "ting/tones/marks"
43
+
44
+ # Tone numbers added after the syllable
45
+ require "ting/tones/numbers"
46
+
47
+ # Tone accents, for Hanyu pinyin
48
+ require "ting/tones/accents"
49
+
50
+ # Superscript numerals, for Wade-Giles
51
+ require "ting/tones/supernum"
52
+
53
+ # IPA tone symbols
54
+ require "ting/tones/ipa"
55
+
56
+ # No tones
57
+ require "ting/tones/no_tones"
58
+
59
module Ting
  module Tones
    # Tone notations collected in this list.
    # NOTE(review): Ipa and the class from ting/tones/supernum are loaded
    # above but not listed here - confirm whether that is intentional.
    All = [Numbers, Marks, Accents, NoTones]

    # Same values as the constants of Ting::Tone. The class-method bodies
    # of the tone classes nested in this module use these names
    # unqualified.
    VALID_TONES = 1..5
    NEUTRAL_TONE = 5
    MAX_TONE = NEUTRAL_TONE
  end
end
@@ -0,0 +1,62 @@
1
module Ting
  module Tones
    #
    # Tones written as an accent on one of the vowels of the syllable
    # (Hanyu pinyin). Throughout this class the letter v stands for ü.
    #
    class Accents < Tone
      class <<self

        # Unicode code points per vowel. Index 0 is the bare vowel (used
        # for the neutral tone), indexes 1..4 are the vowel carrying the
        # accent of that tone. The :v row holds ü and its accented forms.
        UNICODE_TONE_GLYPHS={
          :a=>[97, 257, 225, 462, 224],
          :e=>[101, 275, 233, 283, 232],
          :i=>[105, 299, 237, 464, 236],
          :o=>[111, 333, 243, 466, 242],
          :u=>[117, 363, 250, 468, 249],
          :v=>[252, 470, 472, 474, 476]
        }

        # The character for vowel +letter+ (String or Symbol) in tone
        # +tone+. The tone is taken modulo MAX_TONE, so the neutral tone
        # (5) maps to the bare vowel at index 0.
        def tone_glyph(letter,tone)
          if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
            [u].pack('U')
          end
        end

        # Returns a copy of +syll+ with the accent for +tone+ placed on
        # the vowel that carries it: a, else e, else o, else the last of
        # i/u/ü (so "liu" + 4 gives "liù" and "gui" + 4 gives "guì").
        # A syllable without any vowel is returned without accent.
        #
        # The argument itself is not modified; ü is replaced by v in the
        # copy that is worked on.
        # NOTE(review): when the accent lands on a/e/o, a v in the
        # syllable is returned as a plain v (e.g. "lve" + 4 => "lvè") -
        # confirm that callers turn it back into ü.
        def add_tone(syll, tone)
          syll = syll.gsub('ü','v')
          tone %= MAX_TONE
          case syll
          when /a/ then syll.sub(/a/, tone_glyph(:a,tone))
          when /e/ then syll.sub(/e/, tone_glyph(:e,tone))
          when /o/ then syll.sub(/o/, tone_glyph(:o,tone))
          when /[iuv]/
            # only the last i/u/v of the syllable takes the mark
            syll.sub(/[iuv](?=[^iuv]*\z)/) { |vowel| tone_glyph(vowel,tone) }
          else syll
          end
        end

        # The tone (1..5) encoded by the accent found in +syll+.
        # NEUTRAL_TONE when there is no accented vowel.
        def peek_tone(syll)
          hit = find_tone_glyph(syll.unpack('U*'))
          hit ? normalize(hit[1]) : NEUTRAL_TONE
        end

        # Returns [tone, syllable without accent]. The vowel that was
        # found is replaced by its plain letter; for the ü row that is v.
        # A syllable without any vowel yields [NEUTRAL_TONE, syll].
        def pop_tone(syll)
          unpacked = syll.unpack('U*')
          vowel, tone_index, position = find_tone_glyph(unpacked)
          return [NEUTRAL_TONE, syll] unless vowel

          # unpack instead of to_s[0]: the latter is a String on Ruby 1.9+
          unpacked[position] = vowel.to_s.unpack('U').first
          [normalize(tone_index), unpacked.pack('U*')]
        end

        private
        # Yields every vowel symbol with its row of code points.
        def each_tone_glyph
          [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
            yield v, UNICODE_TONE_GLYPHS[v]
          end
        end

        # Looks for a vowel of UNICODE_TONE_GLYPHS among +codepoints+.
        # Returns [vowel symbol, index in its row, position in codepoints]
        # or nil. An accented vowel wins over a bare one, so the plain i
        # of "liù" does not hide the accent on the u; apart from that the
        # vowel order of each_tone_glyph decides.
        def find_tone_glyph(codepoints)
          accented = plain = nil
          each_tone_glyph do |vowel, tones|
            codepoints.each_with_index do |codepoint, position|
              tone_index = tones.index(codepoint)
              next unless tone_index
              hit = [vowel, tone_index, position]
              if tone_index == 0
                plain ||= hit
              else
                accented ||= hit
              end
            end
          end
          accented || plain
        end

      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Ting
  module Tones
    #
    # IPA tone symbols, appended to the syllable
    #
    class Ipa < Tone
      class <<self

        # Indexed by tone modulo 5: index 0 (empty) is used for the
        # neutral tone, indexes 1..4 for tones 1..4.
        GLYPHS=['', '˥˥', '˧˥', '˧˩˧', '˥˩'] #http://wapedia.mobi/en/Wikipedia:IPA_for_Mandarin

        # Returns +syll+ followed by the tone letters for +tone+.
        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # The tone whose (non-empty) glyph +syll+ ends with, NEUTRAL_TONE
        # if it ends with none of them. The glyphs consist of several
        # characters, so the whole suffix is compared rather than only the
        # last character.
        def peek_tone(syll)
          GLYPHS.each_with_index do |glyph, tone|
            next if glyph.empty?
            return tone if syll[-glyph.length..-1] == glyph
          end
          NEUTRAL_TONE
        end

        # Returns [tone, leading part of +syll+ up to the first tone
        # letter]. The second element is nil when +syll+ is empty or
        # starts with a tone letter.
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Ting
  module Tones
    #
    # Tone marks as a separate glyph after the syllable, e.g. for Bopomofo
    #
    class Marks < Tone
      class <<self

        # Indexed by tone modulo 5: index 0 is the neutral-tone dot, the
        # first tone (index 1) is written without a mark.
        GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

        # Returns +syll+ followed by the mark for +tone+.
        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # The tone of the mark occurring in +syll+; 1 when there is none.
        # (+then+ is used because the "when ... :" form is not accepted by
        # Ruby 1.9 and later.)
        def peek_tone(syll)
          case syll
          when /ˊ/ then 2
          when /ˇ/ then 3
          when /ˋ/ then 4
          when /˙/ then NEUTRAL_TONE
          else
            1
          end
        end

        # Returns [tone, leading part of +syll+ up to the first tone
        # mark]. The second element is nil when +syll+ is empty or starts
        # with a mark.
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Ting
  module Tones
    # No tones: defines nothing of its own, so every class method is the
    # one inherited from Tone.
    class NoTones < Tone ; end
  end
end
7
+
@@ -0,0 +1,25 @@
1
module Ting
  module Tones
    #
    # Tone numbers added after the syllable
    #
    class Numbers < Tone
      class <<self

        # Returns +syll+ with the normalized tone number appended.
        def add_tone(syll, tone)
          suffix = normalize(tone).to_s
          syll + suffix
        end

        # The normalized value of the digit that ends +syll+;
        # NEUTRAL_TONE when +syll+ does not end in a digit.
        def peek_tone(syll)
          return NEUTRAL_TONE unless syll =~ /(\d)\Z/
          normalize Integer($1)
        end

        # Returns [tone, leading run of non-digit characters of +syll+].
        # The second element is nil when +syll+ is empty or starts with a
        # digit.
        def pop_tone(syll)
          tone = peek_tone(syll)
          bare = syll[/\A\D+/]
          [ tone, bare ]
        end

      end
    end
  end
end