ting 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
module Ting

  # All exceptions arising from this module inherit from Ting::Error
  class Error < StandardError ; end

  # Raised when a piece of input cannot be parsed.
  #
  # input    - the object that was being parsed
  # position - where in +input+ parsing failed
  class ParseError < Error
    attr_reader :input, :position

    def initialize(input, position)
      @input = input
      @position = position
      # Hand a real message to StandardError; without it the exception
      # only reports its own class name.
      super("Parse error at position #{position} in #{input.inspect}")
    end
  end

end
17
+
@@ -0,0 +1,177 @@
1
+ # Classes and constants used throughout the module
2
+ # * Initial
3
+ # * Final
4
+ # * TonelessSyllable
5
+ # * Syllable
6
+ # * ILLEGAL_COMBINATIONS
7
+
8
+ module Ting
9
+
10
+ #
11
+ # A Chinese initial (start of a syllable)
12
+ #
13
+
14
class Initial
  # Name of this initial (e.g. "Bo"); identical to the name of the constant
  # that holds the instance.
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # One instance per initial. Each instance is also published as a constant
  # of this class (Initial::Bo, Initial::Po, ...).
  All = %w(
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ).map { |c| const_set(c, new(c)) }.freeze

  # The set of initials is closed: no instances beyond the constants above.
  private_class_method :new

  Group_0 = [Empty].freeze
  Group_1 = [Bo, Po, Mo, Fo].freeze     # Bilabial and Labio-dental
  Group_2 = [De, Te, Ne, Le].freeze     # Plosive, nasal and lateral approximant alveolar
  Group_3 = [Ge, Ke, He].freeze         # Velar
  Group_4 = [Ji, Qi, Xi].freeze         # Alveolo-palatal
  Group_5 = [Zhi, Chi, Shi, Ri].freeze  # Retroflex
  Group_6 = [Zi, Ci, Si].freeze         # Fricative and affricate alveolar

  Groups = [Group_0, Group_1, Group_2, Group_3, Group_4, Group_5, Group_6].freeze

  # Combine this initial with the final +f+ into a TonelessSyllable.
  def +(f)
    TonelessSyllable.new(self, f)
  end

  def inspect
    "<#{self.class.name}::#{@name}>"
  end
end
44
+
45
+
46
+ #
47
+ # A Chinese final (end of a syllable)
48
+ #
49
+
50
class Final
  # Name of this final (e.g. "Ang"); identical to the name of the constant
  # that holds the instance.
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # One instance per final. Each instance is also published as a constant
  # of this class (Final::A, Final::Ang, ...).
  All = %w(
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ).map { |c| const_set(c, new(c)) }.freeze

  # The set of finals is closed: no instances beyond the constants above.
  private_class_method :new

  Group_0 = [Empty].freeze
  Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er].freeze
  Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing].freeze
  Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng].freeze
  Group_V = [V, Ue, Van, Vn, Iong].freeze

  Groups = [Group_0, Group_A, Group_I, Group_U, Group_V].freeze

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
73
+
74
+
75
+ #
76
+ # Combination of an initial and a final
77
+ # Not to be confused with a syllable that has the neutral tone
78
+ #
79
+
80
class TonelessSyllable
  attr_accessor :initial, :final

  # Store both halves through the public writers.
  def initialize(initial, final)
    self.initial, self.final = initial, final
  end

  # Attach +tone+ to this initial/final pair, producing a Syllable.
  def +(tone)
    Syllable.new(initial, final, tone)
  end

  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}>>"
  end

  alias_method :to_s, :inspect

  # True when some ILLEGAL_COMBINATIONS entry lists +i+ among its initials
  # and +f+ among its finals.
  def self.illegal?(i, f)
    ILLEGAL_COMBINATIONS.any? do |pair|
      initials, finals = pair
      initials.include?(i) && finals.include?(f)
    end
  end
end
102
+
103
+
104
+ #
105
+ # Syllable : initial, final and tone
106
+ #
107
+
108
class Syllable < TonelessSyllable
  attr_accessor :tone

  # The parent class stores initial and final; the tone is stored here
  # through its public writer.
  def initialize(initial, final, tone)
    super(initial, final)
    self.tone = tone
  end

  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
  end

  # Re-point to_s at this class's inspect; an alias made in the parent
  # refers to the parent's implementation.
  alias_method :to_s, :inspect
end
122
+
123
+
124
+ #
125
+ # Some groups of initials and finals may not be combined
126
+ # This list is not exhaustive but is sufficient to resolve ambiguity
127
+ #
128
+
129
# Each entry is a pair [initials, finals]: no initial from the first list
# may be combined with any final from the second list.
# Frozen because this is a shared lookup table.
ILLEGAL_COMBINATIONS =
  [
    [Initial::Group_0, Final::Group_0],
    [Initial::Group_1, Final::Group_0],
    [Initial::Group_2, Final::Group_0],
    [Initial::Group_3, Final::Group_0],
    [Initial::Group_4, Final::Group_0],

    [Initial::Group_4, Final::Group_U],
    [Initial::Group_4, Final::Group_A],

    [Initial::Group_3, Final::Group_I],
    [Initial::Group_5, Final::Group_I],
    [Initial::Group_6, Final::Group_I],

    [Initial::Group_1, Final::Group_V],
    [Initial::Group_3, Final::Group_V],

    #2008.05.26 lo is also valid!
    #[Initial::Group_2, [Final::O]], #Only bo, po, mo and fo are valid -o combinations
    [Initial::Group_3, [Final::O]],
    [Initial::Group_4, [Final::O]],
    [Initial::Group_5, [Final::O]],
    [Initial::Group_6, [Final::O]],

    [[Initial::Empty], [Final::Ong]]
    # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
  ].freeze
157
+
158
class << self

  #
  # Yields an (initial, final) pair of Initial/Final constants for every
  # combination listed in data/valid_pinyin.yaml, located next to this file.
  # Without a block an Enumerator over the same pairs is returned.
  #
  # The YAML is iterated as final name => collection keyed by initial name.
  # NOTE(review): the second element of each inner entry is presumably the
  # pinyin spelling; it is not used here.
  #
  def valid_combinations
    # Fail fast instead of reading the file and then raising LocalJumpError.
    return enum_for(:valid_combinations) unless block_given?

    require 'yaml' # loaded lazily; only this method needs it
    path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    # The file read here is the data file stored alongside this source file.
    YAML.load(File.read(path)).each do |final_name, initials|
      final = Final.const_get(final_name)
      initials.each do |initial_name, _pinyin|
        yield(Initial.const_get(initial_name), final)
      end
    end
  end

end
177
+ end
@@ -0,0 +1,17 @@
1
class String
  # Memoizes numbered syllable => accented syllable for pretty_tones.
  PINYIN_CACHE={}

  # One numbered pinyin syllable: letters followed by a tone digit.
  # The longest syllables (zhuang, chuang, shuang) have six letters, so the
  # limit must be 6; with 5 the leading letter was cut off and the remainder
  # ("huang1") was converted instead.
  PINYIN_SYLLABLE = /[A-Za-züÜ]{1,6}\d/

  # Returns a copy in which every numbered pinyin syllable ("hao3") is
  # replaced by the result of the :hanyu/:accents writer. "u:" is accepted
  # as a spelling of "ü". Other text is left untouched.
  def pretty_tones
    self.gsub('u:','ü').gsub(PINYIN_SYLLABLE) do |m|
      key = m.downcase
      PINYIN_CACHE[key] ||= (Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(key))
    end
  end

  # Returns the numbered pinyin syllables found in the string, each passed
  # through the :zhuyin/:marks writer, joined by single spaces. Text that is
  # not a numbered syllable is dropped.
  def bpmf
    self.gsub('u:','ü').scan(PINYIN_SYLLABLE).map do |m|
      Ting.writer(:zhuyin, :marks) <<
        (Ting.reader(:hanyu, :numbers) << m.downcase)
    end.join(' ')
  end
end
@@ -0,0 +1,19 @@
1
class String
  # Fallback for Rubies whose String has no #chars: splits the string into
  # an array of one-character strings, decoding it as UTF-8.
  # Guarded so the interpreter's own String#chars is never overridden.
  unless method_defined?(:chars)
    def chars
      unpack('U*').map { |c| [c].pack('U') }
    end
  end

  # Returns a camel-cased copy; the receiver is not modified.
  #   "wade_giles".camelcase   #=> "WadeGiles"
  #   "hanyu pinyin".camelcase #=> "Hanyu Pinyin"
  def camelcase
    # A lowercase letter after a run of underscores or hyphens is upcased
    # and the run itself is removed.
    joined = gsub(/(?:_+|-+)([a-z])/) { $1.upcase }
    # A lowercase letter at the very start or after whitespace is upcased.
    joined.gsub(/(\A|\s)([a-z])/) { $1 + $2.upcase }
  end
end
13
+
14
module Kernel
  # Passes +s+ to the given block, then returns +s+ itself; whatever the
  # block evaluates to is discarded.
  def returning(s)
    s.tap { |subject| yield(subject) }
  end
end
@@ -0,0 +1,65 @@
1
module Ting
  #
  # Base class for Tone classes. The class methods defined here leave the
  # syllable untouched and report the neutral tone.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class << self
      # Add a tone to a syllable. This base version returns +s+ unchanged.
      def add_tone(s,t)
        s
      end

      # Determine the tone of a syllable. This base version always answers
      # NEUTRAL_TONE.
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Remove the tone from a syllable. Returns [tone, syllable]; this base
      # version returns NEUTRAL_TONE and +s+ unchanged.
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private

      # Make sure the tone number is in the valid range.
      # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
      # Out-of-range numbers wrap modulo MAX_TONE (7 => 2, 10 => 5).
      def normalize(t)
        return t if VALID_TONES === t

        wrapped = t % MAX_TONE
        # Return the wrapped value explicitly: ending the method on
        # "t = NEUTRAL_TONE if t == 0" yields nil whenever the remainder
        # is not zero.
        wrapped == 0 ? NEUTRAL_TONE : wrapped
      end
    end
  end
end
40
+
41
+ # Tone marks as a separate glyph, e.g. for Bopomofo
42
+ require "ting/tones/marks"
43
+
44
+ # Tone numbers added after the syllable
45
+ require "ting/tones/numbers"
46
+
47
+ # Tone accents, for Hanyu pinyin
48
+ require "ting/tones/accents"
49
+
50
+ # Superscript numerals, for Wade-Giles
51
+ require "ting/tones/supernum"
52
+
53
+ # IPA tone symbols
54
+ require "ting/tones/ipa"
55
+
56
+ # No tones
57
+ require "ting/tones/no_tones"
58
+
59
module Ting
  module Tones
    # Tone classes collected in this list; frozen because it is a
    # shared constant.
    # NOTE(review): Ipa is defined as a tone class but is not listed
    # here - confirm whether that is intentional.
    All = [Numbers, Marks, Accents, NoTones].freeze

    # Same values as the constants on Tone, repeated in this namespace.
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5
  end
end
@@ -0,0 +1,62 @@
1
module Ting
  module Tones
    # Tones written as accents on a vowel.
    class Accents < Tone
      class << self

        # Unicode code points per vowel. Index 0 is the unaccented letter,
        # indexes 1-4 are the accented forms for tones 1-4. :v stands for ü.
        UNICODE_TONE_GLYPHS={
          :a=>[97, 257, 225, 462, 224],
          :e=>[101, 275, 233, 283, 232],
          :i=>[105, 299, 237, 464, 236],
          :o=>[111, 333, 243, 466, 242],
          :u=>[117, 363, 250, 468, 249],
          :v=>[252, 470, 472, 474, 476]
        }

        # The glyph for +letter+ ("a", :a, ...) carrying +tone+; a tone that
        # is a multiple of MAX_TONE selects the unaccented glyph.
        def tone_glyph(letter,tone)
          if (u=UNICODE_TONE_GLYPHS[letter.to_sym][tone%MAX_TONE])
            [u].pack('U')
          end
        end

        # Returns a new string with the accent for +tone+ placed on the
        # right vowel of +syll+. The argument itself is not modified.
        # A syllable without a, e, o, i, u or ü is returned as is.
        def add_tone(syll, tone)
          work = syll.gsub('ü','v') # work on a copy, 'v' stands in for ü
          tone %= MAX_TONE
          pos = accent_position(work)
          return work unless pos

          marked = work[0, pos] + tone_glyph(work[pos, 1], tone) + work[(pos + 1)..-1]
          # A ü that did not receive the accent (as in "lüe") is still
          # spelled 'v' at this point; restore it.
          marked.gsub('v','ü')
        end

        # The tone (1-4) of the accented vowel in +syll+, or the neutral
        # tone when no vowel carries an accent.
        def peek_tone(syll)
          hit = find_tone_glyph(syll.unpack('U*'))
          normalize(hit ? hit[2] : 0)
        end

        # Returns [tone, syllable]. With an accented vowel, that vowel is
        # replaced by its base letter ('v' for the ü family). Without one,
        # the tone is neutral; a plain ü is still replaced by 'v' when no
        # other vowel precedes it in the lookup order.
        def pop_tone(syll)
          unpacked = syll.unpack('U*')
          pos, vowel, index = find_tone_glyph(unpacked)
          return [normalize(0), syll] unless pos

          # Use the letter's code point: String#[] returns a String, not a
          # number, on Ruby 1.9 and later.
          unpacked[pos] = vowel.to_s.unpack('U').first
          [normalize(index), unpacked.pack('U*')]
        end

        private

        def each_tone_glyph
          [:a,:e,:i,:o,:u,:v].each do |v| #Order is significant
            vowel, tones = v, UNICODE_TONE_GLYPHS[v]
            yield vowel,tones
          end
        end

        # Index of the letter that takes the accent: a, else e, else o,
        # else the LAST of i/u/v, so "liu" is marked on the u and "gui" on
        # the i. nil when none of these letters occur.
        def accent_position(syll)
          %w(a e o).each do |vowel|
            pos = syll.index(vowel)
            return pos if pos
          end
          syll.rindex(/[iuv]/)
        end

        # Locates the tone-carrying glyph in +codepoints+. Returns
        # [position, vowel, glyph index], or nil when no known vowel occurs.
        # An accented glyph (index 1-4) always wins; a plain vowel is only
        # the fallback, so the plain i of "liú" cannot hide the accented ú.
        def find_tone_glyph(codepoints)
          plain = nil
          each_tone_glyph do |vowel, tones|
            codepoints.each_with_index do |codepoint, pos|
              index = tones.index(codepoint)
              next unless index
              return [pos, vowel, index] if index > 0
              plain ||= [pos, vowel, index]
            end
          end
          plain
        end

      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Ting
  module Tones
    # IPA tone symbols, appended to the syllable.
    class Ipa < Tone
      class << self

        # Indexed by tone % 5: index 0 (neutral tone) has no symbol,
        # indexes 1-4 are the symbols for tones 1-4.
        GLYPHS=['', '˥˥', '˧˥', '˧˩˧', '˥˩',] #http://wapedia.mobi/en/Wikipedia:IPA_for_Mandarin

        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # The tone whose symbol +syll+ ends with, or NEUTRAL_TONE when it
        # ends with none of them. The symbols are several characters long,
        # so the whole suffix is compared rather than the last character.
        def peek_tone(syll)
          GLYPHS.each_with_index do |glyph, tone|
            next if tone == 0 # the empty symbol would match every string
            return tone if syll.end_with?(glyph)
          end
          NEUTRAL_TONE
        end

        # Returns [tone, leading part of +syll+ before any tone character].
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
+ end
@@ -0,0 +1,30 @@
1
module Ting
  module Tones
    # Tone marks written as a separate glyph after the syllable.
    class Marks < Tone
      class << self

        # Indexed by tone % 5: index 0 is the neutral-tone mark, index 1
        # (first tone) has no mark, indexes 2-4 are the marks for tones 2-4.
        GLYPHS=['˙', '', 'ˊ', 'ˇ', 'ˋ']

        def add_tone(syll,tone)
          syll + GLYPHS[normalize(tone) % 5]
        end

        # The tone indicated by the mark found in +syll+; a syllable without
        # any mark is first tone.
        # Written with "then": the "when x : y" form is not accepted by
        # Ruby 1.9 and later.
        def peek_tone(syll)
          case syll
          when /ˊ/ then 2
          when /ˇ/ then 3
          when /ˋ/ then 4
          when /˙/ then NEUTRAL_TONE
          else
            1
          end
        end

        # Returns [tone, leading part of +syll+ before any tone mark].
        def pop_tone(syll)
          [ peek_tone(syll), syll[/\A[^#{GLYPHS.join}]+/] ]
        end

      end
    end
  end
end
+ end
@@ -0,0 +1,7 @@
1
module Ting
  module Tones
    # Adds nothing of its own: every tone operation is the one inherited
    # from Tone.
    class NoTones < Tone; end
  end
end
7
+
@@ -0,0 +1,25 @@
1
module Ting
  module Tones
    # Tones written as a digit after the syllable.
    class Numbers < Tone
      class << self

        # Appends the normalized tone number to +syll+.
        def add_tone(syll, tone)
          digit = normalize(tone).to_s
          syll + digit
        end

        # The normalized trailing digit of +syll+, or NEUTRAL_TONE when
        # the syllable does not end in a digit.
        def peek_tone(syll)
          return NEUTRAL_TONE unless syll =~ /(\d)\Z/

          normalize(Integer($1))
        end

        # Returns [tone, leading run of non-digit characters of +syll+].
        def pop_tone(syll)
          tone = peek_tone(syll)
          bare = syll[/\A\D+/]
          [tone, bare]
        end

      end
    end
  end
end
25
+ end