ting 0.3.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,17 +1,14 @@
1
- module Ting
2
-
3
- # All exceptions arising from this module inherit from Ting::Error
4
-
5
- class Error < StandardError ; end
6
-
7
- class ParseError < Error
8
- attr_reader :input, :position
9
-
10
- def initialize(input, position)
11
- @input=input
12
- @position=position
13
- end
14
- end
15
-
16
- end
17
-
1
+ module Ting
2
+
3
+ class ParseError < StandardError
4
+ attr_reader :input, :position
5
+
6
+ def initialize(input, position, error = nil)
7
+ super(error)
8
+ @input=input
9
+ @position=position
10
+ end
11
+ end
12
+
13
+ end
14
+
@@ -1,177 +1,181 @@
1
- # Classes and constants used throughout the module
2
- # * Initial
3
- # * Final
4
- # * TonelessSyllable
5
- # * Syllable
6
- # * ILLEGAL_COMBINATIONS
7
-
8
- module Ting
9
-
10
- #
11
- # A Chinese initial (start of a syllable)
12
- #
13
-
14
- class Initial
15
- attr :name
16
-
17
- def initialize(n) ; @name=n ; end
18
-
19
- All = %w(
20
- Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
21
- Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
22
- ).map{|c| const_set c, Initial.new(c)}
23
-
24
- class <<self
25
- private :new
26
- end
27
-
28
- Groups=[
29
- Group_0=[ Empty ],
30
- Group_1=[ Bo,Po,Mo,Fo], #Bilabial and Labio-dental
31
- Group_2=[ De,Te,Ne,Le ], #Plosive, nasal and lateral approximant alveolar
32
- Group_3=[ Ge,Ke,He ], #Velar
33
- Group_4=[ Ji,Qi,Xi ], #Alveolo-palatal
34
- Group_5=[ Zhi,Chi,Shi,Ri ], #Retroflex
35
- Group_6=[ Zi,Ci,Si ], #Fricative and affricate alveolar
36
- ]
37
-
38
- def +(f)
39
- TonelessSyllable.new(self,f)
40
- end
41
-
42
- def inspect() ; "<#{self.class.name}::#{@name}>" ; end
43
- end
44
-
45
-
46
- #
47
- # A Chinese final (end of a syllable)
48
- #
49
-
50
- class Final
51
- attr :name
52
-
53
- def initialize(n) ; @name=n ; end
54
-
55
- All=%w(
56
- Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
57
- I Ia Io Ie Iai Iao Iu Ian In Iang Ing
58
- U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
59
- ).map{|c| const_set c, Final.new(c)}
60
-
61
- class <<self ; private :new ; end
62
-
63
- Groups=[
64
- Group_0=[ Empty ],
65
- Group_A=[ A,O,E,Ee,Ai,Ei,Ao,Ou,An,En,Ang,Eng,Ong,Er ],
66
- Group_I=[ I,Ia,Io,Ie,Iai,Iao,Iu,Ian,In,Iang,Ing ],
67
- Group_U=[ U,Ua,Uo,Uai,Ui,Uan,Un,Uang,Ueng ],
68
- Group_V=[ V,Ue,Van,Vn,Iong]
69
- ]
70
-
71
- def inspect() ; "<#{self.class.name}::#{name}>" ; end
72
- end
73
-
74
-
75
- #
76
- # Combination of an initial and a final
77
- # Not to be confused with a syllable that has the neutral tone
78
- #
79
-
80
- class TonelessSyllable
81
- attr_accessor :initial, :final
82
-
83
- def initialize(initial, final)
84
- self.initial = initial
85
- self.final = final
86
- end
87
-
88
- def +(tone)
89
- Syllable.new(initial, final, tone)
90
- end
91
-
92
- def inspect
93
- "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}>>"
94
- end
95
-
96
- def self.illegal?(i,f)
97
- ILLEGAL_COMBINATIONS.any? {|in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f)}
98
- end
99
-
100
- alias :to_s :inspect
101
- end
102
-
103
-
104
- #
105
- # Syllable : initial, final and tone
106
- #
107
-
108
- class Syllable < TonelessSyllable
109
- attr_accessor :tone
110
-
111
- def initialize(initial, final, tone)
112
- super(initial, final)
113
- self.tone = tone
114
- end
115
-
116
- def inspect
117
- "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
118
- end
119
-
120
- alias :to_s :inspect
121
- end
122
-
123
-
124
- #
125
- # Some groups of initials and finals may not be combined
126
- # This list is not exhaustive but is sufficient to resolve ambiguity
127
- #
128
-
129
- ILLEGAL_COMBINATIONS=
130
- [
131
- [Initial::Group_0, Final::Group_0],
132
- [Initial::Group_1, Final::Group_0],
133
- [Initial::Group_2, Final::Group_0],
134
- [Initial::Group_3, Final::Group_0],
135
- [Initial::Group_4, Final::Group_0],
136
-
137
- [Initial::Group_4, Final::Group_U],
138
- [Initial::Group_4, Final::Group_A],
139
-
140
- [Initial::Group_3, Final::Group_I],
141
- [Initial::Group_5, Final::Group_I],
142
- [Initial::Group_6, Final::Group_I],
143
-
144
- [Initial::Group_1, Final::Group_V],
145
- [Initial::Group_3, Final::Group_V],
146
-
147
- # For "咯 / lo5" to parse correctly we need to list "Le + O" as valid,
148
- [Initial::Group_2 - [Initial::Le], [Final::O]], #Only bo, po, mo and fo are valid -o combinations
149
- [Initial::Group_3, [Final::O]],
150
- [Initial::Group_4, [Final::O]],
151
- [Initial::Group_5, [Final::O]],
152
- [Initial::Group_6, [Final::O]],
153
-
154
- [[Initial::Empty], [Final::Ong]]
155
- # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
156
- ]
157
-
158
- class <<self
159
-
160
- #
161
- # Yields a block for any valid initial/final pair
162
- #
163
-
164
- def valid_combinations
165
- require 'yaml'
166
- inp = YAML::load(IO.read(File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')))
167
- inp.each do |final, initials|
168
- final = Final.const_get(final)
169
- initials.each do |initial, pinyin|
170
- initial = Initial.const_get(initial)
171
- yield(initial, final)
172
- end
173
- end
174
- end
175
-
176
- end
177
- end
1
+ # -*- coding: utf-8 -*-
2
+ # Classes and constants used throughout the module
3
+ # * Initial
4
+ # * Final
5
+ # * Syllable
6
+ # * ILLEGAL_COMBINATIONS
7
+
8
+ require 'yaml'
9
+
10
+ module Ting
11
+
12
+ #
13
+ # A Chinese initial (start of a syllable)
14
+ #
15
+
16
+ class Initial
17
+ attr :name
18
+
19
+ def initialize(n) ; @name=n ; end
20
+
21
+ All = %w(
22
+ Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
23
+ Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
24
+ ).map{|c| const_set c, Initial.new(c)}
25
+
26
+ class << self
27
+ private :new
28
+ include Enumerable
29
+ def each(&blk) ; All.each(&blk) ; end
30
+ end
31
+
32
+ Groups=[
33
+ Group_0=[ Empty ],
34
+ Group_1=[ Bo,Po,Mo,Fo], #Bilabial and Labio-dental
35
+ Group_2=[ De,Te,Ne,Le ], #Plosive, nasal and lateral approximant alveolar
36
+ Group_3=[ Ge,Ke,He ], #Velar
37
+ Group_4=[ Ji,Qi,Xi ], #Alveolo-palatal
38
+ Group_5=[ Zhi,Chi,Shi,Ri ], #Retroflex
39
+ Group_6=[ Zi,Ci,Si ], #Fricative and affricate alveolar
40
+ ]
41
+
42
+ def +(f)
43
+ Syllable.new(self,f)
44
+ end
45
+
46
+ def inspect() ; "<#{self.class.name}::#{@name}>" ; end
47
+ end
48
+
49
+
50
+ #
51
+ # A Chinese final (end of a syllable)
52
+ #
53
+
54
+ class Final
55
+ attr :name
56
+
57
+ def initialize(n) ; @name=n ; end
58
+
59
+ All=%w(
60
+ Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
61
+ I Ia Io Ie Iai Iao Iu Ian In Iang Ing
62
+ U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
63
+ ).map{|c| const_set c, Final.new(c)}
64
+
65
+ class << self
66
+ private :new
67
+ include Enumerable
68
+ def each(&blk) ; All.each(&blk) ; end
69
+ end
70
+
71
+ Groups=[
72
+ Group_0=[ Empty ],
73
+ Group_A=[ A,O,E,Ee,Ai,Ei,Ao,Ou,An,En,Ang,Eng,Ong,Er ],
74
+ Group_I=[ I,Ia,Io,Ie,Iai,Iao,Iu,Ian,In,Iang,Ing ],
75
+ Group_U=[ U,Ua,Uo,Uai,Ui,Uan,Un,Uang,Ueng ],
76
+ Group_V=[ V,Ue,Van,Vn,Iong]
77
+ ]
78
+
79
+ def inspect() ; "<#{self.class.name}::#{name}>" ; end
80
+ end
81
+
82
+
83
+ #
84
+ # Combination of an initial and a final, a tone, and possible capitalization
85
+ # A tone of 'nil' means the tone is not specified
86
+
87
class Syllable
  # initial and final are stored exactly as given; tone is nil when it is
  # not specified; capitalized is a flag that defaults to false.
  attr_accessor :initial, :final, :tone, :capitalized

  # Builds a syllable from its parts. Only initial and final are required.
  def initialize(initial, final, tone = nil, capitalized = false)
    self.initial = initial
    self.final = final
    self.tone = tone
    self.capitalized = capitalized
  end

  # Returns a new object of the receiver's class with the same initial,
  # final and capitalization, but carrying the given tone.
  def +(tone)
    self.class.new(initial, final, tone, capitalized)
  end

  # Debug representation; relies on initial and final responding to #name.
  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}#{', capitalized' if capitalized}>>"
  end

  alias :capitalized? :capitalized

  # True when the (initial, final) pair is covered by any entry of
  # ILLEGAL_COMBINATIONS, i.e. some [initial_group, final_group] entry
  # contains i and f respectively.
  def self.illegal?(i, f)
    ILLEGAL_COMBINATIONS.any? { |in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f) }
  end

  alias :to_s :inspect

  # Two syllables are equal when initial, final, tone and capitalization
  # all match. Any object exposing those four readers is comparable
  # (duck typing is kept); anything else, including nil, is simply not
  # equal instead of raising NoMethodError.
  def ==(other)
    readers = [:initial, :final, :tone, :capitalized]
    return false unless readers.all? { |reader| other.respond_to?(reader) }

    readers.map { |reader| other.public_send(reader) } ==
      readers.map { |reader| public_send(reader) }
  end
end
118
+
119
+ #
120
+ # Some groups of initials and finals may not be combined
121
+ # This list is not exhaustive but is sufficient to resolve ambiguity
122
+ #
123
+
124
+ ILLEGAL_COMBINATIONS=
125
+ [
126
+ [Initial::Group_0, Final::Group_0],
127
+ [Initial::Group_1, Final::Group_0],
128
+ [Initial::Group_2, Final::Group_0],
129
+ [Initial::Group_3, Final::Group_0],
130
+ [Initial::Group_4, Final::Group_0],
131
+
132
+ [Initial::Group_4, Final::Group_U],
133
+ [Initial::Group_4, Final::Group_A],
134
+
135
+ [Initial::Group_3, Final::Group_I],
136
+ [Initial::Group_5, Final::Group_I],
137
+ [Initial::Group_6, Final::Group_I],
138
+
139
+ [Initial::Group_1, Final::Group_V],
140
+ [Initial::Group_3, Final::Group_V],
141
+
142
+ # For "咯 / lo5" to parse correctly we need to list "Le + O" as valid,
143
+ [Initial::Group_2 - [Initial::Le], [Final::O]], #Only bo, po, mo and fo are valid -o combinations
144
+ [Initial::Group_3, [Final::O]],
145
+ [Initial::Group_4, [Final::O]],
146
+ [Initial::Group_5, [Final::O]],
147
+ [Initial::Group_6, [Final::O]],
148
+
149
+ [[Initial::Empty], [Final::Ong]]
150
+ # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
151
+ ]
152
+
153
class << self

  #
  # Yields an [initial, final] pair for every combination listed in
  # data/valid_pinyin.yaml (resolved relative to this file).
  # The YAML is iterated as final-name => { initial-name => value }; the
  # names are looked up as constants on Final and Initial.
  # Returns an Enumerator when called without a block.
  #

  def valid_combinations( &blk )
    # __method__ is the name of the method currently executing, so the
    # enumerator re-enters this very method once a block is supplied.
    return to_enum(__method__) unless block_given?

    path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    inp = YAML::load(IO.read(path))
    inp.each do |final, initials|
      final = Final.const_get(final)
      initials.each do |initial, _pinyin|
        initial = Initial.const_get(initial)
        yield [initial, final]
      end
    end
  end

  #
  # Yields a Syllable for every valid initial/final pair, for each tone
  # 1 through 5, first uncapitalized and then capitalized.
  # Returns an Enumerator when called without a block.
  #

  def all_syllables( &blk )
    return to_enum(__method__) unless block_given?

    # each, not map: the block is run for its yields only.
    valid_combinations.each do |i, f|
      1.upto(5) do |t|
        yield Syllable.new(i, f, t, false)
        yield Syllable.new(i, f, t, true)
      end
    end
  end
end
181
+ end
@@ -0,0 +1,7 @@
1
+ module Ting
2
+ module Procable
3
+ def to_proc
4
+ method(:call).to_proc
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,27 @@
1
+ module Ting
2
# Turns a string into a list of syllables, using the conversion named by
# conv and the tone module selected by tone.
class Reader
  include Procable

  # conv is stored as a string and later passed to Conversions.parse.
  # tone is camelized and resolved to a constant inside Tones.
  def initialize(conv, tone)
    @conv = conv.to_s
    @tone = Tones.const_get Ting.camelize(tone.to_s)
  end

  # Tokenizes str and converts each token into a syllable with its tone.
  #
  # Raises ParseError carrying the offending token and its position when
  # a token yields no tone, no syllable, or a syllable lacking an initial
  # or final. Any other StandardError raised while parsing is wrapped in
  # a ParseError for the whole input (position 0).
  def parse(str)
    Conversions.tokenize(str).map do |token, pos|
      tone, syll = @tone.pop_tone(token)
      tsyll = Conversions.parse(@conv, syll)
      # Guard against a nil parse result so the failure is reported as an
      # illegal syllable rather than a NoMethodError on nil.
      unless tsyll && tone && tsyll.final && tsyll.initial
        raise ParseError.new(token, pos),"Illegal syllable <#{token}> in input <#{str}> at position #{pos}."
      end
      tsyll + tone
    end
  rescue ParseError
    # Already precise (token and position); do not re-wrap and lose it.
    raise
  rescue StandardError => e
    # StandardError only: Interrupt, SystemExit and friends must propagate.
    raise ParseError.new(str, 0, e), "Parsing of #{str.inspect} failed : #{e}"
  end

  alias :<< :parse
  alias :call :parse
end
27
+ end
@@ -1,19 +1,4 @@
1
1
  # coding: utf-8
2
2
 
3
3
  class String
4
- PINYIN_CACHE={}
5
-
6
- def pretty_tones
7
- self.gsub('u:','ü').gsub(/[A-Za-züÜ]{1,5}\d/) do |m|
8
- m.downcase!
9
- PINYIN_CACHE[m] || PINYIN_CACHE[m]=(Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(m.downcase))
10
- end
11
- end
12
-
13
- def bpmf
14
- self.gsub('u:','ü').scan(/[A-Za-züÜ]{1,5}\d/).map do |m|
15
- Ting.writer(:zhuyin, :marks) <<
16
- (Ting.reader(:hanyu, :numbers) << m.downcase)
17
- end.join(' ')
18
- end
19
4
  end
@@ -1,65 +1,65 @@
1
- module Ting
2
- #
3
- # Base class for Tone classes
4
- #
5
- class Tone
6
- VALID_TONES = 1..5
7
- MAX_TONE = NEUTRAL_TONE = 5
8
-
9
- class <<self
10
- # Add a tone to a syllable
11
- def add_tone(s,t)
12
- s
13
- end
14
-
15
- # Determine the tone of a syllable
16
- def peek_tone(s)
17
- NEUTRAL_TONE
18
- end
19
-
20
- # Remove the tone from a syllable
21
- def pop_tone(s)
22
- [NEUTRAL_TONE, s]
23
- end
24
-
25
- private
26
- # Make sure the tone number is in the valid range.
27
- # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
28
- def normalize(t)
29
- if VALID_TONES === t
30
- t
31
- else
32
- t %= MAX_TONE
33
- t = NEUTRAL_TONE if t == 0
34
- end
35
- end
36
-
37
- end
38
- end
39
- end
40
-
41
- # Tone marks as a separate glyph, e.g. for Bopomofo
42
- require "ting/tones/marks"
43
-
44
- # Tone numbers added after the syllable
45
- require "ting/tones/numbers"
46
-
47
- # Tone accents, for Hanyu pinyin
48
- require "ting/tones/accents"
49
-
50
- # Superscript numerals, for Wade-Giles
51
- require "ting/tones/supernum"
52
-
53
- # IPA tone symbols
54
- require "ting/tones/ipa"
55
-
56
- # No tones
57
- require "ting/tones/no_tones"
58
-
59
- module Ting
60
- module Tones
61
- All = [Numbers, Marks, Accents, NoTones]
62
- VALID_TONES = 1..5
63
- MAX_TONE = NEUTRAL_TONE = 5
64
- end
65
- end
1
+ module Ting
2
+ #
3
+ # Base class for Tone classes
4
+ #
5
class Tone
  VALID_TONES = 1..5
  MAX_TONE = NEUTRAL_TONE = 5

  class << self
    # Add a tone to a syllable.
    # This base implementation returns the syllable unchanged.
    def add_tone(s,t)
      s
    end

    # Determine the tone of a syllable.
    # This base implementation always answers NEUTRAL_TONE.
    def peek_tone(s)
      NEUTRAL_TONE
    end

    # Remove the tone from a syllable.
    # Returns a [tone, syllable] pair; this base implementation answers
    # NEUTRAL_TONE and the untouched syllable.
    def pop_tone(s)
      [NEUTRAL_TONE, s]
    end

    private

    # Make sure the tone number is in the valid range.
    # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
    # Values outside VALID_TONES are reduced modulo MAX_TONE; the result
    # is always returned (a trailing `x = y if cond` would evaluate to nil
    # whenever the condition is false).
    def normalize(t)
      return t if VALID_TONES === t

      t %= MAX_TONE
      t == 0 ? NEUTRAL_TONE : t
    end

  end
end
39
+ end
40
+
41
+ # Tone marks as a separate glyph, e.g. for Bopomofo
42
+ require "ting/tones/marks"
43
+
44
+ # Tone numbers added after the syllable
45
+ require "ting/tones/numbers"
46
+
47
+ # Tone accents, for Hanyu pinyin
48
+ require "ting/tones/accents"
49
+
50
+ # Superscript numerals, for Wade-Giles
51
+ require "ting/tones/supernum"
52
+
53
+ # IPA tone symbols
54
+ require "ting/tones/ipa"
55
+
56
+ # No tones
57
+ require "ting/tones/no_tones"
58
+
59
+ module Ting
60
+ module Tones
61
+ All = [Numbers, Marks, Accents, NoTones]
62
+ VALID_TONES = 1..5
63
+ MAX_TONE = NEUTRAL_TONE = 5
64
+ end
65
+ end