ting 0.3.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,14 @@
1
- module Ting
2
-
3
- # All exceptions arising from this module inherit from Ting::Error
4
-
5
- class Error < StandardError ; end
6
-
7
- class ParseError < Error
8
- attr_reader :input, :position
9
-
10
- def initialize(input, position)
11
- @input=input
12
- @position=position
13
- end
14
- end
15
-
16
- end
17
-
1
+ module Ting
2
+
3
+ class ParseError < StandardError
4
+ attr_reader :input, :position
5
+
6
+ def initialize(input, position, error = nil)
7
+ super(error)
8
+ @input=input
9
+ @position=position
10
+ end
11
+ end
12
+
13
+ end
14
+
@@ -1,177 +1,181 @@
1
- # Classes and constants used throughout the module
2
- # * Initial
3
- # * Final
4
- # * TonelessSyllable
5
- # * Syllable
6
- # * ILLEGAL_COMBINATIONS
7
-
8
- module Ting
9
-
10
- #
11
- # A Chinese initial (start of a syllable)
12
- #
13
-
14
- class Initial
15
- attr :name
16
-
17
- def initialize(n) ; @name=n ; end
18
-
19
- All = %w(
20
- Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
21
- Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
22
- ).map{|c| const_set c, Initial.new(c)}
23
-
24
- class <<self
25
- private :new
26
- end
27
-
28
- Groups=[
29
- Group_0=[ Empty ],
30
- Group_1=[ Bo,Po,Mo,Fo], #Bilabial and Labio-dental
31
- Group_2=[ De,Te,Ne,Le ], #Plosive, nasal and lateral approximant alveolar
32
- Group_3=[ Ge,Ke,He ], #Velar
33
- Group_4=[ Ji,Qi,Xi ], #Alveolo-palatal
34
- Group_5=[ Zhi,Chi,Shi,Ri ], #Retroflex
35
- Group_6=[ Zi,Ci,Si ], #Fricative and affricate alveolar
36
- ]
37
-
38
- def +(f)
39
- TonelessSyllable.new(self,f)
40
- end
41
-
42
- def inspect() ; "<#{self.class.name}::#{@name}>" ; end
43
- end
44
-
45
-
46
- #
47
- # A Chinese final (end of a syllable)
48
- #
49
-
50
- class Final
51
- attr :name
52
-
53
- def initialize(n) ; @name=n ; end
54
-
55
- All=%w(
56
- Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
57
- I Ia Io Ie Iai Iao Iu Ian In Iang Ing
58
- U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
59
- ).map{|c| const_set c, Final.new(c)}
60
-
61
- class <<self ; private :new ; end
62
-
63
- Groups=[
64
- Group_0=[ Empty ],
65
- Group_A=[ A,O,E,Ee,Ai,Ei,Ao,Ou,An,En,Ang,Eng,Ong,Er ],
66
- Group_I=[ I,Ia,Io,Ie,Iai,Iao,Iu,Ian,In,Iang,Ing ],
67
- Group_U=[ U,Ua,Uo,Uai,Ui,Uan,Un,Uang,Ueng ],
68
- Group_V=[ V,Ue,Van,Vn,Iong]
69
- ]
70
-
71
- def inspect() ; "<#{self.class.name}::#{name}>" ; end
72
- end
73
-
74
-
75
- #
76
- # Combination of an initial and a final
77
- # Not to be confused with a syllable that has the neutral tone
78
- #
79
-
80
- class TonelessSyllable
81
- attr_accessor :initial, :final
82
-
83
- def initialize(initial, final)
84
- self.initial = initial
85
- self.final = final
86
- end
87
-
88
- def +(tone)
89
- Syllable.new(initial, final, tone)
90
- end
91
-
92
- def inspect
93
- "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}>>"
94
- end
95
-
96
- def self.illegal?(i,f)
97
- ILLEGAL_COMBINATIONS.any? {|in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f)}
98
- end
99
-
100
- alias :to_s :inspect
101
- end
102
-
103
-
104
- #
105
- # Syllable : initial, final and tone
106
- #
107
-
108
- class Syllable < TonelessSyllable
109
- attr_accessor :tone
110
-
111
- def initialize(initial, final, tone)
112
- super(initial, final)
113
- self.tone = tone
114
- end
115
-
116
- def inspect
117
- "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
118
- end
119
-
120
- alias :to_s :inspect
121
- end
122
-
123
-
124
- #
125
- # Some groups of initials and finals may not be combined
126
- # This list is not exhaustive but is sufficient to resolve ambiguity
127
- #
128
-
129
- ILLEGAL_COMBINATIONS=
130
- [
131
- [Initial::Group_0, Final::Group_0],
132
- [Initial::Group_1, Final::Group_0],
133
- [Initial::Group_2, Final::Group_0],
134
- [Initial::Group_3, Final::Group_0],
135
- [Initial::Group_4, Final::Group_0],
136
-
137
- [Initial::Group_4, Final::Group_U],
138
- [Initial::Group_4, Final::Group_A],
139
-
140
- [Initial::Group_3, Final::Group_I],
141
- [Initial::Group_5, Final::Group_I],
142
- [Initial::Group_6, Final::Group_I],
143
-
144
- [Initial::Group_1, Final::Group_V],
145
- [Initial::Group_3, Final::Group_V],
146
-
147
- # For "咯 / lo5" to parse correctly we need to list "Le + O" as valid,
148
- [Initial::Group_2 - [Initial::Le], [Final::O]], #Only bo, po, mo and fo are valid -o combinations
149
- [Initial::Group_3, [Final::O]],
150
- [Initial::Group_4, [Final::O]],
151
- [Initial::Group_5, [Final::O]],
152
- [Initial::Group_6, [Final::O]],
153
-
154
- [[Initial::Empty], [Final::Ong]]
155
- # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
156
- ]
157
-
158
- class <<self
159
-
160
- #
161
- # Yields a block for any valid initial/final pair
162
- #
163
-
164
- def valid_combinations
165
- require 'yaml'
166
- inp = YAML::load(IO.read(File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')))
167
- inp.each do |final, initials|
168
- final = Final.const_get(final)
169
- initials.each do |initial, pinyin|
170
- initial = Initial.const_get(initial)
171
- yield(initial, final)
172
- end
173
- end
174
- end
175
-
176
- end
177
- end
1
+ # -*- coding: utf-8 -*-
2
+ # Classes and constants used throughout the module
3
+ # * Initial
4
+ # * Final
5
+ # * Syllable
6
+ # * ILLEGAL_COMBINATIONS
7
+
8
+ require 'yaml'
9
+
10
+ module Ting
11
+
12
+ #
13
+ # A Chinese initial (start of a syllable)
14
+ #
15
+
16
+ class Initial
17
+ attr :name
18
+
19
+ def initialize(n) ; @name=n ; end
20
+
21
+ All = %w(
22
+ Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
23
+ Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
24
+ ).map{|c| const_set c, Initial.new(c)}
25
+
26
+ class << self
27
+ private :new
28
+ include Enumerable
29
+ def each(&blk) ; All.each(&blk) ; end
30
+ end
31
+
32
+ Groups=[
33
+ Group_0=[ Empty ],
34
+ Group_1=[ Bo,Po,Mo,Fo], #Bilabial and Labio-dental
35
+ Group_2=[ De,Te,Ne,Le ], #Plosive, nasal and lateral approximant alveolar
36
+ Group_3=[ Ge,Ke,He ], #Velar
37
+ Group_4=[ Ji,Qi,Xi ], #Alveolo-palatal
38
+ Group_5=[ Zhi,Chi,Shi,Ri ], #Retroflex
39
+ Group_6=[ Zi,Ci,Si ], #Fricative and affricate alveolar
40
+ ]
41
+
42
+ def +(f)
43
+ Syllable.new(self,f)
44
+ end
45
+
46
+ def inspect() ; "<#{self.class.name}::#{@name}>" ; end
47
+ end
48
+
49
+
50
+ #
51
+ # A Chinese final (end of a syllable)
52
+ #
53
+
54
+ class Final
55
+ attr :name
56
+
57
+ def initialize(n) ; @name=n ; end
58
+
59
+ All=%w(
60
+ Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
61
+ I Ia Io Ie Iai Iao Iu Ian In Iang Ing
62
+ U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
63
+ ).map{|c| const_set c, Final.new(c)}
64
+
65
+ class << self
66
+ private :new
67
+ include Enumerable
68
+ def each(&blk) ; All.each(&blk) ; end
69
+ end
70
+
71
+ Groups=[
72
+ Group_0=[ Empty ],
73
+ Group_A=[ A,O,E,Ee,Ai,Ei,Ao,Ou,An,En,Ang,Eng,Ong,Er ],
74
+ Group_I=[ I,Ia,Io,Ie,Iai,Iao,Iu,Ian,In,Iang,Ing ],
75
+ Group_U=[ U,Ua,Uo,Uai,Ui,Uan,Un,Uang,Ueng ],
76
+ Group_V=[ V,Ue,Van,Vn,Iong]
77
+ ]
78
+
79
+ def inspect() ; "<#{self.class.name}::#{name}>" ; end
80
+ end
81
+
82
+
83
+ #
84
+ # Combination of an initial and a final, a tone, and possible capitalization
85
+ # A tone of 'nil' means the tone is not specified
86
+
87
class Syllable
  attr_accessor :initial, :final, :tone, :capitalized

  # Attributes compared by #==, in comparison order.
  EQUALITY_ATTRIBUTES = [:initial, :final, :tone, :capitalized]

  # Build a syllable from its parts.
  #
  # initial     :: the initial (start) of the syllable
  # final       :: the final (end) of the syllable
  # tone        :: the tone; nil means "not specified"
  # capitalized :: whether the syllable is capitalized
  def initialize(initial, final, tone = nil, capitalized = false)
    self.initial = initial
    self.final = final
    self.tone = tone
    self.capitalized = capitalized
  end

  # Returns a new syllable of the same class with the same initial, final
  # and capitalization, but carrying the given tone. The receiver is not
  # modified.
  def +(tone)
    self.class.new(initial, final, tone, capitalized)
  end

  # Human readable representation; calls #name on both initial and final.
  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}#{', capitalized' if capitalized}>>"
  end

  alias :capitalized? :capitalized

  # True when the initial +i+ and the final +f+ both appear in one of the
  # group pairs listed in ILLEGAL_COMBINATIONS.
  def self.illegal?(i,f)
    ILLEGAL_COMBINATIONS.any? {|in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f)}
  end

  alias :to_s :inspect

  # Two syllables are equal when initial, final, tone and capitalization
  # are all equal.
  #
  # Objects that do not expose all four readers (nil, strings, ...) are
  # simply "not equal"; comparing against them no longer raises
  # NoMethodError. Duck-typed objects that do expose them still compare
  # attribute by attribute, with +other+'s value as the receiver of ==.
  def ==( other )
    return false unless EQUALITY_ATTRIBUTES.all? { |attribute| other.respond_to?(attribute) }
    EQUALITY_ATTRIBUTES.all? { |attribute| other.send(attribute) == send(attribute) }
  end
end
118
+
119
+ #
120
+ # Some groups of initials and finals may not be combined
121
+ # This list is not exhaustive but is sufficient to resolve ambiguity
122
+ #
123
+
124
+ ILLEGAL_COMBINATIONS=
125
+ [
126
+ [Initial::Group_0, Final::Group_0],
127
+ [Initial::Group_1, Final::Group_0],
128
+ [Initial::Group_2, Final::Group_0],
129
+ [Initial::Group_3, Final::Group_0],
130
+ [Initial::Group_4, Final::Group_0],
131
+
132
+ [Initial::Group_4, Final::Group_U],
133
+ [Initial::Group_4, Final::Group_A],
134
+
135
+ [Initial::Group_3, Final::Group_I],
136
+ [Initial::Group_5, Final::Group_I],
137
+ [Initial::Group_6, Final::Group_I],
138
+
139
+ [Initial::Group_1, Final::Group_V],
140
+ [Initial::Group_3, Final::Group_V],
141
+
142
+ # For "咯 / lo5" to parse correctly we need to list "Le + O" as valid,
143
+ [Initial::Group_2 - [Initial::Le], [Final::O]], #Only bo, po, mo and fo are valid -o combinations
144
+ [Initial::Group_3, [Final::O]],
145
+ [Initial::Group_4, [Final::O]],
146
+ [Initial::Group_5, [Final::O]],
147
+ [Initial::Group_6, [Final::O]],
148
+
149
+ [[Initial::Empty], [Final::Ong]]
150
+ # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
151
+ ]
152
+
153
class << self

  #
  # Yields an [initial, final] pair for every combination listed in
  # data/valid_pinyin.yaml (looked up next to this file).
  #
  # The YAML document is read as a mapping of Final constant names to
  # collections keyed by Initial constant names; names are resolved with
  # Final.const_get / Initial.const_get.
  #
  # Without a block an Enumerator over the same pairs is returned.
  #

  def valid_combinations( &blk )
    # __method__ is the name of the current method; the previous
    # __message__ is not defined by Ruby and raised NameError here.
    return to_enum(__method__) unless block_given?
    path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    YAML::load(IO.read(path)).each do |final_name, initials|
      final = Final.const_get(final_name)
      initials.each do |initial_name, _pinyin|
        yield [Initial.const_get(initial_name), final]
      end
    end
  end

  #
  # Yields a Syllable for every valid initial/final pair, in each of the
  # tones 1 to 5, first uncapitalized and then capitalized.
  #
  # Without a block an Enumerator over the same syllables is returned.
  #

  def all_syllables( &blk )
    return to_enum(__method__) unless block_given?
    # each rather than map: the block is run for its side effect only.
    valid_combinations.each do |i,f|
      1.upto(5) do |t|
        yield Syllable.new(i,f,t,false)
        yield Syllable.new(i,f,t,true)
      end
    end
  end
end
181
+ end
@@ -0,0 +1,7 @@
1
+ module Ting
2
+ module Procable
3
+ def to_proc
4
+ method(:call).to_proc
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,27 @@
1
module Ting
  #
  # Parses a string into syllables, using one conversion (romanization)
  # and one tone notation.
  #
  class Reader
    include Procable

    # conv :: name of the conversion, stored as a string and passed to
    #         Conversions.parse
    # tone :: name of the tone notation; camelized with Ting.camelize and
    #         looked up as a constant in Tones
    def initialize(conv, tone)
      @conv = conv.to_s
      @tone = Tones.const_get Ting.camelize(tone.to_s)
    end

    # Tokenizes +str+ with Conversions.tokenize and maps every token to a
    # syllable carrying its tone.
    #
    # Raises ParseError when anything goes wrong. Every StandardError
    # raised while parsing (including the "Illegal syllable" ParseError
    # below) is wrapped in a ParseError for the whole input at position 0.
    # Non-StandardError exceptions such as Interrupt or SystemExit are
    # deliberately left alone so they are not disguised as parse failures.
    def parse(str)
      Conversions.tokenize(str).map do |token, pos|
        # Split the token into its tone and the remaining syllable text.
        tone, syll = @tone.pop_tone(token)
        tsyll = Conversions.parse(@conv, syll)
        ini, fin = tsyll.initial, tsyll.final
        unless tone && fin && ini
          raise ParseError.new(token, pos),"Illegal syllable <#{token}> in input <#{str}> at position #{pos}."
        end
        tsyll + tone
      end
    rescue StandardError => e
      raise ParseError.new(str, 0, e), "Parsing of #{str.inspect} failed : #{e}"
    end

    alias :<< :parse
    alias :call :parse
  end
end
@@ -1,19 +1,4 @@
1
1
  # coding: utf-8
2
2
 
3
3
  class String
4
- PINYIN_CACHE={}
5
-
6
- def pretty_tones
7
- self.gsub('u:','ü').gsub(/[A-Za-züÜ]{1,5}\d/) do |m|
8
- m.downcase!
9
- PINYIN_CACHE[m] || PINYIN_CACHE[m]=(Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(m.downcase))
10
- end
11
- end
12
-
13
- def bpmf
14
- self.gsub('u:','ü').scan(/[A-Za-züÜ]{1,5}\d/).map do |m|
15
- Ting.writer(:zhuyin, :marks) <<
16
- (Ting.reader(:hanyu, :numbers) << m.downcase)
17
- end.join(' ')
18
- end
19
4
  end
@@ -1,65 +1,65 @@
1
- module Ting
2
- #
3
- # Base class for Tone classes
4
- #
5
- class Tone
6
- VALID_TONES = 1..5
7
- MAX_TONE = NEUTRAL_TONE = 5
8
-
9
- class <<self
10
- # Add a tone to a syllable
11
- def add_tone(s,t)
12
- s
13
- end
14
-
15
- # Determine the tone of a syllable
16
- def peek_tone(s)
17
- NEUTRAL_TONE
18
- end
19
-
20
- # Remove the tone from a syllable
21
- def pop_tone(s)
22
- [NEUTRAL_TONE, s]
23
- end
24
-
25
- private
26
- # Make sure the tone number is in the valid range.
27
- # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
28
- def normalize(t)
29
- if VALID_TONES === t
30
- t
31
- else
32
- t %= MAX_TONE
33
- t = NEUTRAL_TONE if t == 0
34
- end
35
- end
36
-
37
- end
38
- end
39
- end
40
-
41
- # Tone marks as a separate glyph, e.g. for Bopomofo
42
- require "ting/tones/marks"
43
-
44
- # Tone numbers added after the syllable
45
- require "ting/tones/numbers"
46
-
47
- # Tone accents, for Hanyu pinyin
48
- require "ting/tones/accents"
49
-
50
- # Superscript numerals, for Wade-Giles
51
- require "ting/tones/supernum"
52
-
53
- # IPA tone symbols
54
- require "ting/tones/ipa"
55
-
56
- # No tones
57
- require "ting/tones/no_tones"
58
-
59
- module Ting
60
- module Tones
61
- All = [Numbers, Marks, Accents, NoTones]
62
- VALID_TONES = 1..5
63
- MAX_TONE = NEUTRAL_TONE = 5
64
- end
65
- end
1
module Ting
  #
  # Base class for Tone classes
  #
  # The class methods below are neutral defaults: they leave the syllable
  # untouched and always report NEUTRAL_TONE.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class << self
      # Add a tone to a syllable
      # (default implementation: returns the syllable unchanged)
      def add_tone(s,t)
        s
      end

      # Determine the tone of a syllable
      # (default implementation: always NEUTRAL_TONE)
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Remove the tone from a syllable
      # Returns a [tone, syllable] pair
      # (default implementation: NEUTRAL_TONE and the unchanged syllable)
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private

      # Make sure the tone number is in the valid range.
      # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
      #
      # Values outside VALID_TONES are wrapped modulo MAX_TONE, so 0 and 10
      # become 5 and 7 becomes 2. The wrapped value is returned explicitly:
      # ending the method on "t = NEUTRAL_TONE if t == 0" would yield nil
      # whenever the remainder is non-zero.
      def normalize(t)
        return t if VALID_TONES === t
        wrapped = t % MAX_TONE
        wrapped == 0 ? NEUTRAL_TONE : wrapped
      end

    end
  end
end
40
+
41
+ # Tone marks as a separate glyph, e.g. for Bopomofo
42
+ require "ting/tones/marks"
43
+
44
+ # Tone numbers added after the syllable
45
+ require "ting/tones/numbers"
46
+
47
+ # Tone accents, for Hanyu pinyin
48
+ require "ting/tones/accents"
49
+
50
+ # Superscript numerals, for Wade-Giles
51
+ require "ting/tones/supernum"
52
+
53
+ # IPA tone symbols
54
+ require "ting/tones/ipa"
55
+
56
+ # No tones
57
+ require "ting/tones/no_tones"
58
+
59
+ module Ting
60
+ module Tones
61
+ All = [Numbers, Marks, Accents, NoTones]
62
+ VALID_TONES = 1..5
63
+ MAX_TONE = NEUTRAL_TONE = 5
64
+ end
65
+ end