ting 0.3.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +235 -0
- data/LICENSE.txt +674 -0
- data/{README.rdoc → README.md} +43 -35
- data/Rakefile +28 -15
- data/TODO +16 -15
- data/examples/hello.rb +12 -12
- data/lib/ting.rb +36 -61
- data/lib/ting/conversion.rb +6 -5
- data/lib/ting/conversions.rb +88 -80
- data/lib/ting/conversions/hanyu.rb +5 -9
- data/lib/ting/converter.rb +30 -0
- data/lib/ting/data/comparison.csv +410 -410
- data/lib/ting/data/final.csv +12 -10
- data/lib/ting/data/initial.csv +8 -7
- data/lib/ting/data/paladiy.txt +421 -421
- data/lib/ting/data/rules.yaml +38 -27
- data/lib/ting/data/valid_pinyin.yaml +454 -453
- data/lib/ting/exception.rb +14 -17
- data/lib/ting/groundwork.rb +181 -177
- data/lib/ting/procable.rb +7 -0
- data/lib/ting/reader.rb +27 -0
- data/lib/ting/string.rb +0 -15
- data/lib/ting/tones.rb +65 -65
- data/lib/ting/tones/accents.rb +75 -69
- data/lib/ting/tones/ipa.rb +1 -1
- data/lib/ting/tones/no_tones.rb +7 -7
- data/lib/ting/tones/numbers.rb +25 -25
- data/lib/ting/tones/supernum.rb +1 -1
- data/lib/ting/version.rb +1 -1
- data/lib/ting/writer.rb +23 -0
- data/spec/jruby_csv_spec.rb +78 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/ting_spec.rb +19 -0
- data/test/test_comparison.rb +43 -35
- data/test/test_hanyu_coverage.rb +42 -37
- data/ting.gemspec +23 -0
- metadata +95 -71
- data/examples/cgiform/cgiform.rb +0 -24
- data/examples/cgiform/template.rhtml +0 -69
- data/lib/ting/support.rb +0 -19
data/lib/ting/exception.rb
CHANGED
@@ -1,17 +1,14 @@
|
|
1
|
-
module Ting
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
-
|
1
|
+
module Ting
  #
  # Raised when a piece of input can not be parsed.
  #
  # Carries the offending input and the position that was reported for it,
  # next to the regular exception message.
  #
  class ParseError < StandardError
    # input    - the text that failed to parse
    # position - the position given by whoever raised the error
    attr_reader :input, :position

    # input    - the text that failed to parse
    # position - position to report for the failure
    # error    - optional message (or underlying error), passed on to
    #            StandardError as the exception message
    def initialize(input, position, error = nil)
      super(error)
      @input, @position = input, position
    end
  end
end
|
14
|
+
|
data/lib/ting/groundwork.rb
CHANGED
@@ -1,177 +1,181 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
# *
|
4
|
-
# *
|
5
|
-
# * Syllable
|
6
|
-
# * ILLEGAL_COMBINATIONS
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
def
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
[Initial::
|
133
|
-
[Initial::
|
134
|
-
|
135
|
-
[Initial::
|
136
|
-
|
137
|
-
[Initial::
|
138
|
-
|
139
|
-
|
140
|
-
[Initial::Group_3, Final::
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
[Initial::
|
145
|
-
[Initial::
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
[Initial::
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Classes and constants used throughout the module
|
3
|
+
# * Initial
|
4
|
+
# * Final
|
5
|
+
# * Syllable
|
6
|
+
# * ILLEGAL_COMBINATIONS
|
7
|
+
|
8
|
+
require 'yaml'
|
9
|
+
|
10
|
+
module Ting

  #
  # A Chinese initial (start of a syllable)
  #
  # Instances only exist as the constants listed in All; +new+ is made
  # private right after they have been created.
  #
  class Initial
    # Name of the initial, identical to the name of its constant (e.g. "Bo")
    attr_reader :name

    def initialize(n)
      @name = n
    end

    # Every initial in declaration order. Each name is also registered as a
    # constant on this class (Initial::Bo, Initial::Po, ...).
    All = %w(
      Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
      Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
    ).map { |c| const_set(c, Initial.new(c)) }

    class << self
      # Has to stay below All: the constants above are built with Initial.new
      private :new

      include Enumerable

      # Iterates over All, which makes the class itself Enumerable
      def each(&blk)
        All.each(&blk)
      end
    end

    Groups = [
      Group_0 = [ Empty ],
      Group_1 = [ Bo, Po, Mo, Fo ],      # Bilabial and Labio-dental
      Group_2 = [ De, Te, Ne, Le ],      # Plosive, nasal and lateral approximant alveolar
      Group_3 = [ Ge, Ke, He ],          # Velar
      Group_4 = [ Ji, Qi, Xi ],          # Alveolo-palatal
      Group_5 = [ Zhi, Chi, Shi, Ri ],   # Retroflex
      Group_6 = [ Zi, Ci, Si ],          # Fricative and affricate alveolar
    ]

    # Combines this initial with the final +f+ into a Syllable without a tone
    def +(f)
      Syllable.new(self, f)
    end

    def inspect
      "<#{self.class.name}::#{@name}>"
    end
  end


  #
  # A Chinese final (end of a syllable)
  #
  # Instances only exist as the constants listed in All; +new+ is made
  # private right after they have been created.
  #
  class Final
    # Name of the final, identical to the name of its constant (e.g. "Ang")
    attr_reader :name

    def initialize(n)
      @name = n
    end

    # Every final in declaration order. Each name is also registered as a
    # constant on this class (Final::A, Final::O, ...).
    All = %w(
      Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
      I Ia Io Ie Iai Iao Iu Ian In Iang Ing
      U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
    ).map { |c| const_set(c, Final.new(c)) }

    class << self
      # Has to stay below All: the constants above are built with Final.new
      private :new

      include Enumerable

      # Iterates over All, which makes the class itself Enumerable
      def each(&blk)
        All.each(&blk)
      end
    end

    Groups = [
      Group_0 = [ Empty ],
      Group_A = [ A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er ],
      Group_I = [ I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing ],
      Group_U = [ U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng ],
      Group_V = [ V, Ue, Van, Vn, Iong ]
    ]

    def inspect
      "<#{self.class.name}::#{name}>"
    end
  end


  #
  # Combination of an initial and a final, a tone, and possible capitalization
  # A tone of 'nil' means the tone is not specified
  #
  class Syllable
    attr_accessor :initial, :final, :tone, :capitalized

    def initialize(initial, final, tone = nil, capitalized = false)
      self.initial = initial
      self.final = final
      self.tone = tone
      self.capitalized = capitalized
    end

    # Returns a copy of this syllable that carries the given tone
    def +(tone)
      self.class.new(self.initial, self.final, tone, self.capitalized)
    end

    def inspect
      "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}#{', capitalized' if capitalized}>>"
    end

    alias :capitalized? :capitalized

    # True when initial +i+ and final +f+ fall in a pair of groups that is
    # listed in ILLEGAL_COMBINATIONS
    def self.illegal?(i, f)
      ILLEGAL_COMBINATIONS.any? { |in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f) }
    end

    alias :to_s :inspect

    # Two syllables are equal when initial, final, tone and capitalization
    # all match. Objects that do not offer those four readers (nil, strings,
    # ...) are simply unequal instead of raising NoMethodError.
    def ==(other)
      readers = [:initial, :final, :tone, :capitalized]
      return false unless readers.all? { |reader| other.respond_to?(reader) }

      [ other.initial, other.final, other.tone, other.capitalized ] ==
        [ self.initial, self.final, self.tone, self.capitalized ]
    end
  end

  #
  # Some groups of initials and finals may not be combined
  # This list is not exhaustive but is sufficient to resolve ambiguity
  #
  # Each entry is a pair [initials, finals]; any initial of the first list
  # combined with any final of the second list is illegal.
  #
  ILLEGAL_COMBINATIONS =
    [
      [Initial::Group_0, Final::Group_0],
      [Initial::Group_1, Final::Group_0],
      [Initial::Group_2, Final::Group_0],
      [Initial::Group_3, Final::Group_0],
      [Initial::Group_4, Final::Group_0],

      [Initial::Group_4, Final::Group_U],
      [Initial::Group_4, Final::Group_A],

      [Initial::Group_3, Final::Group_I],
      [Initial::Group_5, Final::Group_I],
      [Initial::Group_6, Final::Group_I],

      [Initial::Group_1, Final::Group_V],
      [Initial::Group_3, Final::Group_V],

      # bo, po, mo and fo are the regular -o combinations. "Le + O" is left
      # out of this rule as well, so that "咯 / lo5" parses correctly.
      [Initial::Group_2 - [Initial::Le], [Final::O]],
      [Initial::Group_3, [Final::O]],
      [Initial::Group_4, [Final::O]],
      [Initial::Group_5, [Final::O]],
      [Initial::Group_6, [Final::O]],

      [[Initial::Empty], [Final::Ong]]
      # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
    ]

  class << self

    #
    # Yields an [initial, final] pair for every combination listed in
    # data/valid_pinyin.yaml (read relative to this file on every call).
    # Returns an Enumerator when called without a block.
    #
    def valid_combinations( &blk )
      # __method__ is the name of the current method; the former
      # __message__ is not defined by Ruby and raised NameError here.
      return to_enum(__method__) unless block_given?

      path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
      YAML::load(IO.read(path)).each do |final_name, initials|
        final = Final.const_get(final_name)
        initials.each do |initial_name, _pinyin|
          yield [Initial.const_get(initial_name), final]
        end
      end
    end

    #
    # Yields a Syllable for every valid initial/final pair, in each of the
    # tones 1 to 5, first uncapitalized and then capitalized.
    # Returns an Enumerator when called without a block.
    #
    def all_syllables( &blk )
      return to_enum(__method__) unless block_given?

      valid_combinations do |i, f|
        1.upto(5) do |t|
          yield Syllable.new(i, f, t, false)
          yield Syllable.new(i, f, t, true)
        end
      end
    end
  end
end
|
data/lib/ting/reader.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Ting
  #
  # Turns a string in a given transcription and tone notation into
  # syllable objects.
  #
  class Reader
    include Procable

    # conv - name of the conversion; stored as a String and handed to
    #        Conversions.parse
    # tone - name of the tone notation; camelized with Ting.camelize and
    #        looked up as a constant inside Tones
    def initialize(conv, tone)
      @conv = conv.to_s
      @tone = Tones.const_get Ting.camelize(tone.to_s)
    end

    # Parses +str+ and returns one result per token produced by
    # Conversions.tokenize, each being the parsed syllable with its tone
    # added.
    #
    # Raises ParseError when anything goes wrong. Every StandardError,
    # including the per-token ParseError raised below, is re-raised as a
    # ParseError for the whole string at position 0.
    def parse(str)
      Conversions.tokenize(str).map do |token, pos|
        tone, syll = @tone.pop_tone(token)
        tsyll = Conversions.parse(@conv, syll)
        ini, fin = tsyll.initial, tsyll.final
        unless tone && fin && ini
          raise ParseError.new(token, pos),"Illegal syllable <#{token}> in input <#{str}> at position #{pos}."
        end
        tsyll + tone
      end
    # Only StandardError is wrapped. The former `rescue Object` also caught
    # SystemExit, Interrupt and NoMemoryError and turned them into parse
    # errors; those now propagate untouched.
    rescue StandardError => e
      raise ParseError.new(str, 0, e), "Parsing of #{str.inspect} failed : #{e}"
    end

    alias :<< :parse
    alias :call :parse
  end
end
|
data/lib/ting/string.rb
CHANGED
@@ -1,19 +1,4 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
|
3
3
|
class String
|
4
|
-
PINYIN_CACHE={}
|
5
|
-
|
6
|
-
def pretty_tones
|
7
|
-
self.gsub('u:','ü').gsub(/[A-Za-züÜ]{1,5}\d/) do |m|
|
8
|
-
m.downcase!
|
9
|
-
PINYIN_CACHE[m] || PINYIN_CACHE[m]=(Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(m.downcase))
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def bpmf
|
14
|
-
self.gsub('u:','ü').scan(/[A-Za-züÜ]{1,5}\d/).map do |m|
|
15
|
-
Ting.writer(:zhuyin, :marks) <<
|
16
|
-
(Ting.reader(:hanyu, :numbers) << m.downcase)
|
17
|
-
end.join(' ')
|
18
|
-
end
|
19
4
|
end
|
data/lib/ting/tones.rb
CHANGED
@@ -1,65 +1,65 @@
|
|
1
|
-
module Ting
|
2
|
-
#
|
3
|
-
# Base class for Tone classes
|
4
|
-
#
|
5
|
-
class Tone
|
6
|
-
VALID_TONES = 1..5
|
7
|
-
MAX_TONE = NEUTRAL_TONE = 5
|
8
|
-
|
9
|
-
class <<self
|
10
|
-
# Add a tone to a syllable
|
11
|
-
def add_tone(s,t)
|
12
|
-
s
|
13
|
-
end
|
14
|
-
|
15
|
-
# Determine the tone of a syllable
|
16
|
-
def peek_tone(s)
|
17
|
-
NEUTRAL_TONE
|
18
|
-
end
|
19
|
-
|
20
|
-
# Remove the tone from a syllable
|
21
|
-
def pop_tone(s)
|
22
|
-
[NEUTRAL_TONE, s]
|
23
|
-
end
|
24
|
-
|
25
|
-
private
|
26
|
-
# Make sure the tone number is in the valid range.
|
27
|
-
# Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
|
28
|
-
def normalize(t)
|
29
|
-
if VALID_TONES === t
|
30
|
-
t
|
31
|
-
else
|
32
|
-
t %= MAX_TONE
|
33
|
-
t = NEUTRAL_TONE if t == 0
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# Tone marks as a separate glyph, e.g. for Bopomofo
|
42
|
-
require "ting/tones/marks"
|
43
|
-
|
44
|
-
# Tone numbers added after the syllable
|
45
|
-
require "ting/tones/numbers"
|
46
|
-
|
47
|
-
# Tone accents, for Hanyu pinyin
|
48
|
-
require "ting/tones/accents"
|
49
|
-
|
50
|
-
# Superscript numerals, for Wade-Giles
|
51
|
-
require "ting/tones/supernum"
|
52
|
-
|
53
|
-
# IPA tone symbols
|
54
|
-
require "ting/tones/ipa"
|
55
|
-
|
56
|
-
# No tones
|
57
|
-
require "ting/tones/no_tones"
|
58
|
-
|
59
|
-
module Ting
|
60
|
-
module Tones
|
61
|
-
All = [Numbers, Marks, Accents, NoTones]
|
62
|
-
VALID_TONES = 1..5
|
63
|
-
MAX_TONE = NEUTRAL_TONE = 5
|
64
|
-
end
|
65
|
-
end
|
1
|
+
module Ting
  #
  # Base class for Tone classes
  #
  # The class methods defined here are neutral defaults: they leave the
  # syllable untouched and always report the neutral tone.
  #
  class Tone
    VALID_TONES = 1..5
    MAX_TONE = NEUTRAL_TONE = 5

    class << self
      # Add a tone to a syllable
      # This default returns +s+ unchanged and ignores +t+.
      def add_tone(s, t)
        s
      end

      # Determine the tone of a syllable
      # This default always answers NEUTRAL_TONE.
      def peek_tone(s)
        NEUTRAL_TONE
      end

      # Remove the tone from a syllable
      # Returns a pair [tone, syllable]; this default pairs NEUTRAL_TONE
      # with the unchanged +s+.
      def pop_tone(s)
        [NEUTRAL_TONE, s]
      end

      private

      # Make sure the tone number is in the valid range.
      # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
      #
      # Values outside VALID_TONES are wrapped modulo MAX_TONE, so 6 maps
      # to 1 and 0 or 10 map to 5. The wrapped value is returned explicitly:
      # ending the method on `t = NEUTRAL_TONE if t == 0` made it answer
      # nil whenever the remainder was not zero.
      def normalize(t)
        return t if VALID_TONES === t

        t %= MAX_TONE
        t == 0 ? NEUTRAL_TONE : t
      end
    end
  end
end
|
40
|
+
|
41
|
+
# Tone marks as a separate glyph, e.g. for Bopomofo
|
42
|
+
require "ting/tones/marks"
|
43
|
+
|
44
|
+
# Tone numbers added after the syllable
|
45
|
+
require "ting/tones/numbers"
|
46
|
+
|
47
|
+
# Tone accents, for Hanyu pinyin
|
48
|
+
require "ting/tones/accents"
|
49
|
+
|
50
|
+
# Superscript numerals, for Wade-Giles
|
51
|
+
require "ting/tones/supernum"
|
52
|
+
|
53
|
+
# IPA tone symbols
|
54
|
+
require "ting/tones/ipa"
|
55
|
+
|
56
|
+
# No tones
|
57
|
+
require "ting/tones/no_tones"
|
58
|
+
|
59
|
+
module Ting
  #
  # Namespace for the tone notations loaded by the requires above.
  #
  module Tones
    # The neutral tone is written as 5 and is also the highest tone number
    NEUTRAL_TONE = 5
    MAX_TONE     = NEUTRAL_TONE

    # Range of tone numbers that are accepted
    VALID_TONES = 1..5

    # Tone notations collected in this list
    All = [Numbers, Marks, Accents, NoTones]
  end
end
|