ting 0.3.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +235 -0
- data/LICENSE.txt +674 -0
- data/{README.rdoc → README.md} +43 -35
- data/Rakefile +28 -15
- data/TODO +16 -15
- data/examples/hello.rb +12 -12
- data/lib/ting.rb +36 -61
- data/lib/ting/conversion.rb +6 -5
- data/lib/ting/conversions.rb +88 -80
- data/lib/ting/conversions/hanyu.rb +5 -9
- data/lib/ting/converter.rb +30 -0
- data/lib/ting/data/comparison.csv +410 -410
- data/lib/ting/data/final.csv +12 -10
- data/lib/ting/data/initial.csv +8 -7
- data/lib/ting/data/paladiy.txt +421 -421
- data/lib/ting/data/rules.yaml +38 -27
- data/lib/ting/data/valid_pinyin.yaml +454 -453
- data/lib/ting/exception.rb +14 -17
- data/lib/ting/groundwork.rb +181 -177
- data/lib/ting/procable.rb +7 -0
- data/lib/ting/reader.rb +27 -0
- data/lib/ting/string.rb +0 -15
- data/lib/ting/tones.rb +65 -65
- data/lib/ting/tones/accents.rb +75 -69
- data/lib/ting/tones/ipa.rb +1 -1
- data/lib/ting/tones/no_tones.rb +7 -7
- data/lib/ting/tones/numbers.rb +25 -25
- data/lib/ting/tones/supernum.rb +1 -1
- data/lib/ting/version.rb +1 -1
- data/lib/ting/writer.rb +23 -0
- data/spec/jruby_csv_spec.rb +78 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/ting_spec.rb +19 -0
- data/test/test_comparison.rb +43 -35
- data/test/test_hanyu_coverage.rb +42 -37
- data/ting.gemspec +23 -0
- metadata +95 -71
- data/examples/cgiform/cgiform.rb +0 -24
- data/examples/cgiform/template.rhtml +0 -69
- data/lib/ting/support.rb +0 -19
data/lib/ting/exception.rb
CHANGED
@@ -1,17 +1,14 @@
|
|
1
|
-
module Ting
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
-
|
1
|
+
module Ting
|
2
|
+
|
3
|
+
# Error type carrying the text that failed to parse together with a position.
#
# Besides the regular exception message it exposes:
# * +input+    - the first constructor argument (the offending text)
# * +position+ - the second constructor argument (a position inside the
#                input; the unit is not established here -- TODO confirm)
class ParseError < StandardError
  attr_reader :input
  attr_reader :position

  # The optional third argument is handed to StandardError as the message;
  # when it is nil the default exception message is used.
  def initialize(input, position, error = nil)
    super(error)
    @input, @position = input, position
  end
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
data/lib/ting/groundwork.rb
CHANGED
@@ -1,177 +1,181 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
# *
|
4
|
-
# *
|
5
|
-
# * Syllable
|
6
|
-
# * ILLEGAL_COMBINATIONS
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
def
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
[Initial::
|
133
|
-
[Initial::
|
134
|
-
|
135
|
-
[Initial::
|
136
|
-
|
137
|
-
[Initial::
|
138
|
-
|
139
|
-
|
140
|
-
[Initial::Group_3, Final::
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
[Initial::
|
145
|
-
[Initial::
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
[Initial::
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Classes and constants used throughout the module
|
3
|
+
# * Initial
|
4
|
+
# * Final
|
5
|
+
# * Syllable
|
6
|
+
# * ILLEGAL_COMBINATIONS
|
7
|
+
|
8
|
+
require 'yaml'
|
9
|
+
|
10
|
+
module Ting
|
11
|
+
|
12
|
+
#
|
13
|
+
# A Chinese initial (start of a syllable)
|
14
|
+
#
|
15
|
+
|
16
|
+
# A Chinese initial (the start of a syllable).
#
# The only instances are the constants created below (Initial::Bo, ...);
# +new+ is made private once they have been built. The class itself is
# enumerable over those instances.
class Initial
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # One constant per initial, collected in declaration order.
  All = %w(
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ).map { |label| const_set(label, Initial.new(label)) }

  # No further instances may be created from outside.
  private_class_method :new
  extend Enumerable

  # Iterates over every initial in All.
  def self.each(&blk)
    All.each(&blk)
  end

  Group_0 = [Empty]
  Group_1 = [Bo, Po, Mo, Fo]       # Bilabial and labio-dental
  Group_2 = [De, Te, Ne, Le]       # Plosive, nasal and lateral approximant alveolar
  Group_3 = [Ge, Ke, He]           # Velar
  Group_4 = [Ji, Qi, Xi]           # Alveolo-palatal
  Group_5 = [Zhi, Chi, Shi, Ri]    # Retroflex
  Group_6 = [Zi, Ci, Si]           # Fricative and affricate alveolar

  Groups = [Group_0, Group_1, Group_2, Group_3, Group_4, Group_5, Group_6]

  # Combines this initial with a final into a Syllable.
  def +(f)
    Syllable.new(self, f)
  end

  def inspect
    format("<%s::%s>", self.class.name, @name)
  end
end
|
48
|
+
|
49
|
+
|
50
|
+
#
|
51
|
+
# A Chinese final (end of a syllable)
|
52
|
+
#
|
53
|
+
|
54
|
+
# A Chinese final (the end of a syllable).
#
# The only instances are the constants created below (Final::A, ...);
# +new+ is made private once they have been built. The class itself is
# enumerable over those instances.
class Final
  attr_reader :name

  def initialize(n)
    @name = n
  end

  # One constant per final, collected in declaration order.
  All = %w(
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ).map { |label| const_set(label, Final.new(label)) }

  # No further instances may be created from outside.
  private_class_method :new
  extend Enumerable

  # Iterates over every final in All.
  def self.each(&blk)
    All.each(&blk)
  end

  Group_0 = [Empty]
  Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er]
  Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing]
  Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng]
  Group_V = [V, Ue, Van, Vn, Iong]

  Groups = [Group_0, Group_A, Group_I, Group_U, Group_V]

  def inspect
    format("<%s::%s>", self.class.name, name)
  end
end
|
81
|
+
|
82
|
+
|
83
|
+
#
|
84
|
+
# Combination of an initial and a final, a tone, and possible capitalization
|
85
|
+
# A tone of 'nil' means the tone is not specified
|
86
|
+
|
87
|
+
# Combination of an initial and a final, a tone, and possible capitalization.
# A tone of 'nil' means the tone is not specified.
class Syllable
  attr_accessor :initial, :final, :tone, :capitalized

  def initialize(initial, final, tone = nil, capitalized = false)
    self.initial = initial
    self.final = final
    self.tone = tone
    self.capitalized = capitalized
  end

  # Returns a new syllable with the same initial, final and capitalization
  # but the given tone; the receiver is left untouched.
  def +(tone)
    self.class.new(initial, final, tone, capitalized)
  end

  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}#{', capitalized' if capitalized}>>"
  end

  alias :capitalized? :capitalized

  # True when the initial/final pair matches any entry of ILLEGAL_COMBINATIONS,
  # i.e. the initial is in the entry's first list and the final in its second.
  def self.illegal?(i, f)
    ILLEGAL_COMBINATIONS.any? { |in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f) }
  end

  alias :to_s :inspect

  # Two syllables are equal when initial, final, tone and capitalization all
  # match. Objects that do not expose those four readers (nil, strings, ...)
  # are simply unequal instead of raising NoMethodError.
  def ==(other)
    attributes = [:initial, :final, :tone, :capitalized]
    return false unless attributes.all? { |attribute| other.respond_to?(attribute) }

    attributes.map { |attribute| other.public_send(attribute) } ==
      attributes.map { |attribute| public_send(attribute) }
  end
end
|
118
|
+
|
119
|
+
#
|
120
|
+
# Some groups of initials and finals may not be combined
|
121
|
+
# This list is not exhaustive but is sufficient to resolve ambiguity
|
122
|
+
#
|
123
|
+
|
124
|
+
# Each entry is a pair [initials, finals]: any initial from the first list
# together with any final from the second list is treated as illegal.
ILLEGAL_COMBINATIONS = [
  # Initial groups 0 through 4 are never paired with the empty final
  *Initial::Groups.first(5).map { |initials| [initials, Final::Group_0] },

  [Initial::Group_4, Final::Group_U],
  [Initial::Group_4, Final::Group_A],

  [Initial::Group_3, Final::Group_I],
  [Initial::Group_5, Final::Group_I],
  [Initial::Group_6, Final::Group_I],

  [Initial::Group_1, Final::Group_V],
  [Initial::Group_3, Final::Group_V],

  # Only bo, po, mo and fo are valid -o combinations, with one exception:
  # for "咯 / lo5" to parse correctly "Le + O" has to stay valid, so Le is
  # taken out of group 2 here.
  [Initial::Group_2 - [Initial::Le], [Final::O]],
  # Groups 3 through 6 never take -o
  *Initial::Groups.last(4).map { |initials| [initials, [Final::O]] },

  # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
  [[Initial::Empty], [Final::Ong]]
]
|
152
|
+
|
153
|
+
class << self

  #
  # Yields an [initial, final] pair for every combination listed in
  # data/valid_pinyin.yaml (located next to this file).
  #
  # The YAML document is iterated as final name => { initial name => ... };
  # both names are resolved to the matching Final / Initial constants.
  # Without a block an Enumerator over the same pairs is returned.
  #
  def valid_combinations( &blk )
    # __method__ is the name of the current method; the previous
    # `__message__` does not exist in Ruby and raised NameError.
    return to_enum(__method__) unless block_given?

    path = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    YAML.load(File.read(path)).each do |final_name, initials|
      final = Final.const_get(final_name)
      initials.each do |initial_name, _pinyin|
        yield [Initial.const_get(initial_name), final]
      end
    end
  end

  #
  # Yields every valid initial/final pair in each of the tones 1 through 5,
  # first uncapitalized and then capitalized.
  # Without a block an Enumerator over the same syllables is returned.
  #
  def all_syllables( &blk )
    return to_enum(__method__) unless block_given?

    # Passing a block (instead of calling .map on the block-less form)
    # streams the pairs and avoids building a throw-away array.
    valid_combinations do |initial, final|
      1.upto(5) do |tone|
        yield Syllable.new(initial, final, tone, false)
        yield Syllable.new(initial, final, tone, true)
      end
    end
  end
end
|
181
|
+
end
|
data/lib/ting/reader.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Ting
|
2
|
+
# Parses romanized text into syllables for one conversion / tone notation.
class Reader
  include Procable

  # conv - name of the conversion, stored as a string and passed on to
  #        Conversions.parse
  # tone - name of the tone notation; camelized and looked up as a constant
  #        inside Tones
  def initialize(conv, tone)
    @conv = conv.to_s
    @tone = Tones.const_get Ting.camelize(tone.to_s)
  end

  # Tokenizes +str+ and turns every token into a syllable with its tone.
  #
  # Returns the array of values produced by `syllable + tone` for each token.
  # Raises ParseError when a token lacks a tone, initial or final, or when
  # any StandardError occurs while parsing.
  def parse(str)
    Conversions.tokenize(str).map do |token, pos|
      tone, syll = @tone.pop_tone(token)
      tsyll = Conversions.parse(@conv, syll)
      ini, fin = tsyll.initial, tsyll.final
      unless tone && fin && ini
        raise ParseError.new(token, pos), "Illegal syllable <#{token}> in input <#{str}> at position #{pos}."
      end
      tsyll + tone
    end
  # Only StandardError is wrapped: `rescue Object` also swallowed Interrupt,
  # SystemExit and NoMemoryError and re-raised them as ParseError.
  # NOTE(review): an inner ParseError is wrapped as well, so callers see the
  # whole string and position 0 rather than the failing token's position.
  rescue StandardError => e
    raise ParseError.new(str, 0, e), "Parsing of #{str.inspect} failed : #{e}"
  end

  alias :<< :parse
  alias :call :parse
end
|
27
|
+
end
|
data/lib/ting/string.rb
CHANGED
@@ -1,19 +1,4 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
|
3
3
|
class String
|
4
|
-
PINYIN_CACHE={}
|
5
|
-
|
6
|
-
def pretty_tones
|
7
|
-
self.gsub('u:','ü').gsub(/[A-Za-züÜ]{1,5}\d/) do |m|
|
8
|
-
m.downcase!
|
9
|
-
PINYIN_CACHE[m] || PINYIN_CACHE[m]=(Ting.writer(:hanyu, :accents) << Ting.reader(:hanyu, :numbers).parse(m.downcase))
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def bpmf
|
14
|
-
self.gsub('u:','ü').scan(/[A-Za-züÜ]{1,5}\d/).map do |m|
|
15
|
-
Ting.writer(:zhuyin, :marks) <<
|
16
|
-
(Ting.reader(:hanyu, :numbers) << m.downcase)
|
17
|
-
end.join(' ')
|
18
|
-
end
|
19
4
|
end
|
data/lib/ting/tones.rb
CHANGED
@@ -1,65 +1,65 @@
|
|
1
|
-
module Ting
|
2
|
-
#
|
3
|
-
# Base class for Tone classes
|
4
|
-
#
|
5
|
-
class Tone
|
6
|
-
VALID_TONES = 1..5
|
7
|
-
MAX_TONE = NEUTRAL_TONE = 5
|
8
|
-
|
9
|
-
class <<self
|
10
|
-
# Add a tone to a syllable
|
11
|
-
def add_tone(s,t)
|
12
|
-
s
|
13
|
-
end
|
14
|
-
|
15
|
-
# Determine the tone of a syllable
|
16
|
-
def peek_tone(s)
|
17
|
-
NEUTRAL_TONE
|
18
|
-
end
|
19
|
-
|
20
|
-
# Remove the tone from a syllable
|
21
|
-
def pop_tone(s)
|
22
|
-
[NEUTRAL_TONE, s]
|
23
|
-
end
|
24
|
-
|
25
|
-
private
|
26
|
-
# Make sure the tone number is in the valid range.
|
27
|
-
# Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
|
28
|
-
def normalize(t)
|
29
|
-
if VALID_TONES === t
|
30
|
-
t
|
31
|
-
else
|
32
|
-
t %= MAX_TONE
|
33
|
-
t = NEUTRAL_TONE if t == 0
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# Tone marks as a separate glyph, e.g. for Bopomofo
|
42
|
-
require "ting/tones/marks"
|
43
|
-
|
44
|
-
# Tone numbers added after the syllable
|
45
|
-
require "ting/tones/numbers"
|
46
|
-
|
47
|
-
# Tone accents, for Hanyu pinyin
|
48
|
-
require "ting/tones/accents"
|
49
|
-
|
50
|
-
# Superscript numerals, for Wade-Giles
|
51
|
-
require "ting/tones/supernum"
|
52
|
-
|
53
|
-
# IPA tone symbols
|
54
|
-
require "ting/tones/ipa"
|
55
|
-
|
56
|
-
# No tones
|
57
|
-
require "ting/tones/no_tones"
|
58
|
-
|
59
|
-
module Ting
|
60
|
-
module Tones
|
61
|
-
All = [Numbers, Marks, Accents, NoTones]
|
62
|
-
VALID_TONES = 1..5
|
63
|
-
MAX_TONE = NEUTRAL_TONE = 5
|
64
|
-
end
|
65
|
-
end
|
1
|
+
module Ting
|
2
|
+
#
|
3
|
+
# Base class for Tone classes
|
4
|
+
#
|
5
|
+
# Base class for Tone classes.
#
# The class-level methods here treat every syllable as having the neutral
# tone and leave the syllable text untouched.
class Tone
  VALID_TONES = 1..5
  MAX_TONE = NEUTRAL_TONE = 5

  class << self
    # Add a tone to a syllable; this base version returns the syllable as is.
    def add_tone(s,t)
      s
    end

    # Determine the tone of a syllable; this base version always answers
    # NEUTRAL_TONE.
    def peek_tone(s)
      NEUTRAL_TONE
    end

    # Remove the tone from a syllable.
    # Returns [tone, syllable]; this base version pairs NEUTRAL_TONE with the
    # unchanged syllable.
    def pop_tone(s)
      [NEUTRAL_TONE, s]
    end

    private

    # Make sure the tone number is in the valid range.
    # Neutral tone is always represented as NEUTRAL_TONE (5), and not 0.
    #
    # Values outside VALID_TONES are wrapped modulo MAX_TONE. The wrapped
    # value is returned explicitly: previously the method ended on a
    # modifier-`if` assignment and answered nil whenever the remainder was
    # non-zero.
    def normalize(t)
      return t if VALID_TONES === t

      wrapped = t % MAX_TONE
      wrapped == 0 ? NEUTRAL_TONE : wrapped
    end
  end
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Tone marks as a separate glyph, e.g. for Bopomofo
|
42
|
+
require "ting/tones/marks"
|
43
|
+
|
44
|
+
# Tone numbers added after the syllable
|
45
|
+
require "ting/tones/numbers"
|
46
|
+
|
47
|
+
# Tone accents, for Hanyu pinyin
|
48
|
+
require "ting/tones/accents"
|
49
|
+
|
50
|
+
# Superscript numerals, for Wade-Giles
|
51
|
+
require "ting/tones/supernum"
|
52
|
+
|
53
|
+
# IPA tone symbols
|
54
|
+
require "ting/tones/ipa"
|
55
|
+
|
56
|
+
# No tones
|
57
|
+
require "ting/tones/no_tones"
|
58
|
+
|
59
|
+
module Ting
  # Namespace holding the tone notations loaded by the requires above.
  module Tones
    # NOTE(review): ting/tones/supernum and ting/tones/ipa are required by
    # this file but are not part of this list -- confirm that is intended.
    All = [Numbers, Marks, Accents, NoTones]

    # The neutral tone is numbered 5, which is also the highest tone number.
    NEUTRAL_TONE = 5
    MAX_TONE = NEUTRAL_TONE
    VALID_TONES = 1..MAX_TONE
  end
end
|