zhongwen_tools 0.17.5 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/zhongwen_tools/regex.rb +7 -1
- data/lib/zhongwen_tools/romanization.rb +13 -15
- data/lib/zhongwen_tools/romanization/pinyin.rb +37 -31
- data/lib/zhongwen_tools/romanization/pinyin_table.rb +39 -12
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_pinyin.rb +34 -8
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 225b43aacf009731b4034a754af359efd7d067d1
|
|
4
|
+
data.tar.gz: d65490aa4067bbedf88e2805d7635bb694261bbe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 14e73ad7b0b16325186b0643202416957605a0726596e44db660ba324c72685393b47c705fc40869cac9cf9815de533bff985e9daa36d903fe1dddc45587579a
|
|
7
|
+
data.tar.gz: 17ed1d4fbce22e1d13f8df22a67f716a85668efa9bd00ad6d6d89dada1f8d47320dce4b207119242cb43ddbc4ec3525a528b424a851bd70398bfa039c23ed737
|
data/lib/zhongwen_tools/regex.rb
CHANGED
|
@@ -99,6 +99,11 @@ module ZhongwenTools
|
|
|
99
99
|
}
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
+
def self.py_syllabic_nasals
|
|
103
|
+
# NOTE: includes combining diacritical marks for n̄ňm̄m̌m̀
|
|
104
|
+
/((N̄|n̄|ň)g?|[ŇŃǸńǹ]g?|m̄|m̌|m̀|ḿ)/
|
|
105
|
+
end
|
|
106
|
+
|
|
102
107
|
def self.py_tones
|
|
103
108
|
{
|
|
104
109
|
'a' => '[āáǎàa]',
|
|
@@ -111,7 +116,8 @@ module ZhongwenTools
|
|
|
111
116
|
end
|
|
112
117
|
|
|
113
118
|
def self.only_tones
|
|
114
|
-
|
|
119
|
+
# NOTE: includes combining diacritical marks for n̄ňm̄m̌m̀
|
|
120
|
+
/([āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜńǹḿŃŇǸ]|N̄|n̄|ň|m̄|m̌|m̀)/
|
|
115
121
|
end
|
|
116
122
|
end
|
|
117
123
|
end
|
|
@@ -8,10 +8,8 @@ require 'zhongwen_tools/romanization/yale'
|
|
|
8
8
|
require 'zhongwen_tools/romanization/mps2'
|
|
9
9
|
require 'zhongwen_tools/romanization/romanization_table'
|
|
10
10
|
|
|
11
|
-
# NOTE: Creates several dynamic Modules and their associated methods.
|
|
12
|
-
# e.g. ZhongwenTools::Romanization::ZhuyinFuhao.to_bpmf
|
|
13
|
-
# ZhongwenTools::Romanization::WadeGiles.to_wg
|
|
14
11
|
module ZhongwenTools
|
|
12
|
+
# Public: Romanization converts, detects and splits different romanizations.
|
|
15
13
|
module Romanization
|
|
16
14
|
def self.convert(str, to, from)
|
|
17
15
|
# NOTE: don't convert if it already is converted.
|
|
@@ -105,14 +103,14 @@ module ZhongwenTools
|
|
|
105
103
|
end
|
|
106
104
|
|
|
107
105
|
def self.convert_romanization(str, from, to)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
106
|
+
# NOTE: extracting/refactoring the token selection below causes tests to fail.
|
|
107
|
+
if from == :pyn
|
|
108
|
+
tokens = ZhongwenTools::Romanization::Pinyin.split_pyn(str).uniq
|
|
109
|
+
else
|
|
110
|
+
tokens = romanization_module(from).send(:split, str).uniq
|
|
111
|
+
end
|
|
114
112
|
|
|
115
|
-
|
|
113
|
+
tokens.collect do |t|
|
|
116
114
|
search, replace = find_token_replacement(t, str, to, from)
|
|
117
115
|
str = str.gsub(search, replace)
|
|
118
116
|
end
|
|
@@ -193,14 +191,15 @@ module ZhongwenTools
|
|
|
193
191
|
# TODO: memoize
|
|
194
192
|
@memoized_romanization_values = {}
|
|
195
193
|
@memoized_romanization_values[type] = ZhongwenTools::Romanization::ROMANIZATIONS_TABLE.map do |r|
|
|
196
|
-
"[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn]
|
|
194
|
+
"[#{ r[type][0] }#{ r[type][0].upcase }]#{ r[type][1..-1] }" || r[:pyn]
|
|
197
195
|
end.flatten
|
|
198
196
|
|
|
199
197
|
@memoized_romanization_values[type]
|
|
200
198
|
end
|
|
201
199
|
|
|
202
|
-
def self.romanization_module(type)
|
|
203
|
-
module_name =
|
|
200
|
+
def self.romanization_module(type = :py)
|
|
201
|
+
module_name = ROMANIZATION_TYPES.find{ |_k, v| v.include?(type.to_s) }.first
|
|
202
|
+
|
|
204
203
|
ZhongwenTools::Romanization.const_get(module_name)
|
|
205
204
|
end
|
|
206
205
|
|
|
@@ -208,8 +207,7 @@ module ZhongwenTools
|
|
|
208
207
|
!str[/\-/].nil?
|
|
209
208
|
end
|
|
210
209
|
|
|
211
|
-
|
|
212
|
-
RomanizationTypes = {
|
|
210
|
+
ROMANIZATION_TYPES = {
|
|
213
211
|
ZhuyinFuhao: %w(bpmf zhuyin_fuhao zhuyinfuhao zyfh zhyfh bopomofo),
|
|
214
212
|
WadeGiles: %w(wg wade_giles),
|
|
215
213
|
Yale: ['yale'],
|
|
@@ -4,41 +4,43 @@ require 'zhongwen_tools/caps'
|
|
|
4
4
|
require 'zhongwen_tools/romanization'
|
|
5
5
|
|
|
6
6
|
module ZhongwenTools
|
|
7
|
+
# Public: Romanization converts to pinyin and pyn.
|
|
7
8
|
module Romanization
|
|
8
9
|
def self.convert_to_py(str, from)
|
|
9
10
|
str = convert_romanization(str, from, :pyn) if from != :pyn
|
|
10
|
-
|
|
11
|
+
Pinyin.convert_pyn_to_pinyin(str)
|
|
11
12
|
end
|
|
12
13
|
|
|
13
14
|
def self.convert_to_pyn(str, from)
|
|
14
15
|
orig_str = str.dup
|
|
15
16
|
|
|
16
17
|
if from == :py
|
|
17
|
-
str =
|
|
18
|
+
str = Romanization::Pinyin.convert_pinyin_to_pyn(str)
|
|
18
19
|
else
|
|
19
20
|
str = convert_romanization(str, from, :pyn)
|
|
20
21
|
end
|
|
21
22
|
|
|
22
|
-
str =
|
|
23
|
+
str = Romanization::Pinyin.add_hyphens_to_pyn(str) if hyphenated?(orig_str)
|
|
23
24
|
|
|
24
25
|
str
|
|
25
26
|
end
|
|
26
27
|
|
|
28
|
+
# Public: methods to convert, detect and split pinyin or
|
|
29
|
+
# pyn (pinyin with numbers, e.g. hao3).
|
|
27
30
|
module Pinyin
|
|
28
31
|
%w(pinyin py pyn).each do |romanization|
|
|
29
32
|
define_singleton_method("to_#{romanization}") do |*args|
|
|
30
33
|
str, from = args
|
|
31
|
-
from ||=
|
|
34
|
+
from ||= Romanization.romanization? str
|
|
32
35
|
|
|
33
|
-
|
|
34
|
-
ZhongwenTools::Romanization.convert str, py_type(romanization), (py_type(from) || from)
|
|
36
|
+
Romanization.convert str, py_type(romanization), (py_type(from) || from)
|
|
35
37
|
end
|
|
36
38
|
end
|
|
37
39
|
|
|
38
40
|
def self.split_pyn(str)
|
|
39
41
|
# FIXME: ignore punctuation
|
|
40
|
-
regex = str[/[1-5]/].nil? ? /(#{
|
|
41
|
-
# NOTE: p[/[^\-]*/].to_s is 25% faster
|
|
42
|
+
regex = str[/[1-5]/].nil? ? /(#{ Regex.pinyin_toneless })/ : /(#{ Regex.pyn }|#{ Regex.pinyin_toneless })/
|
|
43
|
+
# NOTE: p[/[^\-]*/].to_s is 25% faster than gsub('-', '')
|
|
42
44
|
str.scan(regex).map{ |arr| arr[0].strip[/[^\-]*/].to_s }.flatten
|
|
43
45
|
end
|
|
44
46
|
|
|
@@ -50,7 +52,7 @@ module ZhongwenTools
|
|
|
50
52
|
# NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
|
|
51
53
|
# Special Case "yìnián" should be "yì" + "nián"
|
|
52
54
|
word = word.gsub('ngu', 'n-gu')
|
|
53
|
-
|
|
55
|
+
word = word.gsub(/([#{ Regex.only_tones }])(ni[#{ Regex.py_tones['a'] }])/){ "#{ $1 }-#{ $2 }" }
|
|
54
56
|
result = word.split(/['\-]/).flatten.map do |x|
|
|
55
57
|
find_py(x)
|
|
56
58
|
end
|
|
@@ -70,14 +72,15 @@ module ZhongwenTools
|
|
|
70
72
|
#
|
|
71
73
|
# Returns Boolean.
|
|
72
74
|
def self.py?(str)
|
|
73
|
-
if str[
|
|
75
|
+
if str[Regex.only_tones].nil? && str[/[1-5]/].nil?
|
|
74
76
|
pyn?(str)
|
|
75
77
|
else
|
|
76
|
-
#
|
|
78
|
+
# TODO: py regex does not include capitals with tones.
|
|
77
79
|
# NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
|
|
78
|
-
|
|
80
|
+
|
|
81
|
+
regex = /(#{ Regex.punc }|#{ Regex.py }|#{ Regex.py_syllabic_nasals }|[\s\-])/
|
|
79
82
|
str = str.gsub('ngu', 'n-gu')
|
|
80
|
-
|
|
83
|
+
Caps.downcase(str).gsub(regex, '').strip == ''
|
|
81
84
|
end
|
|
82
85
|
end
|
|
83
86
|
|
|
@@ -90,8 +93,9 @@ module ZhongwenTools
|
|
|
90
93
|
# Returns Boolean.
|
|
91
94
|
def self.pyn?(str)
|
|
92
95
|
# FIXME: use strip_punctuation method
|
|
93
|
-
normalized_str =
|
|
96
|
+
normalized_str = Caps.downcase(str.gsub(Regex.punc, '').gsub(/[\s\-]/, ''))
|
|
94
97
|
pyn_arr = split_pyn(normalized_str).map{ |p| p }
|
|
98
|
+
pyn_arr << normalized_str if pyn_arr.size == 0 && PYN_SYLLABIC_NASALS.include?(normalized_str.gsub(/[1-5]/, ''))
|
|
95
99
|
|
|
96
100
|
pyn_matches_properly?(pyn_arr, normalized_str) &&
|
|
97
101
|
are_all_pyn_syllables_complete?(pyn_arr)
|
|
@@ -112,7 +116,7 @@ module ZhongwenTools
|
|
|
112
116
|
end
|
|
113
117
|
|
|
114
118
|
def self.are_all_pyn_syllables_complete?(pyn_arr)
|
|
115
|
-
pyns = ROMANIZATIONS_TABLE.map{ |r| r[:pyn] }
|
|
119
|
+
pyns = ROMANIZATIONS_TABLE.map{ |r| r[:pyn] } + PYN_SYLLABIC_NASALS
|
|
116
120
|
|
|
117
121
|
pyn_syllables = pyn_arr.select do |p|
|
|
118
122
|
pyns.include?(p.gsub(/[1-5]/, ''))
|
|
@@ -128,20 +132,21 @@ module ZhongwenTools
|
|
|
128
132
|
end
|
|
129
133
|
|
|
130
134
|
def self.normalize_pinyin(pinyin)
|
|
131
|
-
[
|
|
135
|
+
[Caps.downcase(pinyin), capitalized?(pinyin)]
|
|
132
136
|
end
|
|
133
137
|
|
|
134
138
|
def self.find_py(str)
|
|
135
|
-
|
|
139
|
+
regex = /(#{ Regex.py }|#{ Regex.py_syllabic_nasals })/
|
|
140
|
+
str.scan(regex).map{ |x| x.compact[0] }
|
|
136
141
|
end
|
|
137
142
|
|
|
138
143
|
def self.recapitalize(obj, capitalized)
|
|
139
144
|
return obj unless capitalized
|
|
140
145
|
|
|
141
|
-
if obj.
|
|
142
|
-
|
|
143
|
-
elsif obj.
|
|
144
|
-
[
|
|
146
|
+
if obj.is_a? String
|
|
147
|
+
Caps.capitalize(obj)
|
|
148
|
+
elsif obj.is_a? Array
|
|
149
|
+
[Caps.capitalize(obj[0]), obj[1..-1]].flatten
|
|
145
150
|
end
|
|
146
151
|
end
|
|
147
152
|
|
|
@@ -161,9 +166,8 @@ module ZhongwenTools
|
|
|
161
166
|
# NOTE: if a word is upcase, then it will be converted the same
|
|
162
167
|
# as a word that is only capitalized.
|
|
163
168
|
word, is_capitalized = normalize_pinyin(word)
|
|
164
|
-
|
|
165
169
|
pys = split_py(word)
|
|
166
|
-
|
|
170
|
+
|
|
167
171
|
recapitalize(current_pyn(word, pys), is_capitalized)
|
|
168
172
|
end
|
|
169
173
|
|
|
@@ -171,11 +175,12 @@ module ZhongwenTools
|
|
|
171
175
|
end
|
|
172
176
|
|
|
173
177
|
def self.capitalized?(str)
|
|
174
|
-
str[0] !=
|
|
178
|
+
str[0] != Caps.downcase(str[0])
|
|
175
179
|
end
|
|
176
180
|
|
|
177
181
|
def self.current_pyn(pyn, pinyin_arr)
|
|
178
182
|
replacements = []
|
|
183
|
+
|
|
179
184
|
pinyin_arr.each do |pinyin|
|
|
180
185
|
replace = pinyin_replacement(pinyin)
|
|
181
186
|
match = pinyin
|
|
@@ -194,6 +199,7 @@ module ZhongwenTools
|
|
|
194
199
|
matches = PYN_PY.values.select do |x|
|
|
195
200
|
py.include? x
|
|
196
201
|
end
|
|
202
|
+
|
|
197
203
|
match = select_pinyin_match(matches)
|
|
198
204
|
replace = PYN_PY.find{ |k, v| k if v == match }[0]
|
|
199
205
|
|
|
@@ -220,13 +226,13 @@ module ZhongwenTools
|
|
|
220
226
|
# Returns a string with actual pinyin
|
|
221
227
|
def self.convert_pyn_to_pinyin(str)
|
|
222
228
|
regex = Regex.pinyin_num
|
|
223
|
-
# Using gsub is ~8x faster than using scan and each.
|
|
224
|
-
#
|
|
225
|
-
#
|
|
226
|
-
#
|
|
227
|
-
#
|
|
228
|
-
#
|
|
229
|
-
#
|
|
229
|
+
# NOTE: Using gsub is ~8x faster than using scan and each.
|
|
230
|
+
# NOTE: if it's pinyin without vowels, e.g. m, ng, then convert,
|
|
231
|
+
# otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
|
|
232
|
+
# If it does, add it and then convert. Otherwise, just convert it.
|
|
233
|
+
# Oh, and if it has double hyphens, replace with one hyphen.
|
|
234
|
+
# And finally, correct those apostrophes at the very end.
|
|
235
|
+
# It's like magic.
|
|
230
236
|
str.gsub(regex) do
|
|
231
237
|
($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0,1]))? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
|
|
232
238
|
end.gsub("-'", '-').sub(/^'/, '')
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
# NOTE: This table works for pyn -> pinyin conversion, but it introduces
|
|
4
4
|
# mistakes when converting pinyin to pyn. In practice, pinyin can't
|
|
5
|
-
# be converted to pyn with complete accuracy unless it is properly
|
|
5
|
+
# be converted to pyn with complete accuracy unless it is properly
|
|
6
6
|
# formatted.
|
|
7
7
|
module ZhongwenTools
|
|
8
8
|
module Romanization
|
|
9
|
+
PYN_SYLLABIC_NASALS = %w(ng m n)
|
|
10
|
+
|
|
9
11
|
PYN_PY = {
|
|
10
12
|
'A1' => 'Ā',
|
|
11
13
|
'A2' => 'Á',
|
|
@@ -152,17 +154,42 @@ module ZhongwenTools
|
|
|
152
154
|
'm3' => 'm̌', # using combining diacritical marks
|
|
153
155
|
'm4' => 'm̀', # using combining diacritical marks
|
|
154
156
|
'm5' => 'm',
|
|
155
|
-
'n1' => '
|
|
156
|
-
'n2' => '
|
|
157
|
-
'n3' => '
|
|
158
|
-
'n4' => '
|
|
159
|
-
'n5' => '
|
|
160
|
-
'
|
|
161
|
-
'
|
|
162
|
-
'
|
|
163
|
-
'
|
|
164
|
-
'
|
|
165
|
-
'
|
|
157
|
+
'n1' => 'n̄',
|
|
158
|
+
'n2' => 'ń',
|
|
159
|
+
'n3' => 'ň',
|
|
160
|
+
'n4' => 'ǹ',
|
|
161
|
+
'n5' => 'n',
|
|
162
|
+
'Ng1' => 'N̄g', # using combining diacritical marks
|
|
163
|
+
'Ng2' => 'Ńg',
|
|
164
|
+
'Ng3' => 'Ňg', # using combining diacritical marks
|
|
165
|
+
'Ng4' => 'Ǹg',
|
|
166
|
+
'Ng5' => 'Ng',
|
|
167
|
+
'ng1' => 'n̄g', # using combining diacritical marks
|
|
168
|
+
'ng2' => 'ńg',
|
|
169
|
+
'ng3' => 'ňg', # using combining diacritical marks
|
|
170
|
+
'ng4' => 'ǹg',
|
|
171
|
+
'ng5' => 'ng',
|
|
172
|
+
'r5' => 'r',
|
|
173
|
+
'ang1' => 'āng',
|
|
174
|
+
'ang2' => 'áng',
|
|
175
|
+
'ang3' => 'ǎng',
|
|
176
|
+
'ang4' => 'àng',
|
|
177
|
+
'ang5' => 'ang',
|
|
178
|
+
'eng1' => 'ēng',
|
|
179
|
+
'eng2' => 'éng',
|
|
180
|
+
'eng3' => 'ěng',
|
|
181
|
+
'eng4' => 'èng',
|
|
182
|
+
'eng5' => 'eng',
|
|
183
|
+
'ing1' => 'īng',
|
|
184
|
+
'ing2' => 'íng',
|
|
185
|
+
'ing3' => 'ǐng',
|
|
186
|
+
'ing4' => 'ìng',
|
|
187
|
+
'ing5' => 'ing',
|
|
188
|
+
'ong1' => 'ōng',
|
|
189
|
+
'ong2' => 'óng',
|
|
190
|
+
'ong3' => 'ǒng',
|
|
191
|
+
'ong4' => 'òng',
|
|
192
|
+
'ong5' => 'ong',
|
|
166
193
|
}
|
|
167
194
|
end
|
|
168
195
|
end
|
data/test/test_pinyin.rb
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# encoding: utf-8
|
|
2
|
-
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
|
3
3
|
|
|
4
4
|
require './test/test_helper'
|
|
5
5
|
require 'zhongwen_tools/romanization/pinyin'
|
|
6
6
|
class TestPinyin < Minitest::Test
|
|
7
7
|
def test_split_pyn
|
|
8
|
-
|
|
8
|
+
@split_words.each do |w|
|
|
9
9
|
assert_equal w[:split], ZhongwenTools::Romanization::Pinyin.split_pyn(w[:pyn])
|
|
10
10
|
end
|
|
11
11
|
end
|
|
@@ -24,12 +24,16 @@ class TestPinyin < Minitest::Test
|
|
|
24
24
|
refute ZhongwenTools::Romanization::Pinyin.py?(w[:pyn]), w.inspect
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
@syllabic_nasals.each do |w|
|
|
28
|
+
assert ZhongwenTools::Romanization::Pinyin.py?(w[:py]), w.inspect
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
assert ZhongwenTools::Romanization::Pinyin.py? 'fǎnguāngjìng'
|
|
28
32
|
|
|
29
33
|
english_words = %w(cyan moose cling touch)
|
|
30
34
|
|
|
31
35
|
english_words.each do |w|
|
|
32
|
-
refute ZhongwenTools::Romanization::Pinyin.py? w
|
|
36
|
+
refute ZhongwenTools::Romanization::Pinyin.py?(w), w
|
|
33
37
|
end
|
|
34
38
|
end
|
|
35
39
|
|
|
@@ -39,8 +43,14 @@ class TestPinyin < Minitest::Test
|
|
|
39
43
|
assert ZhongwenTools::Romanization::Pinyin.pyn?(w[:pyn]), w.inspect
|
|
40
44
|
end
|
|
41
45
|
|
|
42
|
-
|
|
43
|
-
|
|
46
|
+
assert ZhongwenTools::Romanization::Pinyin.pyn?('ma2-fan')
|
|
47
|
+
assert ZhongwenTools::Romanization::Pinyin.pyn?('yo1')
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def test_syllabic_nasal_pyn?
|
|
51
|
+
assert ZhongwenTools::Romanization::Pinyin.pyn?('ng3')
|
|
52
|
+
assert ZhongwenTools::Romanization::Pinyin.pyn?('m3')
|
|
53
|
+
assert ZhongwenTools::Romanization::Pinyin.pyn?('n3')
|
|
44
54
|
end
|
|
45
55
|
|
|
46
56
|
def test_pyn_to_pinyin
|
|
@@ -48,27 +58,43 @@ class TestPinyin < Minitest::Test
|
|
|
48
58
|
assert_equal word[:py], ZhongwenTools::Romanization::Pinyin.to_pinyin(word[:pyn])
|
|
49
59
|
assert_equal word[:py], ZhongwenTools::Romanization::Pinyin.to_py(word[:pyn])
|
|
50
60
|
end
|
|
61
|
+
|
|
62
|
+
@syllabic_nasals.each do |word|
|
|
63
|
+
assert_equal word[:py], ZhongwenTools::Romanization::Pinyin.to_pinyin(word[:pyn])
|
|
64
|
+
assert_equal word[:py], ZhongwenTools::Romanization::Pinyin.to_py(word[:pyn])
|
|
65
|
+
end
|
|
51
66
|
end
|
|
52
67
|
|
|
53
68
|
def test_pinyin_to_pyn
|
|
54
69
|
@words.each do |word|
|
|
55
70
|
assert_equal word[:pyn], ZhongwenTools::Romanization::Pinyin.to_pyn(word[:py])
|
|
56
71
|
end
|
|
72
|
+
|
|
73
|
+
@syllabic_nasals.each do |word|
|
|
74
|
+
assert_equal word[:pyn], ZhongwenTools::Romanization::Pinyin.to_pyn(word[:py]), word
|
|
75
|
+
end
|
|
76
|
+
|
|
57
77
|
assert_equal 'yi2ge4', ZhongwenTools::Romanization::Pinyin.to_pyn('yígè')
|
|
58
78
|
assert_equal 'yi4nian2', ZhongwenTools::Romanization::Pinyin.to_pyn('yìnián', :py)
|
|
59
79
|
end
|
|
60
80
|
|
|
61
81
|
def setup
|
|
62
82
|
@hyphenated_words = [
|
|
63
|
-
{:pyn => 'A1-la1-bo2', :py => 'Ālābó'},
|
|
83
|
+
{ :pyn => 'A1-la1-bo2', :py => 'Ālābó' },
|
|
64
84
|
{ :pyn => 'Mao2 Ze2-dong1', :py => 'Máo Zédōng' }
|
|
65
85
|
]
|
|
66
86
|
|
|
67
87
|
@split_words = [
|
|
68
|
-
{:pyn => 'A1-la1-bo2', :py => 'Ālābó', :split => %w(A1 la1 bo2), split_py: %w(Ā lā bó) },
|
|
88
|
+
{ :pyn => 'A1-la1-bo2', :py => 'Ālābó', :split => %w(A1 la1 bo2), split_py: %w(Ā lā bó) },
|
|
69
89
|
{ :pyn => 'Mao2 Ze2-dong1', :py => 'Máo Zédōng', :split => %w(Mao2 Ze2 dong1), :split_py => %w(Máo Zé dōng) }
|
|
70
90
|
]
|
|
71
91
|
|
|
92
|
+
@syllabic_nasals = [
|
|
93
|
+
{ pyn: 'ng3', py: 'ňg'},
|
|
94
|
+
{ pyn: 'm3', py: 'm̌'},
|
|
95
|
+
{ pyn: 'n3', py: 'ň'},
|
|
96
|
+
{ pyn: 'Ng3', py: 'Ňg'}
|
|
97
|
+
]
|
|
72
98
|
|
|
73
99
|
@words = [
|
|
74
100
|
{ pyn: 'A1la1bo2', py: 'Ālābó'},
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zhongwen_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.18.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steven Daniels
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-03-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|