zhongwen_tools 0.18.2 → 0.18.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/zhongwen_tools/caps.rb +1 -1
- data/lib/zhongwen_tools/romanization/pinyin.rb +10 -13
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_caps.rb +2 -1
- data/test/test_pinyin.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d727c81b7c2271ab7dbd09761b085509f47e082
|
4
|
+
data.tar.gz: 5f96488fbcfaeb56d02ff61670c15508413ad73b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acc43754bdda9b9f26b1092f46bf0122bf7766e9a7e9effcf2c79afea97966bc8b7171b960089e3e83b3cf15cbfb3d6e651aedc1020c1e86726fac71ab16baa1
|
7
|
+
data.tar.gz: d1a828b7bcf7ed2f2c94c340ab1db739ce09723e67eeb7fdfe47b3e5cb17fa7d4dc65252d401ea1c60a682be2988e335d664d501e0fa4fc6376f831024a07a2a
|
data/lib/zhongwen_tools/caps.rb
CHANGED
@@ -14,7 +14,7 @@ module ZhongwenTools
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.capitalize(str)
|
17
|
-
first_letter = str[/#{Regex.py}|[ĀÁǍÀĒÉĚÈĪÍǏÌŌÓǑÒ]/][0]
|
17
|
+
first_letter = str[/#{Regex.py}|[ĀÁǍÀĒÉĚÈĪÍǏÌŌÓǑÒ]|[a-zA-Z]/][0]
|
18
18
|
str.sub(first_letter, ZhongwenTools::Caps.upcase(first_letter))
|
19
19
|
end
|
20
20
|
|
@@ -47,7 +47,7 @@ module ZhongwenTools
|
|
47
47
|
def self.split_py(str)
|
48
48
|
words = str.split(' ')
|
49
49
|
|
50
|
-
|
50
|
+
words.flat_map do |word|
|
51
51
|
word, is_capitalized = normalize_pinyin(word)
|
52
52
|
word = normalize_n_g(word)
|
53
53
|
word = normalize_n(word)
|
@@ -60,8 +60,6 @@ module ZhongwenTools
|
|
60
60
|
|
61
61
|
recapitalize(result.flatten, is_capitalized)
|
62
62
|
end
|
63
|
-
|
64
|
-
results.flatten
|
65
63
|
end
|
66
64
|
|
67
65
|
# Public: checks if a string is pinyin.
|
@@ -95,7 +93,7 @@ module ZhongwenTools
|
|
95
93
|
def self.pyn?(str)
|
96
94
|
# FIXME: use strip_punctuation method
|
97
95
|
normalized_str = Caps.downcase(str.gsub(Regex.punc, '').gsub(/[\s\-]/, ''))
|
98
|
-
pyn_arr = split_pyn(normalized_str).map{ |p| p }
|
96
|
+
pyn_arr = split_pyn(normalized_str).map { |p| p }
|
99
97
|
pyn_arr << normalized_str if pyn_arr.size == 0 && PYN_SYLLABIC_NASALS.include?(normalized_str.gsub(/[1-5]/, ''))
|
100
98
|
|
101
99
|
pyn_matches_properly?(pyn_arr, normalized_str) &&
|
@@ -117,7 +115,7 @@ module ZhongwenTools
|
|
117
115
|
end
|
118
116
|
|
119
117
|
def self.are_all_pyn_syllables_complete?(pyn_arr)
|
120
|
-
pyns = ROMANIZATIONS_TABLE.map{ |r| r[:pyn] } + PYN_SYLLABIC_NASALS
|
118
|
+
pyns = ROMANIZATIONS_TABLE.map { |r| r[:pyn] } + PYN_SYLLABIC_NASALS
|
121
119
|
|
122
120
|
pyn_syllables = pyn_arr.select do |p|
|
123
121
|
pyns.include?(p.gsub(/[1-5]/, ''))
|
@@ -145,7 +143,7 @@ module ZhongwenTools
|
|
145
143
|
# Special Case split_py("yìnián") # => ["yì" + "nián"]
|
146
144
|
# split_py("Xīní") # => ["Xī", "ní"]
|
147
145
|
regex = /([#{ Regex.only_tones }])(n(#{Regex.py_tones['v']}|#{Regex.py_tones['i']}|[iu]|#{Regex.py_tones['e']}|[#{Regex.py_tones['a']}]))/
|
148
|
-
pinyin.gsub(regex) { "#{
|
146
|
+
pinyin.gsub(regex) { "#{$1}-#{$2}" }
|
149
147
|
end
|
150
148
|
|
151
149
|
def self.normalize_pinyin(pinyin)
|
@@ -198,12 +196,11 @@ module ZhongwenTools
|
|
198
196
|
end
|
199
197
|
|
200
198
|
def self.current_pyn(pyn, pinyin_arr)
|
201
|
-
pinyin_arr.
|
202
|
-
|
203
|
-
|
204
|
-
end
|
199
|
+
replacements = pinyin_arr.map do |pinyin|
|
200
|
+
[pinyin, pinyin_replacement(pinyin)]
|
201
|
+
end.to_h
|
205
202
|
|
206
|
-
pyn.gsub("'", '')
|
203
|
+
pyn.gsub(/#{pinyin_arr.join('|')}/, replacements).gsub("''", '')
|
207
204
|
end
|
208
205
|
|
209
206
|
def self.pinyin_replacement(py)
|
@@ -214,7 +211,7 @@ module ZhongwenTools
|
|
214
211
|
match = select_pinyin_match(matches)
|
215
212
|
replace = PYN_PY.find { |k, v| k if v == match }[0]
|
216
213
|
|
217
|
-
py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){ $1 + $3 + $2 }
|
214
|
+
py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/) { $1 + $3 + $2 }
|
218
215
|
end
|
219
216
|
|
220
217
|
def self.select_pinyin_match(matches)
|
@@ -245,7 +242,7 @@ module ZhongwenTools
|
|
245
242
|
# And finally, correct those apostrophes at the very end.
|
246
243
|
# It's like magic.
|
247
244
|
str.gsub(regex) do
|
248
|
-
($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0,1]))? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
|
245
|
+
($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0, 1])) ? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
|
249
246
|
end.gsub("-'", '-').sub(/^'/, '')
|
250
247
|
end
|
251
248
|
end
|
data/test/test_caps.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require './test/test_helper'
|
5
5
|
require 'zhongwen_tools/caps'
|
@@ -18,6 +18,7 @@ class TestCaps < Minitest::Test
|
|
18
18
|
def test_capitalize
|
19
19
|
assert_equal @caps[:c], ZhongwenTools::Caps.capitalize(@caps[:d])
|
20
20
|
assert_equal '"Zheng4qie1"', ZhongwenTools::Caps.capitalize('"Zheng4qie1"')
|
21
|
+
assert_equal 'Intermediate', ZhongwenTools::Caps.capitalize('intermediate')
|
21
22
|
end
|
22
23
|
|
23
24
|
def setup
|
data/test/test_pinyin.rb
CHANGED
@@ -28,6 +28,7 @@ class TestPinyin < Minitest::Test
|
|
28
28
|
assert_equal ['nián', 'gāo'], ZhongwenTools::Romanization::Pinyin.split_py('niángāo')
|
29
29
|
assert_equal %w(fú shè néng), ZhongwenTools::Romanization::Pinyin.split_py('fúshènéng')
|
30
30
|
assert_equal ['sān', 'gēng'], ZhongwenTools::Romanization::Pinyin.split_py('sāngēng')
|
31
|
+
assert_equal ['rú', 'guǒ'], ZhongwenTools::Romanization::Pinyin.split_py('rúguǒ')
|
31
32
|
end
|
32
33
|
|
33
34
|
def test_py?
|
@@ -89,7 +90,8 @@ class TestPinyin < Minitest::Test
|
|
89
90
|
assert_equal 'yi2ge4', ZhongwenTools::Romanization::Pinyin.to_pyn('yígè')
|
90
91
|
assert_equal 'yi4nian2', ZhongwenTools::Romanization::Pinyin.to_pyn('yìnián', :py)
|
91
92
|
assert_equal 'hei1hu1hu1', ZhongwenTools::Romanization::Pinyin.to_pyn('hēihūhū', :py)
|
92
|
-
assert_equal '"Zheng4qie1"',
|
93
|
+
assert_equal '"Zheng4qie1"', ZhongwenTools::Romanization::Pinyin.to_pyn('"Zhèngqiē"', :py)
|
94
|
+
assert_equal 'wen4wen5', ZhongwenTools::Romanization::Pinyin.to_pyn('wènwen', :py)
|
93
95
|
end
|
94
96
|
|
95
97
|
def setup
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.2
|
4
|
+
version: 0.18.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|