zhongwen_tools 0.18.2 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/zhongwen_tools/caps.rb +1 -1
- data/lib/zhongwen_tools/romanization/pinyin.rb +10 -13
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_caps.rb +2 -1
- data/test/test_pinyin.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7d727c81b7c2271ab7dbd09761b085509f47e082
|
|
4
|
+
data.tar.gz: 5f96488fbcfaeb56d02ff61670c15508413ad73b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: acc43754bdda9b9f26b1092f46bf0122bf7766e9a7e9effcf2c79afea97966bc8b7171b960089e3e83b3cf15cbfb3d6e651aedc1020c1e86726fac71ab16baa1
|
|
7
|
+
data.tar.gz: d1a828b7bcf7ed2f2c94c340ab1db739ce09723e67eeb7fdfe47b3e5cb17fa7d4dc65252d401ea1c60a682be2988e335d664d501e0fa4fc6376f831024a07a2a
|
data/lib/zhongwen_tools/caps.rb
CHANGED
|
@@ -14,7 +14,7 @@ module ZhongwenTools
|
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
def self.capitalize(str)
|
|
17
|
-
first_letter = str[/#{Regex.py}|[ĀÁǍÀĒÉĚÈĪÍǏÌŌÓǑÒ]/][0]
|
|
17
|
+
first_letter = str[/#{Regex.py}|[ĀÁǍÀĒÉĚÈĪÍǏÌŌÓǑÒ]|[a-zA-Z]/][0]
|
|
18
18
|
str.sub(first_letter, ZhongwenTools::Caps.upcase(first_letter))
|
|
19
19
|
end
|
|
20
20
|
|
|
@@ -47,7 +47,7 @@ module ZhongwenTools
|
|
|
47
47
|
def self.split_py(str)
|
|
48
48
|
words = str.split(' ')
|
|
49
49
|
|
|
50
|
-
|
|
50
|
+
words.flat_map do |word|
|
|
51
51
|
word, is_capitalized = normalize_pinyin(word)
|
|
52
52
|
word = normalize_n_g(word)
|
|
53
53
|
word = normalize_n(word)
|
|
@@ -60,8 +60,6 @@ module ZhongwenTools
|
|
|
60
60
|
|
|
61
61
|
recapitalize(result.flatten, is_capitalized)
|
|
62
62
|
end
|
|
63
|
-
|
|
64
|
-
results.flatten
|
|
65
63
|
end
|
|
66
64
|
|
|
67
65
|
# Public: checks if a string is pinyin.
|
|
@@ -95,7 +93,7 @@ module ZhongwenTools
|
|
|
95
93
|
def self.pyn?(str)
|
|
96
94
|
# FIXME: use strip_punctuation method
|
|
97
95
|
normalized_str = Caps.downcase(str.gsub(Regex.punc, '').gsub(/[\s\-]/, ''))
|
|
98
|
-
pyn_arr = split_pyn(normalized_str).map{ |p| p }
|
|
96
|
+
pyn_arr = split_pyn(normalized_str).map { |p| p }
|
|
99
97
|
pyn_arr << normalized_str if pyn_arr.size == 0 && PYN_SYLLABIC_NASALS.include?(normalized_str.gsub(/[1-5]/, ''))
|
|
100
98
|
|
|
101
99
|
pyn_matches_properly?(pyn_arr, normalized_str) &&
|
|
@@ -117,7 +115,7 @@ module ZhongwenTools
|
|
|
117
115
|
end
|
|
118
116
|
|
|
119
117
|
def self.are_all_pyn_syllables_complete?(pyn_arr)
|
|
120
|
-
pyns = ROMANIZATIONS_TABLE.map{ |r| r[:pyn] } + PYN_SYLLABIC_NASALS
|
|
118
|
+
pyns = ROMANIZATIONS_TABLE.map { |r| r[:pyn] } + PYN_SYLLABIC_NASALS
|
|
121
119
|
|
|
122
120
|
pyn_syllables = pyn_arr.select do |p|
|
|
123
121
|
pyns.include?(p.gsub(/[1-5]/, ''))
|
|
@@ -145,7 +143,7 @@ module ZhongwenTools
|
|
|
145
143
|
# Special Case split_py("yìnián") # => ["yì" + "nián"]
|
|
146
144
|
# split_py("Xīní") # => ["Xī", "ní"]
|
|
147
145
|
regex = /([#{ Regex.only_tones }])(n(#{Regex.py_tones['v']}|#{Regex.py_tones['i']}|[iu]|#{Regex.py_tones['e']}|[#{Regex.py_tones['a']}]))/
|
|
148
|
-
pinyin.gsub(regex) { "#{
|
|
146
|
+
pinyin.gsub(regex) { "#{$1}-#{$2}" }
|
|
149
147
|
end
|
|
150
148
|
|
|
151
149
|
def self.normalize_pinyin(pinyin)
|
|
@@ -198,12 +196,11 @@ module ZhongwenTools
|
|
|
198
196
|
end
|
|
199
197
|
|
|
200
198
|
def self.current_pyn(pyn, pinyin_arr)
|
|
201
|
-
pinyin_arr.
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
end
|
|
199
|
+
replacements = pinyin_arr.map do |pinyin|
|
|
200
|
+
[pinyin, pinyin_replacement(pinyin)]
|
|
201
|
+
end.to_h
|
|
205
202
|
|
|
206
|
-
pyn.gsub("'", '')
|
|
203
|
+
pyn.gsub(/#{pinyin_arr.join('|')}/, replacements).gsub("''", '')
|
|
207
204
|
end
|
|
208
205
|
|
|
209
206
|
def self.pinyin_replacement(py)
|
|
@@ -214,7 +211,7 @@ module ZhongwenTools
|
|
|
214
211
|
match = select_pinyin_match(matches)
|
|
215
212
|
replace = PYN_PY.find { |k, v| k if v == match }[0]
|
|
216
213
|
|
|
217
|
-
py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){ $1 + $3 + $2 }
|
|
214
|
+
py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/) { $1 + $3 + $2 }
|
|
218
215
|
end
|
|
219
216
|
|
|
220
217
|
def self.select_pinyin_match(matches)
|
|
@@ -245,7 +242,7 @@ module ZhongwenTools
|
|
|
245
242
|
# And finally, correct those apostrophes at the very end.
|
|
246
243
|
# It's like magic.
|
|
247
244
|
str.gsub(regex) do
|
|
248
|
-
($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0,1]))? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
|
|
245
|
+
($3.nil? ? "#{ PYN_PY[$1] }" : ($2 == '' && %w(a e o).include?($3[0, 1])) ? "'#{ PYN_PY["#{ $3 }#{ $6 }"]}#{ $4 }#{ $5 }" : "#{ $2 }#{ PYN_PY["#{ $3 }#{ $6 }"] }#{ $4 }#{ $5 }") + (($7.to_s.length > 1) ? '-' : '')
|
|
249
246
|
end.gsub("-'", '-').sub(/^'/, '')
|
|
250
247
|
end
|
|
251
248
|
end
|
data/test/test_caps.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# encoding: utf-8
|
|
2
|
-
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
|
3
3
|
|
|
4
4
|
require './test/test_helper'
|
|
5
5
|
require 'zhongwen_tools/caps'
|
|
@@ -18,6 +18,7 @@ class TestCaps < Minitest::Test
|
|
|
18
18
|
def test_capitalize
|
|
19
19
|
assert_equal @caps[:c], ZhongwenTools::Caps.capitalize(@caps[:d])
|
|
20
20
|
assert_equal '"Zheng4qie1"', ZhongwenTools::Caps.capitalize('"Zheng4qie1"')
|
|
21
|
+
assert_equal 'Intermediate', ZhongwenTools::Caps.capitalize('intermediate')
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
def setup
|
data/test/test_pinyin.rb
CHANGED
|
@@ -28,6 +28,7 @@ class TestPinyin < Minitest::Test
|
|
|
28
28
|
assert_equal ['nián', 'gāo'], ZhongwenTools::Romanization::Pinyin.split_py('niángāo')
|
|
29
29
|
assert_equal %w(fú shè néng), ZhongwenTools::Romanization::Pinyin.split_py('fúshènéng')
|
|
30
30
|
assert_equal ['sān', 'gēng'], ZhongwenTools::Romanization::Pinyin.split_py('sāngēng')
|
|
31
|
+
assert_equal ['rú', 'guǒ'], ZhongwenTools::Romanization::Pinyin.split_py('rúguǒ')
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def test_py?
|
|
@@ -89,7 +90,8 @@ class TestPinyin < Minitest::Test
|
|
|
89
90
|
assert_equal 'yi2ge4', ZhongwenTools::Romanization::Pinyin.to_pyn('yígè')
|
|
90
91
|
assert_equal 'yi4nian2', ZhongwenTools::Romanization::Pinyin.to_pyn('yìnián', :py)
|
|
91
92
|
assert_equal 'hei1hu1hu1', ZhongwenTools::Romanization::Pinyin.to_pyn('hēihūhū', :py)
|
|
92
|
-
assert_equal '"Zheng4qie1"',
|
|
93
|
+
assert_equal '"Zheng4qie1"', ZhongwenTools::Romanization::Pinyin.to_pyn('"Zhèngqiē"', :py)
|
|
94
|
+
assert_equal 'wen4wen5', ZhongwenTools::Romanization::Pinyin.to_pyn('wènwen', :py)
|
|
93
95
|
end
|
|
94
96
|
|
|
95
97
|
def setup
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zhongwen_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.18.2
|
|
4
|
+
version: 0.18.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steven Daniels
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-05-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|