zhongwen_tools 0.16.2 → 0.16.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/zhongwen_tools/regex.rb +6 -2
- data/lib/zhongwen_tools/romanization/pinyin.rb +11 -5
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_pinyin.rb +16 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74821104cd7fb3bed07317f74db0e25c28055e52
|
4
|
+
data.tar.gz: 41d79cb06718ae207fafd2e8272c761b47d4c508
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3ed0f32087518bfc195af48a873f668e0b1b3ca0b4740b0151855d6d39363aca5ab58af10a1bc5dc9cecd67a222ee39adec42cd3c4e2edacb527fb969361734e
|
7
|
+
data.tar.gz: 4b8eb4a1ae1a2354195fc93714218d55ba9e29517e3001c759d1c6e25c3dad2a7fb1119b3a73ff1ee5066df71ab52942281481be2a0249a9e0e6027bff8d0b15
|
data/README.md
CHANGED
@@ -51,7 +51,7 @@ You can monkey patch the String class.
|
|
51
51
|
'ｈｅｌｌｏ'.halfwidth? #=> false
|
52
52
|
|
53
53
|
'ｈｅｌｌｏ'.fullwidth? #=> true
|
54
|
-
|
54
|
+
|
55
55
|
'ｈｅｌｌｏ'.to_halfwidth? #=> 'hello'
|
56
56
|
|
57
57
|
'\u4e2d\u6587'.from_codepoint #=> '中文'
|
@@ -87,9 +87,9 @@ You can monkey patch the String class.
|
|
87
87
|
'nǐ hǎo'.to_mps2 #=> 'ni3 hau3'
|
88
88
|
|
89
89
|
'nǐ hǎo'.romanization? :py
|
90
|
-
|
90
|
+
|
91
91
|
'nǐ hǎo'.py? #=> true
|
92
|
-
|
92
|
+
|
93
93
|
'nǐ hǎo'.pyn? # false
|
94
94
|
|
95
95
|
'nǐ hǎo'.bpmf? # false
|
data/lib/zhongwen_tools/regex.rb
CHANGED
@@ -62,7 +62,7 @@ module ZhongwenTools
|
|
62
62
|
end
|
63
63
|
|
64
64
|
def self.zh_number_multiple
|
65
|
-
/[
|
65
|
+
/[拾十百佰千仟万萬亿億]/
|
66
66
|
end
|
67
67
|
|
68
68
|
# Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.
|
@@ -100,7 +100,7 @@ module ZhongwenTools
|
|
100
100
|
end
|
101
101
|
|
102
102
|
def self.py_tones
|
103
|
-
|
103
|
+
{
|
104
104
|
'a' => '[āáǎàa]',
|
105
105
|
'e' => '[ēéěèe]',
|
106
106
|
'i' => '[īíǐìi]',
|
@@ -109,5 +109,9 @@ module ZhongwenTools
|
|
109
109
|
'v' => '[ǖǘǚǜü]'
|
110
110
|
}
|
111
111
|
end
|
112
|
+
|
113
|
+
def self.only_tones
|
114
|
+
/([āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜ])/
|
115
|
+
end
|
112
116
|
end
|
113
117
|
end
|
@@ -48,6 +48,8 @@ module ZhongwenTools
|
|
48
48
|
|
49
49
|
results = words.map do |word|
|
50
50
|
word, is_capitalized = normalize_pinyin(word)
|
51
|
+
# NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
|
52
|
+
word = word.gsub('ngu', 'n-gu')
|
51
53
|
result = word.split(/['\-]/).flatten.map do |x|
|
52
54
|
find_py(x)
|
53
55
|
end
|
@@ -67,10 +69,15 @@ module ZhongwenTools
|
|
67
69
|
#
|
68
70
|
# Returns Boolean.
|
69
71
|
def self.py?(str)
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
if str[ZhongwenTools::Regex.only_tones].nil? && str[/[1-5]/].nil?
|
73
|
+
pyn?(str)
|
74
|
+
else
|
75
|
+
# NOTE: py regex does not include capitals with tones.
|
76
|
+
# NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
|
77
|
+
regex = /(#{ ZhongwenTools::Regex.punc }|#{ ZhongwenTools::Regex.py }|[\s\-])/
|
78
|
+
str = str.gsub('ngu', 'n-gu')
|
73
79
|
ZhongwenTools::Caps.downcase(str).gsub(regex, '').strip == ''
|
80
|
+
end
|
74
81
|
end
|
75
82
|
|
76
83
|
# Public: checks if a string is pinyin.
|
@@ -126,7 +133,6 @@ module ZhongwenTools
|
|
126
133
|
|
127
134
|
def self.find_py(str)
|
128
135
|
str.scan(ZhongwenTools::Regex.py).map{ |x| (x - [nil])[0] }
|
129
|
-
|
130
136
|
end
|
131
137
|
|
132
138
|
def self.recapitalize(obj, capitalized)
|
@@ -177,7 +183,7 @@ module ZhongwenTools
|
|
177
183
|
pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace }
|
178
184
|
else
|
179
185
|
pyn = pyn.sub(/#{match}/){ "#{$1}#{replace}"}
|
180
|
-
|
186
|
+
end
|
181
187
|
replacements << replace
|
182
188
|
end
|
183
189
|
|
data/test/test_pinyin.rb
CHANGED
@@ -14,6 +14,8 @@ class TestPinyin < Minitest::Test
|
|
14
14
|
@split_words.each do |w|
|
15
15
|
assert_equal w[:split_py], ZhongwenTools::Romanization::Pinyin.split_py(w[:py])
|
16
16
|
end
|
17
|
+
|
18
|
+
assert_equal ['fǎn', 'guāng', 'jìng'], ZhongwenTools::Romanization::Pinyin.split_py('fǎnguāngjìng')
|
17
19
|
end
|
18
20
|
|
19
21
|
def test_py?
|
@@ -21,6 +23,14 @@ class TestPinyin < Minitest::Test
|
|
21
23
|
assert ZhongwenTools::Romanization::Pinyin.py?(w[:py]), w.inspect
|
22
24
|
refute ZhongwenTools::Romanization::Pinyin.py?(w[:pyn]), w.inspect
|
23
25
|
end
|
26
|
+
|
27
|
+
assert ZhongwenTools::Romanization::Pinyin.py? 'fǎnguāngjìng'
|
28
|
+
|
29
|
+
english_words = %w(cyan moose cling touch)
|
30
|
+
|
31
|
+
english_words.each do |w|
|
32
|
+
refute ZhongwenTools::Romanization::Pinyin.py? w
|
33
|
+
end
|
24
34
|
end
|
25
35
|
|
26
36
|
def test_pyn?
|
@@ -58,13 +68,13 @@ class TestPinyin < Minitest::Test
|
|
58
68
|
|
59
69
|
|
60
70
|
@words = [
|
61
|
-
{:
|
62
|
-
{ :
|
63
|
-
{ :
|
64
|
-
{ :
|
65
|
-
{ :
|
71
|
+
{ pyn: 'A1la1bo2', py: 'Ālābó'},
|
72
|
+
{ pyn: 'ni3 hao3', py: 'nǐ hǎo' },
|
73
|
+
{ pyn: 'Zhong1guo2', py: 'Zhōngguó' },
|
74
|
+
{ pyn: 'chui1 niu3', py: "chuī niǔ" },
|
75
|
+
{ pyn: 'Mao2 Ze2dong1', py: 'Máo Zédōng' }
|
66
76
|
]
|
67
77
|
|
68
|
-
@r = { :
|
78
|
+
@r = { pyn: 'r5', py: 'r' }
|
69
79
|
end
|
70
80
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.2
|
4
|
+
version: 0.16.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|