zhongwen_tools 0.18.0 → 0.18.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/zhongwen_tools/regex.rb +4 -5
- data/lib/zhongwen_tools/romanization/pinyin.rb +8 -4
- data/lib/zhongwen_tools/version.rb +1 -1
- data/test/test_pinyin.rb +8 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abe0b5477d8f04c2cabfe1054feffa3d1994b9d2
|
4
|
+
data.tar.gz: a14a4bc66804d0cbe0e3892ec48639384ae0f9bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34c8c883922b2e7cf314a6866bb54f6bc4462225492449699e70f65ccf1bd364be3cf9988a2d18edd08be1103703831e81de999dfb31a87c14dad5e1ccabaf8a
|
7
|
+
data.tar.gz: 23c65688e09fa36a15ef0c2add20bee70c0ce167072f73a043fc68bd34b00b5475cada95909956643855a3bb57c1f36f52e8dadb6aa7f40d85816f443f22f9df
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@ Methods for dealing with Chinese.
|
|
5
5
|
Status](https://img.shields.io/travis/stevendaniels/zhongwen_tools.svg?style=flat-square)](https://travis-ci.org/stevendaniels/zhongwen_tools) [![Dependency Status](https://img.shields.io/gemnasium/stevendaniels/zhongwen_tools.svg?style=flat-square)](https://gemnasium.com/stevendaniels/zhongwen_tools) [![Code Climate](https://img.shields.io/codeclimate/github/stevendaniels/zhongwen_tools.svg?style=flat-square)](https://codeclimate.com/github/stevendaniels/zhongwen_tools) [![Coverage Status](https://img.shields.io/coveralls/stevendaniels/zhongwen_tools.svg?style=flat-square)](https://coveralls.io/r/stevendaniels/zhongwen_tools)
|
6
6
|
[![Gem Version](https://img.shields.io/gem/v/zhongwen_tools.svg?style=flat-square)](http://badge.fury.io/rb/zhongwen_tools)
|
7
7
|
|
8
|
-
##
|
8
|
+
## Installation
|
9
9
|
|
10
10
|
Install as a gem
|
11
11
|
|
data/lib/zhongwen_tools/regex.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
2
|
module ZhongwenTools
|
4
3
|
module Regex
|
5
4
|
def self.pyn
|
@@ -7,12 +6,12 @@ module ZhongwenTools
|
|
7
6
|
end
|
8
7
|
|
9
8
|
def self.py
|
10
|
-
# FIXME: need to detect Ālābó
|
11
9
|
# ([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
|
12
|
-
@py ||= /(#{pyn_regexes.map{|
|
10
|
+
@py ||= /(#{pyn_regexes.map { |_k, v| v.to_s[7..-2].gsub(/[aeiouv]/, py_tones) }.join('|')}([\s\-])?)/
|
13
11
|
end
|
14
12
|
|
15
13
|
def self.pinyin_num
|
14
|
+
# FIXME: n?g? might need to be replaced with (ng|n)?
|
16
15
|
/(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
|
17
16
|
end
|
18
17
|
|
@@ -86,12 +85,12 @@ module ZhongwenTools
|
|
86
85
|
{
|
87
86
|
nl_regex: /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
|
88
87
|
bpm_regex: /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
|
89
|
-
y_regex: /[yY](a(o|ng?)?|e|i(n
|
88
|
+
y_regex: /[yY](a(o|ng?)?|e|i(ng|n)?|o(u|ng)?|u(e|a?n)?)/,
|
90
89
|
f_regex: /([fF](ou?|[ae](ng?|i)?|u))/,
|
91
90
|
dt_regex: /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
|
92
91
|
gkh_regex: /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
|
93
92
|
zczhch_regex: /([zZ]h?ei|[czCZ]h?(e(ng?)?|o(ng?|u)?|ao|u?a(i|ng?)?|u?(o|i|n)?))/,
|
94
|
-
ssh_regex: /([sS]ong|[sS]hua(i|ng?)?|[sS]hei|[sS][h]?(a(i|ng?|o)?|
|
93
|
+
ssh_regex: /([sS]ong|[sS]hua(i|ng?)?|[sS]hei|[sS][h]?(a(i|ng?|o)?|e(ng|n)?|ou|u(a?n|o|i)?|i))/,
|
95
94
|
r_regex: /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
|
96
95
|
jqx_regex: /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
|
97
96
|
aeo_regex: /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
|
data/lib/zhongwen_tools/romanization/pinyin.rb
CHANGED
@@ -49,14 +49,18 @@ module ZhongwenTools
|
|
49
49
|
|
50
50
|
results = words.map do |word|
|
51
51
|
word, is_capitalized = normalize_pinyin(word)
|
52
|
-
# NOTE: Special Case "fǎnguāng"
|
53
|
-
# Special Case "yìnián"
|
54
|
-
|
55
|
-
word = word.gsub(/(
|
52
|
+
# NOTE: Special Case split_py("fǎnguāng") # => ["fǎn" + "guāng"]
|
53
|
+
# Special Case split_py("yìnián") # => ["yì" + "nián"]
|
54
|
+
# split_py("Xīní") # => ["Xī", "ní"]
|
55
|
+
word = word.gsub(/(n)(g(#{ Regex.py_tones['o'] }|u))/){ "#{ $1 }-#{ $2 }" }
|
56
|
+
word = word.gsub(/([#{ Regex.only_tones }])(n(#{ Regex.py_tones['v'] }|#{ Regex.py_tones['i'] }|[iu][#{ Regex.py_tones['a'] }]))/){ "#{ $1 }-#{ $2 }" }
|
56
57
|
result = word.split(/['\-]/).flatten.map do |x|
|
57
58
|
find_py(x)
|
58
59
|
end
|
59
60
|
|
61
|
+
# NOTE: Special Case split_py('wányìr') # => ['wán', 'yì', 'r']
|
62
|
+
result << 'r' unless word[/(.*[^#{ Regex.py_tones['e'] }.])(r)$/].nil?
|
63
|
+
|
60
64
|
recapitalize(result.flatten, is_capitalized)
|
61
65
|
end
|
62
66
|
|
data/test/test_pinyin.rb
CHANGED
@@ -16,6 +16,14 @@ class TestPinyin < Minitest::Test
|
|
16
16
|
end
|
17
17
|
|
18
18
|
assert_equal ['fǎn', 'guāng', 'jìng'], ZhongwenTools::Romanization::Pinyin.split_py('fǎnguāngjìng')
|
19
|
+
assert_equal ['Yīng', 'guó'], ZhongwenTools::Romanization::Pinyin.split_py('Yīngguó')
|
20
|
+
assert_equal ['Xī', 'ní'], ZhongwenTools::Romanization::Pinyin.split_py('Xīní')
|
21
|
+
assert_equal ['bàn', 'gōng', 'lóu'], ZhongwenTools::Romanization::Pinyin.split_py('bàngōnglóu')
|
22
|
+
assert_equal ['jì', 'nǚ'], ZhongwenTools::Romanization::Pinyin.split_py('jìnǚ')
|
23
|
+
assert_equal ['sè', 'guǐ'], ZhongwenTools::Romanization::Pinyin.split_py('sèguǐ')
|
24
|
+
assert_equal ['qǔ', 'nuǎn'], ZhongwenTools::Romanization::Pinyin.split_py('qǔnuǎn')
|
25
|
+
assert_equal ['wán', 'yì', 'r'], ZhongwenTools::Romanization::Pinyin.split_py('wányìr')
|
26
|
+
assert_equal ['yīng', "ér"], ZhongwenTools::Romanization::Pinyin.split_py("yīng'ér")
|
19
27
|
end
|
20
28
|
|
21
29
|
def test_py?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zhongwen_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.0
|
4
|
+
version: 0.18.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Daniels
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -233,7 +233,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
233
233
|
version: '0'
|
234
234
|
requirements: []
|
235
235
|
rubyforge_project: zhongwen_tools
|
236
|
-
rubygems_version: 2.
|
236
|
+
rubygems_version: 2.4.5
|
237
237
|
signing_key:
|
238
238
|
specification_version: 4
|
239
239
|
summary: Zhongwen Tools provide romanization conversions and helper methods for Chinese.
|