zhongwen_tools 0.17.1 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/lib/zhongwen_tools/regex.rb +3 -3
- data/lib/zhongwen_tools/romanization/pinyin.rb +3 -3
- data/lib/zhongwen_tools/romanization.rb +8 -4
- data/lib/zhongwen_tools/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bacbcbd09a73bdd22d78e966029187c7e3dfc9a2
|
|
4
|
+
data.tar.gz: e259d86acb93bb114141d040c6ad2fd8a22293cd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 93999ffd384d42b193873c9be87c94bd5784f4c49381a841fdfa2a4687ffab5f9747cfbcb4a641800fd271226ea4ef6ee1202aa849cbb5688268e73cffe2d19a
|
|
7
|
+
data.tar.gz: 63c7429d200a98628b00cd23db987d0b095f4d50819f8844a6855adddc4accee66a9243122ec4b54eea8b0f9305edbc2080bbd3b2f6929ea579783ad68ebd1aa
|
data/.travis.yml
CHANGED
data/lib/zhongwen_tools/regex.rb
CHANGED
|
@@ -3,13 +3,13 @@
|
|
|
3
3
|
module ZhongwenTools
|
|
4
4
|
module Regex
|
|
5
5
|
def self.pyn
|
|
6
|
-
/(#{pyn_regexes.values.join('|')}|r)([1-5])([\s\-]+)?/
|
|
6
|
+
@pyn ||= /(#{pyn_regexes.values.join('|')}|r)([1-5])([\s\-]+)?/
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
def self.py
|
|
10
10
|
# FIXME: need to detect Ālābó
|
|
11
11
|
# ([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
|
|
12
|
-
/(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}([\s\-])?)/
|
|
12
|
+
@py ||= /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}([\s\-])?)/
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def self.pinyin_num
|
|
@@ -17,7 +17,7 @@ module ZhongwenTools
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def self.pinyin_toneless
|
|
20
|
-
/(#{pyn_regexes.values.join('|')}|r)([\s\-]+)?/
|
|
20
|
+
@pynt ||= /(#{pyn_regexes.values.join('|')}|r)([\s\-]+)?/
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def self.fullwidth
|
|
@@ -38,8 +38,8 @@ module ZhongwenTools
|
|
|
38
38
|
def self.split_pyn(str)
|
|
39
39
|
# FIXME: ignore punctuation
|
|
40
40
|
regex = str[/[1-5]/].nil? ? /(#{ZhongwenTools::Regex.pinyin_toneless})/ : /(#{ZhongwenTools::Regex.pyn}|#{ZhongwenTools::Regex.pinyin_toneless})/
|
|
41
|
-
|
|
42
|
-
str.scan(regex).map{ |arr| arr[0].strip.
|
|
41
|
+
# NOTE: p[/[^\-]*/].to_s is 25% faster than gsub('-', '')
|
|
42
|
+
str.scan(regex).map{ |arr| arr[0].strip[/[^\-]*/].to_s }.flatten
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
def self.split_py(str)
|
|
@@ -132,7 +132,7 @@ module ZhongwenTools
|
|
|
132
132
|
end
|
|
133
133
|
|
|
134
134
|
def self.find_py(str)
|
|
135
|
-
str.scan(ZhongwenTools::Regex.py).map{ |x|
|
|
135
|
+
str.scan(ZhongwenTools::Regex.py).map{ |x| x.compact[0] }
|
|
136
136
|
end
|
|
137
137
|
|
|
138
138
|
def self.recapitalize(obj, capitalized)
|
|
@@ -174,7 +174,9 @@ module ZhongwenTools
|
|
|
174
174
|
#
|
|
175
175
|
# Returns a Regexp.
|
|
176
176
|
def self.detect_regex(type)
|
|
177
|
-
|
|
177
|
+
# TODO: memoize
|
|
178
|
+
@memoized_detect_regex ||= {}
|
|
179
|
+
@memoized_detect_regex[type] ||= /#{romanization_values(type).sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
|
|
178
180
|
end
|
|
179
181
|
|
|
180
182
|
# Internal: Selects the romanization values for a particular romanization type.
|
|
@@ -188,11 +190,13 @@ module ZhongwenTools
|
|
|
188
190
|
#
|
|
189
191
|
# Returns an Array that contains the romanization's values.
|
|
190
192
|
def self.romanization_values(type)
|
|
191
|
-
|
|
193
|
+
# TODO: memoize
|
|
194
|
+
@memoized_romanization_values = {}
|
|
195
|
+
@memoized_romanization_values[type] = ZhongwenTools::Romanization::ROMANIZATIONS_TABLE.map do |r|
|
|
192
196
|
"[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn]
|
|
193
|
-
end
|
|
197
|
+
end.flatten
|
|
194
198
|
|
|
195
|
-
|
|
199
|
+
@memoized_romanization_values[type]
|
|
196
200
|
end
|
|
197
201
|
|
|
198
202
|
def self.romanization_module(type)
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zhongwen_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.17.1
|
|
4
|
+
version: 0.17.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steven Daniels
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2015-01-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|