zhongwen_tools 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 38e857f5b289cca5e024238a437b3e69ca74a443
4
- data.tar.gz: cdd2214ad7fb466252e5416f485a261b447dcaf6
3
+ metadata.gz: 17555fc2b7ad68dc9185b7f0ae0eea1226a799eb
4
+ data.tar.gz: 4b53940c086bf4b839fd5cf22e5a6bb380692b26
5
5
  SHA512:
6
- metadata.gz: 1185558e187c41e55870236bae4261c3be2f4227acd9fe3b7c3d53cfeeec17ed7a7dae6d516ef44055872cb1fbc2f71c31973caa02d4ad71984790a8070ac2f3
7
- data.tar.gz: f7cb7d3b2c486e9faebb56d751b4077bbce7f63d5629dcc00c94470432d61e2460a2f2b41535cf767abd64cb3a5a79dcedb491da36fb377b9f630bad4db4b427
6
+ metadata.gz: 7ac4b646e848da7548a3b9f8915b3202099186c913acd78a6130826bef2bda1bcd3ae18b8ab5d19409d30c59722fd6dd708b863d57b197e4336fb81ae4e20785
7
+ data.tar.gz: adc22130db84d0320b5763484435d94913d5359c1983a9631202cfe6f3181173e1185b0a1962a99521bc2d142a6df1930d504c1ef2159fecbd8509b59ce17856
@@ -13,6 +13,10 @@ module ZhongwenTools
13
13
  /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}([\s\-])?)/
14
14
  end
15
15
 
16
+ def pinyin_num
17
+ /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
18
+ end
19
+
16
20
  def fullwidth
17
21
  /[0-9A-Za-z%.:#$&+-/\=;<>]/
18
22
  end
@@ -61,9 +61,7 @@ module ZhongwenTools
61
61
  #
62
62
  # Returns a string with actual pinyin
63
63
  def _to_pinyin str
64
- # TODO: move regex to ZhongwenTools::Regex
65
- regex = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
66
-
64
+ regex = Regex.pinyin_num
67
65
  # Using gsub is ~8x faster than using scan and each.
68
66
  # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert,
69
67
  # otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
@@ -76,31 +74,31 @@ module ZhongwenTools
76
74
  end
77
75
 
78
76
  def _to_romanization str, to, from
79
- convert_to = _set_type to
80
- convert_from = _set_type from
81
-
77
+ # NOTE: extracting/refactoring the tokens logic causes tests to fail.
82
78
  begin
83
- tokens = self.send("split_#{from}").uniq
79
+ tokens = str.send("split_#{from}").uniq
84
80
  rescue
85
81
  tokens = str.split(/[ \-]/).uniq
86
82
  end
87
83
 
88
84
  tokens.collect do |t|
89
- search = t.gsub(/[1-5].*/,'')
90
-
91
- if from.nil?
92
- replace = (_replacement(t) || {}).fetch(to){search}
93
- else
94
- replace = (_replacement(t, from) || {}).fetch(to){search}
95
- end
96
-
97
- replace = _fix_capitalization(str, t, replace)
85
+ search, replace = _token_search_replace(t, str, to, from)
98
86
  str = str.gsub(search, replace)
99
87
  end
100
88
 
101
89
  str
102
90
  end
103
91
 
92
+ def _token_search_replace(token, str, to, from)
93
+ search = token.gsub(/[1-5].*/,'')
94
+
95
+ replace = _replacement(token, from).fetch(to){ search }
96
+ replace = _fix_capitalization(str, token, replace)
97
+
98
+
99
+ [search, replace]
100
+ end
101
+
104
102
  def _fix_capitalization(str, token, replace)
105
103
  replace = replace.capitalize if(token.downcase != token)
106
104
 
@@ -109,13 +107,15 @@ module ZhongwenTools
109
107
 
110
108
  def _replacement(token, from = nil)
111
109
  token = token.downcase.gsub(/[1-5].*/,'')
112
- ROMANIZATIONS_TABLE.find do |x|
110
+ result = ROMANIZATIONS_TABLE.find do |x|
113
111
  if from.nil?
114
112
  x.values.include?(token)
115
113
  else
116
114
  x[from] == token
117
115
  end
118
116
  end
117
+
118
+ result || {}
119
119
  end
120
120
 
121
121
  def _convert_romanization str, to, from
@@ -132,6 +132,10 @@ module ZhongwenTools
132
132
  _to_romanization str, :pyn, from
133
133
  end
134
134
  else
135
+ if from == :py
136
+ str = _convert_pinyin_to_pyn(str)
137
+ from = :pyn
138
+ end
135
139
  _to_romanization str, to, from
136
140
  end
137
141
 
@@ -142,7 +146,6 @@ module ZhongwenTools
142
146
 
143
147
  def _convert_pinyin_to_pyn(pinyin)
144
148
  # TODO: should method check to make sure pinyin is accurate?
145
- pyn = []
146
149
  words = pinyin.split(' ')
147
150
 
148
151
  pyn = words.map do |word|
@@ -169,16 +172,18 @@ module ZhongwenTools
169
172
  matches = PYN_PY.values.select do |x|
170
173
  py.include? x
171
174
  end
175
+ match = select_pinyin_match(matches)
176
+ replace = PYN_PY.find{|k,v| k if v == match}[0]
172
177
 
178
+ py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
179
+ end
180
+
181
+ def select_pinyin_match(matches)
173
182
  # Take the longest pinyin match. Use bytes because 'è' is preferred over 'n' or 'r' or 'm'.
174
183
  match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
175
184
 
176
185
  # Edge case: en/eng pyn -> py conversion is one-way only.
177
- match = match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
178
-
179
- replace = PYN_PY.find{|k,v| k if v == match}[0]
180
-
181
- py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
186
+ match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
182
187
  end
183
188
 
184
189
 
@@ -103,7 +103,7 @@ module ZhongwenTools
103
103
  def romanization?(str = nil)
104
104
  str ||= self
105
105
 
106
- [:pyn, :py, :zyfh, :wg, :typy, :yale, :msp2].find do |type|
106
+ [:pyn, :py, :zyfh, :wg, :typy, :yale, :mps2].find do |type|
107
107
  self.send("#{type}?", str)
108
108
  end
109
109
  end
@@ -126,7 +126,11 @@ module ZhongwenTools
126
126
  #
127
127
  # Returns a Regexp.
128
128
  def detect_regex(type)
129
- /#{ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
129
+ /#{regex_values(type).sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
130
+ end
131
+
132
+ def regex_values(type)
133
+ ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten
130
134
  end
131
135
  end
132
136
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = '0.12.1'
2
+ VERSION = '0.12.2'
3
3
  end
@@ -71,6 +71,19 @@ class TestRomanization < Minitest::Test
71
71
  assert 'ㄋㄧ3 ㄏㄠ3'.zyfh?
72
72
 
73
73
  assert_equal 'ㄋㄧ3 ㄏㄠ3', 'ni3 hau3'.to_bpmf(:yale)
74
+
75
+ t = :bopomofo
76
+ @romanizations.each do |rom|
77
+ rom.each do |type, entry|
78
+ #if type == :bopomofo
79
+ assert_equal rom[t].downcase, entry.send("to_#{t}", type).downcase, "to_#{t}(#{type}) should convert to #{t}."
80
+ assert_equal rom[t].downcase, entry.send("to_#{t}").downcase, "to_#{t}(#{type}) should convert to #{t}, but it isn't detected properly"
81
+ #else
82
+ #assert_equal rom[:t], entry.to_pinyin(type), "to_pinyin(#{type}) should convert to pinyin."
83
+ #assert_equal rom[:t], entry.to_pinyin, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly" unless type == :typy
84
+ #end
85
+ end
86
+ end
74
87
  end
75
88
 
76
89
  def test_wade_giles
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-11 00:00:00.000000000 Z
11
+ date: 2014-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake