zhongwen_tools 0.12.1 → 0.12.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 38e857f5b289cca5e024238a437b3e69ca74a443
4
- data.tar.gz: cdd2214ad7fb466252e5416f485a261b447dcaf6
3
+ metadata.gz: 17555fc2b7ad68dc9185b7f0ae0eea1226a799eb
4
+ data.tar.gz: 4b53940c086bf4b839fd5cf22e5a6bb380692b26
5
5
  SHA512:
6
- metadata.gz: 1185558e187c41e55870236bae4261c3be2f4227acd9fe3b7c3d53cfeeec17ed7a7dae6d516ef44055872cb1fbc2f71c31973caa02d4ad71984790a8070ac2f3
7
- data.tar.gz: f7cb7d3b2c486e9faebb56d751b4077bbce7f63d5629dcc00c94470432d61e2460a2f2b41535cf767abd64cb3a5a79dcedb491da36fb377b9f630bad4db4b427
6
+ metadata.gz: 7ac4b646e848da7548a3b9f8915b3202099186c913acd78a6130826bef2bda1bcd3ae18b8ab5d19409d30c59722fd6dd708b863d57b197e4336fb81ae4e20785
7
+ data.tar.gz: adc22130db84d0320b5763484435d94913d5359c1983a9631202cfe6f3181173e1185b0a1962a99521bc2d142a6df1930d504c1ef2159fecbd8509b59ce17856
@@ -13,6 +13,10 @@ module ZhongwenTools
13
13
  /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}([\s\-])?)/
14
14
  end
15
15
 
16
+ def pinyin_num
17
+ /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
18
+ end
19
+
16
20
  def fullwidth
17
21
  /[0-9A-Za-z%.:#$&+-/\=;<>]/
18
22
  end
@@ -61,9 +61,7 @@ module ZhongwenTools
61
61
  #
62
62
  # Returns a string with actual pinyin
63
63
  def _to_pinyin str
64
- # TODO: move regex to ZhongwenTools::Regex
65
- regex = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
66
-
64
+ regex = Regex.pinyin_num
67
65
  # Using gsub is ~8x faster than using scan and each.
68
66
  # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert,
69
67
  # otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
@@ -76,31 +74,31 @@ module ZhongwenTools
76
74
  end
77
75
 
78
76
  def _to_romanization str, to, from
79
- convert_to = _set_type to
80
- convert_from = _set_type from
81
-
77
+ # NOTE: extract/refactor tokens cause tests to fail.
82
78
  begin
83
- tokens = self.send("split_#{from}").uniq
79
+ tokens = str.send("split_#{from}").uniq
84
80
  rescue
85
81
  tokens = str.split(/[ \-]/).uniq
86
82
  end
87
83
 
88
84
  tokens.collect do |t|
89
- search = t.gsub(/[1-5].*/,'')
90
-
91
- if from.nil?
92
- replace = (_replacement(t) || {}).fetch(to){search}
93
- else
94
- replace = (_replacement(t, from) || {}).fetch(to){search}
95
- end
96
-
97
- replace = _fix_capitalization(str, t, replace)
85
+ search, replace = _token_search_replace(t, str, to, from)
98
86
  str = str.gsub(search, replace)
99
87
  end
100
88
 
101
89
  str
102
90
  end
103
91
 
92
+ def _token_search_replace(token, str, to, from)
93
+ search = token.gsub(/[1-5].*/,'')
94
+
95
+ replace = _replacement(token, from).fetch(to){ search }
96
+ replace = _fix_capitalization(str, token, replace)
97
+
98
+
99
+ [search, replace]
100
+ end
101
+
104
102
  def _fix_capitalization(str, token, replace)
105
103
  replace = replace.capitalize if(token.downcase != token)
106
104
 
@@ -109,13 +107,15 @@ module ZhongwenTools
109
107
 
110
108
  def _replacement(token, from = nil)
111
109
  token = token.downcase.gsub(/[1-5].*/,'')
112
- ROMANIZATIONS_TABLE.find do |x|
110
+ result = ROMANIZATIONS_TABLE.find do |x|
113
111
  if from.nil?
114
112
  x.values.include?(token)
115
113
  else
116
114
  x[from] == token
117
115
  end
118
116
  end
117
+
118
+ result || {}
119
119
  end
120
120
 
121
121
  def _convert_romanization str, to, from
@@ -132,6 +132,10 @@ module ZhongwenTools
132
132
  _to_romanization str, :pyn, from
133
133
  end
134
134
  else
135
+ if from == :py
136
+ str = _convert_pinyin_to_pyn(str)
137
+ from = :pyn
138
+ end
135
139
  _to_romanization str, to, from
136
140
  end
137
141
 
@@ -142,7 +146,6 @@ module ZhongwenTools
142
146
 
143
147
  def _convert_pinyin_to_pyn(pinyin)
144
148
  # TODO: should method check to make sure pinyin is accurate?
145
- pyn = []
146
149
  words = pinyin.split(' ')
147
150
 
148
151
  pyn = words.map do |word|
@@ -169,16 +172,18 @@ module ZhongwenTools
169
172
  matches = PYN_PY.values.select do |x|
170
173
  py.include? x
171
174
  end
175
+ match = select_pinyin_match(matches)
176
+ replace = PYN_PY.find{|k,v| k if v == match}[0]
172
177
 
178
+ py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
179
+ end
180
+
181
+ def select_pinyin_match(matches)
173
182
  # take the longest pinyin match. Use bytes because 'è' is prefered over 'n' or 'r' or 'm'
174
183
  match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
175
184
 
176
185
  # Edge case.. en/eng pyn -> py conversion is one way only.
177
- match = match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
178
-
179
- replace = PYN_PY.find{|k,v| k if v == match}[0]
180
-
181
- py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
186
+ match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
182
187
  end
183
188
 
184
189
 
@@ -103,7 +103,7 @@ module ZhongwenTools
103
103
  def romanization?(str = nil)
104
104
  str ||= self
105
105
 
106
- [:pyn, :py, :zyfh, :wg, :typy, :yale, :msp2].find do |type|
106
+ [:pyn, :py, :zyfh, :wg, :typy, :yale, :mps2].find do |type|
107
107
  self.send("#{type}?", str)
108
108
  end
109
109
  end
@@ -126,7 +126,11 @@ module ZhongwenTools
126
126
  #
127
127
  # Returns a Regexp.
128
128
  def detect_regex(type)
129
- /#{ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
129
+ /#{regex_values(type).sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
130
+ end
131
+
132
+ def regex_values(type)
133
+ ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten
130
134
  end
131
135
  end
132
136
  end
@@ -1,3 +1,3 @@
1
1
  module ZhongwenTools
2
- VERSION = '0.12.1'
2
+ VERSION = '0.12.2'
3
3
  end
@@ -71,6 +71,19 @@ class TestRomanization < Minitest::Test
71
71
  assert 'ㄋㄧ3 ㄏㄠ3'.zyfh?
72
72
 
73
73
  assert_equal 'ㄋㄧ3 ㄏㄠ3', 'ni3 hau3'.to_bpmf(:yale)
74
+
75
+ t = :bopomofo
76
+ @romanizations.each do |rom|
77
+ rom.each do |type, entry|
78
+ #if type == :bopomofo
79
+ assert_equal rom[t].downcase, entry.send("to_#{t}", type).downcase, "to_#{t}(#{type}) should convert to #{t}."
80
+ assert_equal rom[t].downcase, entry.send("to_#{t}").downcase, "to_#{t}(#{type}) should convert to #{t}, but it isn't detected properly"
81
+ #else
82
+ #assert_equal rom[:t], entry.to_pinyin(type), "to_pinyin(#{type}) should convert to pinyin."
83
+ #assert_equal rom[:t], entry.to_pinyin, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly" unless type == :typy
84
+ #end
85
+ end
86
+ end
74
87
  end
75
88
 
76
89
  def test_wade_giles
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zhongwen_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Daniels
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-11 00:00:00.000000000 Z
11
+ date: 2014-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake