RubyGems - zhongwen_tools - Versions diffs - 0.12.1 → 0.12.2 - Mend

zhongwen_tools 0.12.1 → 0.12.2

Files changed (7) hide show

checksums.yaml +4 -4
data/lib/zhongwen_tools/regex.rb +4 -0
data/lib/zhongwen_tools/romanization.rb +28 -23
data/lib/zhongwen_tools/romanization/detect.rb +6 -2
data/lib/zhongwen_tools/version.rb +1 -1
data/test/test_romanization.rb +13 -0
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 38e857f5b289cca5e024238a437b3e69ca74a443
-  data.tar.gz: cdd2214ad7fb466252e5416f485a261b447dcaf6
+  metadata.gz: 17555fc2b7ad68dc9185b7f0ae0eea1226a799eb
+  data.tar.gz: 4b53940c086bf4b839fd5cf22e5a6bb380692b26
 SHA512:
-  metadata.gz: 1185558e187c41e55870236bae4261c3be2f4227acd9fe3b7c3d53cfeeec17ed7a7dae6d516ef44055872cb1fbc2f71c31973caa02d4ad71984790a8070ac2f3
-  data.tar.gz: f7cb7d3b2c486e9faebb56d751b4077bbce7f63d5629dcc00c94470432d61e2460a2f2b41535cf767abd64cb3a5a79dcedb491da36fb377b9f630bad4db4b427
+  metadata.gz: 7ac4b646e848da7548a3b9f8915b3202099186c913acd78a6130826bef2bda1bcd3ae18b8ab5d19409d30c59722fd6dd708b863d57b197e4336fb81ae4e20785
+  data.tar.gz: adc22130db84d0320b5763484435d94913d5359c1983a9631202cfe6f3181173e1185b0a1962a99521bc2d142a6df1930d504c1ef2159fecbd8509b59ce17856

data/lib/zhongwen_tools/regex.rb CHANGED Viewed

@@ -13,6 +13,10 @@ module ZhongwenTools
       /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}([\s\-])?)/
     end
+    def pinyin_num
+      /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
+    end
     def fullwidth
       /[０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞]/
     end

data/lib/zhongwen_tools/romanization.rb CHANGED Viewed

@@ -61,9 +61,7 @@ module ZhongwenTools
     #
     #  Returns a string with actual pinyin
     def _to_pinyin str
-      # TODO: move regex to ZhongwenTools::Regex
-      regex = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
+      regex = Regex.pinyin_num
       # Using gsub is ~8x faster than using scan and each.
       # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert,
       #              otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
@@ -76,31 +74,31 @@ module ZhongwenTools
     end
     def _to_romanization str, to, from
-      convert_to = _set_type to
-      convert_from = _set_type from
+      # NOTE: extract/refactor tokens cause tests to fail.
       begin
-        tokens = self.send("split_#{from}").uniq
+        tokens = str.send("split_#{from}").uniq
       rescue
         tokens = str.split(/[ \-]/).uniq
       end
       tokens.collect do |t|
-        search = t.gsub(/[1-5].*/,'')
-        if from.nil?
-          replace = (_replacement(t) || {}).fetch(to){search}
-        else
-          replace = (_replacement(t, from) || {}).fetch(to){search}
-        end
-        replace = _fix_capitalization(str, t, replace)
+        search, replace = _token_search_replace(t, str, to, from)
         str =  str.gsub(search, replace)
       end
       str
     end
+    def _token_search_replace(token, str, to, from)
+      search = token.gsub(/[1-5].*/,'')
+      replace = _replacement(token, from).fetch(to){ search }
+      replace = _fix_capitalization(str, token, replace)
+      [search, replace]
+    end
     def _fix_capitalization(str, token, replace)
       replace = replace.capitalize  if(token.downcase != token)
@@ -109,13 +107,15 @@ module ZhongwenTools
     def _replacement(token, from = nil)
       token = token.downcase.gsub(/[1-5].*/,'')
-      ROMANIZATIONS_TABLE.find do |x|
+      result = ROMANIZATIONS_TABLE.find do |x|
         if from.nil?
           x.values.include?(token)
         else
           x[from] == token
         end
       end
+      result || {}
     end
     def _convert_romanization str, to, from
@@ -132,6 +132,10 @@ module ZhongwenTools
             _to_romanization str, :pyn, from
           end
         else
+          if from == :py
+            str = _convert_pinyin_to_pyn(str)
+            from = :pyn
+          end
           _to_romanization str, to, from
         end
@@ -142,7 +146,6 @@ module ZhongwenTools
     def _convert_pinyin_to_pyn(pinyin)
       # TODO: should method check to make sure pinyin is accurate?
-      pyn = []
       words =  pinyin.split(' ')
       pyn = words.map do |word|
@@ -169,16 +172,18 @@ module ZhongwenTools
       matches = PYN_PY.values.select do |x|
         py.include? x
       end
+      match = select_pinyin_match(matches)
+      replace = PYN_PY.find{|k,v| k if v == match}[0]
+      py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
+    end
+    def select_pinyin_match(matches)
      # take the longest pinyin match. Use bytes because 'è' is preferred over 'n' or 'r' or 'm'
       match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
      # Edge case: en/eng pyn -> py conversion is one-way only.
-      match = match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
-      replace = PYN_PY.find{|k,v| k if v == match}[0]
-      py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
+      match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
     end

data/lib/zhongwen_tools/romanization/detect.rb CHANGED Viewed

@@ -103,7 +103,7 @@ module ZhongwenTools
     def romanization?(str = nil)
       str ||= self
-      [:pyn, :py, :zyfh, :wg, :typy, :yale, :msp2].find do |type|
+      [:pyn, :py, :zyfh, :wg, :typy, :yale, :mps2].find do |type|
         self.send("#{type}?", str)
       end
     end
@@ -126,7 +126,11 @@ module ZhongwenTools
     #
     # Returns a Regexp.
     def detect_regex(type)
-      /#{ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten.sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
+      /#{regex_values(type).sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
+    end
+    def regex_values(type)
+      ROMANIZATIONS_TABLE.map{ |r| "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn] }.flatten
     end
   end
 end

data/lib/zhongwen_tools/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module ZhongwenTools
-  VERSION = '0.12.1'
+  VERSION = '0.12.2'
 end

data/test/test_romanization.rb CHANGED Viewed

@@ -71,6 +71,19 @@ class TestRomanization < Minitest::Test
      assert 'ㄋㄧ3 ㄏㄠ3'.zyfh?
      assert_equal 'ㄋㄧ3 ㄏㄠ3', 'ni3 hau3'.to_bpmf(:yale)
+     t = :bopomofo
+    @romanizations.each do |rom|
+      rom.each do |type, entry|
+        #if type == :bopomofo
+        assert_equal rom[t].downcase, entry.send("to_#{t}", type).downcase, "to_#{t}(#{type}) should convert to #{t}."
+        assert_equal rom[t].downcase, entry.send("to_#{t}").downcase, "to_#{t}(#{type}) should convert to #{t}, but it isn't detected properly"
+        #else
+        #assert_equal rom[:t], entry.to_pinyin(type), "to_pinyin(#{type}) should convert to pinyin."
+        #assert_equal rom[:t], entry.to_pinyin, "to_pinyin(#{type}) should convert to pinyin, but it isn't detected properly" unless type == :typy
+        #end
+      end
+    end
   end
   def test_wade_giles

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: zhongwen_tools
 version: !ruby/object:Gem::Version
-  version: 0.12.1
+  version: 0.12.2
 platform: ruby
 authors:
 - Steven Daniels
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-11 00:00:00.000000000 Z
+date: 2014-06-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake