RubyGems - zhongwen_tools - Versions diffs - 0.7.2 → 0.9.0 - Mend

zhongwen_tools 0.7.2 → 0.9.0

Files changed (23) hide show

checksums.yaml +4 -4
data/.travis.yml +0 -5
data/README.md +4 -16
data/lib/zhongwen_tools/conversion/string.rb +1 -1
data/lib/zhongwen_tools/conversion.rb +5 -5
data/lib/zhongwen_tools/integer.rb +2 -15
data/lib/zhongwen_tools/numbers.rb +21 -18
data/lib/zhongwen_tools/regex/ruby18.rb +15 -0
data/lib/zhongwen_tools/regex.rb +94 -0
data/lib/zhongwen_tools/romanization/conversion_table.rb +3 -3
data/lib/zhongwen_tools/romanization/detect.rb +61 -49
data/lib/zhongwen_tools/romanization/pyn_to_py.rb +5 -4
data/lib/zhongwen_tools/romanization/string.rb +23 -0
data/lib/zhongwen_tools/romanization.rb +3 -1
data/lib/zhongwen_tools/string/caps.rb +5 -5
data/lib/zhongwen_tools/string/ruby18.rb +57 -56
data/lib/zhongwen_tools/string/ruby19.rb +1 -1
data/lib/zhongwen_tools/string.rb +12 -8
data/lib/zhongwen_tools/version.rb +1 -1
data/test/test_numbers.rb +15 -3
data/test/test_romanization.rb +25 -5
data/test/test_string.rb +1 -0
metadata +6 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: e932cfe269ff98dea98a88c0d1ff37961a8f376f
-  data.tar.gz: f39e6e24ec02e8f44ac16f33a2945a605dd962fe
+  metadata.gz: c46a1639e99601e0a9f9cb5e6961f148cf030758
+  data.tar.gz: e4dc3033e220ecd882915dadeb7e9c780a0cbe65
 SHA512:
-  metadata.gz: acf83d77043be54b7a8c8f24a4efce7c93a071ae6e3dab65a7a1dbaa1e38eac8e89aeadfcdd828daf5a831b23e49adc67c37991a5d71608990d3fbc9ea8880c3
-  data.tar.gz: abd8143c12ca09bb7a12c341f188232ff03d4fccf4ee5faa4c79793c4d728673cfb6d161b659fd5b4cf21d29201eca7a90596975bd759a10fead8952e3d45c4a
+  metadata.gz: 427f2bc4b43ea3734995aa2d4c0523244882e300457500728164eef7f297441bd400569d272e59d7b3d7218777b39f46e67ca430765f4d272b5432c186dd09d2
+  data.tar.gz: 1c6065127ee0fda328044d412b545f4b85cb53660df3070599c081f197c41f581054a69cb4d64d05377e18cf81c15808d31ec1804f9ef0951fc24350bcb84374

data/.travis.yml CHANGED Viewed

@@ -4,8 +4,3 @@ rvm:
   - 1.9.3
   - 2.0.0
   - 2.1.1
-matrix:
-  include:
-    - rvm: 1.8.7
-      gemfile: Gemfile.1.8.7

data/README.md CHANGED Viewed

@@ -132,22 +132,6 @@ The following capitalization methods work for pinyin.
     ZhongwenTools::String.capitalize 'àomén'
     #=> 'Àomén'
-#### Ruby 1.8 safe methods
-Zhongwen Tools is tested on every ruby since 1.8.7 and lets you deal
-with multibyte strings in an simple, consistent fashion regardless of
-which ruby version you are using.
-    require 'zhongwen_tools/string'
-    ZhongwenTools::String.chars '中文'
-    #=> ['中','文']
-    ZhongwenTools::String.size '中文'
-    #=> 2
-    ZhongwenTools::String.reverse '中文'
-    #=> '文中'
-    ZhongwenTools::String.to_utf8 '\x{D6D0}\x{CEC4}'
-    #=> '中文'
 ### Numbers
 Functions for converting to and from Chinese numbers.
@@ -222,6 +206,10 @@ scripts. It **does not convert Chinese characters to pinyin** (see ZhongwenTools
     str.to_py.py?
     #=> true
+    #split pinyin with numbers accurately.
+    'dong1xi1'.split_pyn    # => ['dong1', 'xi1']
+    'dongxi'.split_pyn      # => ['dong', 'xi']
 ### Conversion
 Functions for converting between scripts (e.g. traditional Chinese to
 simplified Chinese) and [TODO] between Chinese and romanization systems (e.g.

data/lib/zhongwen_tools/conversion/string.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#encoding: utf-8
+# encoding: utf-8
 module ZhongwenTools
   module String

data/lib/zhongwen_tools/conversion.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#encoding: utf-8
+# encoding: utf-8
 module ZhongwenTools
@@ -20,7 +20,6 @@ module ZhongwenTools
     def to_zhtw(str = nil)
       str ||= self
       convert(:zhtw, str)
     end
@@ -48,17 +47,18 @@ module ZhongwenTools
     private
     # Conversion data and algorithm shamelessly stolen from chinese_convt gem.
+    # ( https://github.com/xxxooo/chinese_convt )
+    #
     # There are two differences:
     #   + Zhongwen Tools loads the conversion data into memory and
-    #     chinese_convt reads the file every time. As a result,
+    #     chinese_convt reads the file every time it converts. As a result,
     #     Zhongwen Tools is  ~12X faster.
     #   + Zhongwen Tools uses Ruby's nifty str[/regex/] = replacement
     #     instead of indices. Conversion tests using indices fail with Ruby 1.8.
-    # ( https://github.com/xxxooo/chinese_convt )
     def load_table
       filename = File.expand_path('../conversion/conversion_data', __FILE__)
       File.open(filename).read.split("\n&\n").each do |group|
-      ZH_CONVERSION_TABLE << group.split("\n").map do |type|
+        ZH_CONVERSION_TABLE << group.split("\n").map do |type|
           Hash[ type.split(',').map{ |term| term.split(':') } ]
         end
       end

data/lib/zhongwen_tools/integer.rb CHANGED Viewed

@@ -1,9 +1,10 @@
-#encoding: utf-8
+# encoding: utf-8
 require File.expand_path("../numbers", __FILE__)
 module ZhongwenTools
   module Integer
     include ZhongwenTools::Numbers
+    extend self
     def to_zh(type = nil)
       type == :zht ? self.to_zht? : self.to_zhs
@@ -23,19 +24,5 @@ module ZhongwenTools
       int ||= self
       number_to_pyn int.to_s, :num
     end
-    class Basement
-      include ZhongwenTools::Integer
-    end
-    def self.to_zhs(*args)
-      Basement.new.to_zhs(*args)
-    end
-    def self.to_zht(*args)
-      Basement.new.to_zht(*args)
-    end
-    def self.to_pyn(*args)
-      Basement.new.to_pyn(*args)
-    end
   end
 end

data/lib/zhongwen_tools/numbers.rb CHANGED Viewed

@@ -1,10 +1,13 @@
-#encoding: utf-8
+# encoding: utf-8
+require File.expand_path("../regex", __FILE__)
+# TODO: more testing
 module ZhongwenTools
   module Numbers
     extend self
     NUMBER_MULTIPLES = '拾十百佰千仟仟万萬亿億'
+    # TODO: Add huge numbers.
+    # 垓	秭	穰	溝	澗	正	載 --> beyond 100,000,000!
     NUMBERS_TABLE = [
       { :zhs => '零', :zht => '零', :num => 0, :pyn => 'ling2'},
       { :zhs => '〇', :zht => '〇', :num => 0, :pyn => 'ling2'},
@@ -35,31 +38,31 @@ module ZhongwenTools
       { :zhs => '廿', :zht => '廿', :num => 20, :pyn => ' nian4'},
       { :zhs => '百', :zht => '百', :num => 100, :pyn => 'bai2'},
       { :zhs => '佰', :zht => '佰', :num => 100, :pyn => 'bai2'},
-      { :zhs => '千', :zht => '千', :num => 1000, :pyn => 'qian2'},
-      { :zhs => '仟', :zht => '仟', :num => 1000, :pyn => 'qian2'},
-      { :zhs => '万', :zht => '萬', :num => 10000, :pyn => 'wan4'},
-      { :zhs => '亿', :zht => '億', :num => 100000000, :pyn => 'yi4'},
+      { :zhs => '千', :zht => '千', :num => 1_000, :pyn => 'qian2'},
+      { :zhs => '仟', :zht => '仟', :num => 1_000, :pyn => 'qian2'},
+      { :zhs => '万', :zht => '萬', :num => 10_000, :pyn => 'wan4'},
+      { :zhs => '亿', :zht => '億', :num => 100_000_000, :pyn => 'yi4'},
     ]
     def number? word
-      #垓	秭	穰	溝	澗	正	載 --> beyond 100,000,000!
-      "#{word}".gsub(/([\d]|[一二三四五六七八九十百千萬万億亿]){2,}/,'') == ''
+      "#{word}".gsub(/([\d]|#{ZhongwenTools::Regex.zh_numbers}){1,}/,'') == ''
     end
     def zh_number_to_number(zh_number)
       zh_number = zh_number.to_s
       numbers = convert_date(zh_number)
-      #if it's a year, or an oddly formatted number
+      # if it's a year, or an oddly formatted number
       return numbers.join('').to_i if zh_number[/[#{NUMBER_MULTIPLES}]/u].nil?
       convert_numbers numbers
     end
-    #these should also be able to convert numbers to chinese numbers
+    # these should also be able to convert numbers to chinese numbers
     def number_to_zhs type, number
       convert_number_to :zhs, type.to_sym, number
     end
     def number_to_zht type, number
       convert_number_to :zht, type.to_sym, number
     end
@@ -70,7 +73,7 @@ module ZhongwenTools
     private
     def convert_date(zh)
-      #if it's a year, or an oddly formatted number
+      # if it's a year, or an oddly formatted number
       zh_numbers = ZhongwenTools::String.chars zh
       numbers = [];
       i = 0
@@ -78,7 +81,7 @@ module ZhongwenTools
       while( i < zh_numbers.length)
         curr_number = zh_numbers[i]
-        #x[:num] == curr_number.to_i is a kludge; any string will == 0
+        # x[:num] == curr_number.to_i is a kludge; any string will == 0
         num = convert(curr_number)[:num]
         numbers << num
         i += 1
@@ -125,7 +128,7 @@ module ZhongwenTools
     def is_number_multiplier?(number)
-      [10,100,1000,10000,100000000].include? number
+      [10,100,1_000,10_000,100_000_000].include? number
     end
@@ -144,7 +147,7 @@ module ZhongwenTools
     end
     def convert_from_num number, to
-      #TODO: this will fail for numbers over 1 billion. grr.
+      # TODO: this will fail for numbers over 1 billion. grr.
       str = number.to_s
       len = str.length
       converted_number = []
@@ -157,8 +160,8 @@ module ZhongwenTools
           converted_number << _find_number(num, to) unless num == 0
         else
           converted_number <<  _find_wan_level(i, to)
-          #checks the wan level and ...
-          converted_number <<  _find_number(num, to) if (num == 1 && (10**(i) / 10000 ** wan) != 10) || num != 1
+          # checks the wan level and ...
+          converted_number <<  _find_number(num, to) if (num == 1 && (10**(i) / 10_000 ** wan) != 10) || num != 1
         end
       end
@@ -174,14 +177,14 @@ module ZhongwenTools
         converted_number = convert_from_zh number, to
       end
-      #liang rules are tough...
+      # FIXME: liang rules are tough...
       converted_number.join(separator).gsub(/零[#{NUMBER_MULTIPLES}]/u,'')#.gsub(/二([百佰千仟仟万萬亿億])/){"#{NUMBERS_TABLE.find{|x|x[:pyn] == 'liang3'}[to]}#{$1}"}
     end
     private
     def _find_wan_level(i, to)
-      _find_number((10**(i)), to) || _find_number((10**(i) / 10000), to) || _find_number((10**(i) / 10000**2), to)
+      _find_number((10**(i)), to) || _find_number((10**(i) / 10_000), to) || _find_number((10**(i) / 10_000**2), to)
     end
     def _find_number(num, to)

data/lib/zhongwen_tools/regex/ruby18.rb ADDED Viewed

@@ -0,0 +1,15 @@
+# encoding: utf-8
+module ZhongwenTools
+  module Regex
+    def py_tones
+      {
+        'a' => '(ā|á|ǎ|à|a)',
+        'e' => '(ē|é|ě|è|e)',
+        'i' => '(ī|í|ǐ|ì|i)',
+        'o' => '(ō|ó|ǒ|ò|o)',
+        'u' => '(ū|ú|ǔ|ù|u)',
+        'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
+      }
+    end
+  end
+end

data/lib/zhongwen_tools/regex.rb ADDED Viewed

@@ -0,0 +1,94 @@
+# encoding: utf-8
+module ZhongwenTools
+  module Regex
+    extend self
+    def pyn
+      # NOTE: might not need / want the space on the end.
+      /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
+    end
+    def py
+      # NOTE: might not need / want the space on the end.
+      /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
+    end
+    def fullwidth
+      /[０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞]/
+    end
+    def capital_letters
+      /(#{Regexp.union(ZhongwenTools::UNICODE_CAPS.keys)})/
+    end
+    def lowercase_letters
+      /(#{Regexp.union(ZhongwenTools::UNICODE_CAPS.values)})/
+    end
+    def zh
+      /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/
+    end
+    def punc
+      /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]/
+    end
+    def zh_punc
+      # TODO: includes non-zh punctuation codes. Should only include punctuation in CJK ranges.
+      /[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
+    end
+    def zh_numbers
+      # TODO: include numbers like yotta, etc.
+      # 垓	秭	穰	溝	澗	正	載 --> beyond 100,000,000!
+      /[〇零一壹幺二贰貳两兩三弎叁參四肆䦉五伍六陆陸七柒八捌九玖十拾廿百佰千仟万萬亿億]/
+    end
+    # Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.
+    #
+    # Examples
+    #
+    #
+    #   bopomofo #=> <Regex>
+    #
+    # Returns a Regex.
+    def bopomofo
+      /[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ]/
+    end
+    private
+    def pyn_regexes
+      # http://stackoverflow.com/questions/20736291/regex-for-matching-pinyin
+      # https://www.debuggex.com/r/_9kbxA6f00gIGiVo
+      # NOTE: you might need to change the order of these regexes for more accurate matching of some pinyin.
+      {
+        :nl_regex => /([nN]eng?|[lnLN](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
+        :bpm_regex => /([mM]iu|[pmPM]ou|[bpmBPM](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
+        :f_regex => /([fF](ou?|[ae](ng?|i)?|u))/,
+        :dt_regex => /([dD](e(i|ng?)|i(a[on]?|u))|[dtDT](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
+        :gkh_regex => /([ghkGHK](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
+        :zczhch_regex => /([zZ]h?ei|[czCZ]h?(e(ng?)?|o(ng?|u)?|ao|u?a(i|ng?)?|u?(o|i|n)?))/,
+        :ssh_regex => /([sS]ong|[sS]hua(i|ng?)?|[sS]hei|[sS][h]?(a(i|ng?|o)?|en?g?|ou|u(a?n|o|i)?|i))/,
+        :r_regex => /([rR]([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
+        :jqx_regex => /([jqxJQX](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
+        :aeo_regex => /(([aA](i|o|ng?)?|[oO]u?|[eE](i|ng?|r)?))/,
+        :w_regex => /([wW](a(i|ng?)?|o|e(i|ng?)?|u))/,
+        :y_regex => /[yY](a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
+      }
+    end
+    def py_tones
+      py_tones = {
+        'a' => '[āáǎàa]',
+        'e' => '[ēéěèe]',
+        'i' => '[īíǐìi]',
+        'o' => '[ōóǒòo]',
+        'u' => '[ūúǔùu]',
+        'v' => '[ǖǘǚǜü]'
+        #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
+      }
+    end
+  end
+end
+require File.expand_path("../regex/ruby18", __FILE__) if RUBY_VERSION < '1.9'

data/lib/zhongwen_tools/romanization/conversion_table.rb CHANGED Viewed

@@ -1,8 +1,8 @@
-#encoding: utf-8
+# encoding: utf-8
 module ZhongwenTools
   module Romanization
-ROMANANIZATIONS_TABLE = [{:zyfh =>  " ㄚ", :wg =>  "a", :mps2 =>  "a", :yale =>  "a", :typy =>  "a", :py =>  "a"},
+    # TODO: remove excess values, i.e. keys whose value == :py
+ROMANIZATIONS_TABLE = [{:zyfh =>  " ㄚ", :wg =>  "a", :mps2 =>  "a", :yale =>  "a", :typy =>  "a", :py =>  "a"},
 { :zyfh =>  "ㄞ", :wg =>  "ai", :mps2 =>  "ai", :yale =>  "ai", :typy =>  "ai", :py =>  "ai"},
 { :zyfh =>  "ㄢ", :wg =>  "an", :mps2 =>  "an", :yale =>  "an", :typy =>  "an", :py =>  "an"},
 { :zyfh =>  "ㄤ", :wg =>  "ang", :mps2 =>  "ang", :yale =>  "ang", :typy =>  "ang", :py =>  "ang"},

data/lib/zhongwen_tools/romanization/detect.rb CHANGED Viewed

@@ -1,49 +1,13 @@
 # encoding: utf-8
+require File.expand_path("../../regex", __FILE__)
 module ZhongwenTools
   module Romanization
+    # Deprecated: a Regex for accurate pinyin. Use ZhongwenTools::Regex.py instead
+    PY_REGEX = ZhongwenTools::Regex.py
-    #TODO: these regexes don't deal with capital letters. Capitals will make it much more complicated.
-    pyn_regexes = {
-      :bpm_regex => /(miu|[pm]ou|[bpm](o|e(i|ng?)?|a(ng?|i|o)?|i(e|ng?|a[no])?|u))/,
-      :f_regex => /(f(ou?|[ae](ng?|i)?|u))/,
-      :dt_regex => /(d(e(i|ng?)|i(a[on]?|u))|[dt](a(i|ng?|o)?|e(i|ng)?|i(a[on]?|e|ng|u)?|o(ng?|u)|u(o|i|an?|n)?))/,
-      :nl_regex => /(neng?|[ln](a(i|ng?|o)?|e(i|ng)?|i(ang|a[on]?|e|ng?|u)?|o(ng?|u)|u(o|i|an?|n)?|ve?))/,
-      :gkh_regex => /([ghk](a(i|ng?|o)?|e(i|ng?)?|o(u|ng)|u(a(i|ng?)?|i|n|o)?))/,
-      :zczhch_regex => /(z[h]?ei|[cz]hua(i|ng?)?|[cz][h]?(a(i|ng?|o)?|en?g?|o(u|ng)?|u(a?n|o|i)?))/,
-      :ssh_regex => /(song|shua(i|ng?)?|shei|s[h]?(a(i|ng?|o)?|en?g?|ou|u(a?n|o|i)?))/,
-      :r_regex => /(r([ae]ng?|i|e|ao|ou|ong|u[oin]|ua?n?))/,
-      :jqx_regex => /([jqx](i(a(o|ng?)?|[eu]|ong|ng?)?|u(e|a?n)?))/,
-      :aw_regex => /(wu|w?(a(i|o|ng?)?|ou?|e(i|ng?)?))/,
-      :y_regex => /y(a(o|ng?)?|e|in?g?|o(u|ng)?|u(e|a?n)?)/
-    }
-    if RUBY_VERSION < '1.9'
-      py_tones = {
-        'a' => '(ā|á|ǎ|à|a)',
-        'e' => '(ē|é|ě|è|e)',
-        'i' => '(ī|í|ǐ|ì|i)',
-        'o' => '(ō|ó|ǒ|ò|o)',
-        'u' => '(ū|ú|ǔ|ù|u)',
-        'v' => '(ǖ|ǘ|ǚ|ǜ|ü)'
-      }
-      # might not need the space on the end.
-      PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub_with_hash(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
-    else
-      py_tones = {
-        'a' => '[āáǎàa]',
-        'e' => '[ēéěèe]',
-        'i' => '[īíǐìi]',
-        'o' => '[ōóǒòo]',
-        'u' => '[ūúǔùu]',
-        'v' => '[ǖǘǚǜü]'
-        #([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
-      }
-      PY_REGEX = /(#{pyn_regexes.map{|k,v| v.to_s[7..-2].gsub(/[aeiouv]/,py_tones)}.join('|')}(\s\-))/
-    end
-    PINYIN_REGEX = /(#{pyn_regexes.values.join('|')})([1-5])?([\s\-]+)?/
+    # Deprecated: a Regex for accurate pinyin with numbers. Use ZhongwenTools::Regex.pyn instead.
+    PINYIN_REGEX = ZhongwenTools::Regex.pyn
     # Public: checks if a string is pinyin.
     #
@@ -55,7 +19,7 @@ module ZhongwenTools
     def py?(str = nil)
       str ||= self
-      str.gsub(PY_REGEX, '').strip == ''
+      str.gsub(ZhongwenTools::Regex.py, '').strip == ''
     end
     # Public: checks if a string is pinyin.
@@ -68,24 +32,72 @@ module ZhongwenTools
     def pyn?(str = nil)
       str ||= self
-      str.gsub(PINYIN_REGEX,'').strip == ''
+      str.gsub(ZhongwenTools::Regex.pyn, '').strip == ''
     end
-    # Public: checks if a string is wade-giles.
+    # Public: Checks if a string is wade-giles.
     #
     # Examples
     #   wg?('pin1-yin1')
     #   # => false
-    # There are some situations where wg == pyn, but there's no way to differentiate the two.
+    #
+    # Returns a Boolean.
     def wg?(str = nil, type = :pyn)
-      #it shouldn't be pyn, but it should be able to conver to pyn
+      # NOTE: There are some situations where wg == pyn, but there's no way to differentiate the two.
+      # FIXME: it shouldn't be pyn, but it should be able to convert to pyn
+      #        Actually, wade-giles does sometimes overlap with pyn. So this
+      #        method creates false negatives. A future :romanization method
+      #        would default to pyn, but this method shouldn't.
+      #        Add tests where str.pyn? and str.wg?
       str ||= self
-      #easy ones.. is it py? pyn? zyfh? gyrm?
-      #harder ones: is it typy, msp2, yale, wg
       wg = ZhongwenTools::Romanization.to_wade_giles(str, type)
       # TODO: need to convert string to pyn.
       pyn = str
       wg != pyn && wg.gsub(/[1-5]/,'')
     end
+    # Public: Checks if a String is Zhuyin Fuhao (a.k.a. bopomofo).
+    #
+    # str - a String. Optional if the object calling the method is a String.
+    #
+    # Examples
+    #
+    #   zyfh?('ㄊㄥ')
+    #   # => true
+    #
+    # Returns a boolean.
+    def zyfh?(str = nil)
+      str ||= self
+      bopomofo = str.gsub(/[1-5\s]/,'')
+      bopomofo.scan(ZhongwenTools::Regex.bopomofo).join == bopomofo
+    end
+    # Public: Checks if a String is Tongyong Pinyin.
+    #         http://en.wikipedia.org/wiki/Tongyong_Pinyin
+    #         http://pinyin.info/romanization/tongyong/
+    #
+    # str - a String. Optional if the object calling the method is a String.
+    #
+    # Examples
+    #
+    #   typy?('chuei niou')
+    #   # => true
+    #
+    # Returns a boolean.
+    def typy?(str = nil)
+      str ||= self
+      typy = str.gsub(/[1-5\s\-']/,'')
+      # Sorting by String length means it will match the longest possible part.
+      # FIXME: it is probably possible for this to have false negatives.
+      #        A more comprehensive regex like Regex.pyn would be needed
+      #        to accurately detect typy.
+      regex_str = ROMANIZATIONS_TABLE.map{ |r| r[:typy] || r[:py] }.sort{|x,y| x.size <=> y.size}.reverse.join('|')
+      typy.scan(/#{regex_str}/).join == typy
+    end
+    # TODO: mps2? yale? gyrm? romanization?
   end
 end

data/lib/zhongwen_tools/romanization/pyn_to_py.rb CHANGED Viewed

@@ -1,7 +1,8 @@
-#encoding: utf-8
-#This table works for pyn -> pinyin conversion, but it introduces
-#mistakes when converting pinyin to pyn. In practice, pinyin can't be
-#converted to pyn properly unless it's properly formatted.
+# encoding: utf-8
+# NOTE: This table works for pyn -> pinyin conversion, but it introduces
+#       mistakes when converting pinyin to pyn. In practice, pinyin can't
+#       be converted to pyn properly unless it's properly formatted.
 module ZhongwenTools
   module Romanization
     PYN_PY = {

data/lib/zhongwen_tools/romanization/string.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# encoding: utf-8
+module ZhongwenTools
+  module Romanization
+    # Public: splits pinyin number strings.
+    #
+    # str - a String to be split
+    #
+    # Examples
+    #
+    #
+    #   split_pyn('zhong1guo2')
+    #   # => ['zhong1', 'guo2']
+    #
+    # Returns an Array of Strings.
+    def split_pyn(str = nil)
+      str ||= self
+      puts "WARNING: string is not valid pinyin-num format. #{str}" unless str.pyn?
+      str.scan(/(#{ZhongwenTools::Regex.pyn})/).map{ |arr| arr[0].strip.gsub('-','') }.flatten
+    end
+  end
+end

data/lib/zhongwen_tools/romanization.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 # encoding: utf-8
 require File.expand_path("../romanization/conversion_table", __FILE__)
 require File.expand_path("../romanization/detect", __FILE__)
+require File.expand_path("../romanization/string", __FILE__)
 require File.expand_path("../romanization/pyn_to_py", __FILE__)
 module ZhongwenTools
@@ -66,6 +67,7 @@ module ZhongwenTools
     #
     #  Returns a string with actual pinyin
     def _to_pinyin str
+      # TODO: move regex to ZhongwenTools::Regex
       regex = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
       # doing the substitution in a block is ~8x faster than using scan and each.
@@ -107,7 +109,7 @@ module ZhongwenTools
     def _replacement(token, from = nil)
       token = token.downcase.gsub(/[1-5].*/,'')
-      ROMANANIZATIONS_TABLE.find do |x|
+      ROMANIZATIONS_TABLE.find do |x|
         if from.nil?
           x.values.include?(token)
         else

data/lib/zhongwen_tools/string/caps.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#encoding: utf-8
+# encoding: utf-8
 module ZhongwenTools
   UNICODE_CAPS = {
@@ -18,10 +18,10 @@ module ZhongwenTools
     'Ó' => 'ó',
     'Ǒ' => 'ǒ',
     'Ò' => 'ò',
-    'Ǖ' => 'ǖ',# using combining diatrical marks
-    'Ǘ' => 'ǘ',# using combining diatrical marks
-    'Ǚ' => 'ǚ',# using combining diatrical marks
-    'Ǜ' => 'ǜ',# using combining diatrical marks
+    'Ǖ' => 'ǖ', # using combining diacritical marks
+    'Ǘ' => 'ǘ', # using combining diacritical marks
+    'Ǚ' => 'ǚ', # using combining diacritical marks
+    'Ǜ' => 'ǜ', # using combining diacritical marks
     'Ū' => 'ū',
     'Ú' => 'ú',
     'Ǔ' => 'ǔ',

data/lib/zhongwen_tools/string/ruby18.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#encoding: utf-8
+# encoding: utf-8
 class String
   define_method(:chars) do
@@ -22,74 +22,75 @@ end
 module ZhongwenTools
   module String
-    def to_utf8(encoding = nil, encodings = nil)
-      #should substitute out known bad actors like space
-      encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
-      encodings = encoding + encodings unless encoding.nil?
-      raise 'Unable to Convert' if encodings.size == 0
-      begin
-        text = Iconv.conv('utf-8', encodings[0], self)
-      rescue
-        text = self.to_utf8(nil, encodings[1..-1])
-      end
-      text
-    end
-    def convert_regex(regex)
-      str = regex.to_s
-      regex.to_s.scan(/u[0-9A-Z]{4}/).each{|cp| str = str.sub('\\' + cp,cp.from_codepoint)}
-      /#{str}/
+    # TODO: replace deprecated constant UNICODE_REGEX.
+  end
+  def to_utf8(encoding = nil, encodings = nil)
+    # FIXME: should substitute out known bad actors like space
+    encodings = ['utf-8', 'GB18030', 'BIG5', 'GBK', 'GB2312'] if encodings.nil?
+    encodings = encoding + encodings unless encoding.nil?
+    raise 'Unable to Convert' if encodings.size == 0
+    begin
+      text = Iconv.conv('utf-8', encodings[0], self)
+    rescue
+      text = self.to_utf8(nil, encodings[1..-1])
     end
+    text
+  end
-    def has_zh?(str = nil)
-      str ||= self
+  def convert_regex(regex)
+    str = regex.to_s
+    regex.to_s.scan(/u[0-9A-Z]{4}/).each{|cp| str = str.sub('\\' + cp,cp.from_codepoint)}
+    /#{str}/
+  end
-      regex = {
-        :zh => self.convert_regex(UNICODE_REGEX[:zh]),
-        :punc => self.convert_regex(UNICODE_REGEX[:punc])
-      }
-      #str.scan(/#{regex[:zh]}|#{regex[:punc]}|\s/).join == str
-      !self.fullwidth?(str) && (!str[regex[:zh]].nil? || !str[regex[:punc]].nil?)
-    end
+  def has_zh?(str = nil)
+    str ||= self
-    def zh?(str = nil)
-      str ||= self
+    regex = {
+      :zh => self.convert_regex(UNICODE_REGEX[:zh]),
+      :punc => self.convert_regex(UNICODE_REGEX[:punc])
+    }
+    # str.scan(/#{regex[:zh]}|#{regex[:punc]}|\s/).join == str
+    !self.fullwidth?(str) && (!str[regex[:zh]].nil? || !str[regex[:punc]].nil?)
+  end
-      regex = {
-        :zh => self.convert_regex(UNICODE_REGEX[:zh]),
-        :punc => self.convert_regex(UNICODE_REGEX[:punc])
-      }
+  def zh?(str = nil)
+    str ||= self
-      !str.fullwidth? && (str.scan(/(#{regex[:zh]}+|#{regex[:punc]}+|\s+)/).join == str)
-    end
+    regex = {
+      :zh => self.convert_regex(UNICODE_REGEX[:zh]),
+      :punc => self.convert_regex(UNICODE_REGEX[:punc])
+    }
-    def has_zh_punctuation?(str = nil)
-      str ||= self
-      regex = {
-        :zh => self.convert_regex(UNICODE_REGEX[:zh]),
-        :punc => self.convert_regex(UNICODE_REGEX[:punc])
-      }
+    !str.fullwidth? && (str.scan(/(#{regex[:zh]}+|#{regex[:punc]}+|\s+)/).join == str)
+  end
-      !str[regex[:punc]].nil?
-    end
+  def has_zh_punctuation?(str = nil)
+    str ||= self
+    regex = {
+      :zh => self.convert_regex(UNICODE_REGEX[:zh]),
+      :punc => self.convert_regex(UNICODE_REGEX[:punc])
+    }
-    def strip_zh_punctuation(str = nil)
-      str ||= self
+    !str[regex[:punc]].nil?
+  end
-      str.gsub(self.convert_regex(UNICODE_REGEX[:punc]), '')
-    end
+  def strip_zh_punctuation(str = nil)
+    str ||= self
-    def to_halfwidth(str = nil)
-      str ||= self
-      matches = str.scan(/([０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞])/u).uniq.flatten
+    str.gsub(self.convert_regex(UNICODE_REGEX[:punc]), '')
+  end
-      matches.each do |match|
-        replacement = FW_HW[match]
-        str = str.gsub(match, replacement) #unless str.nil?
-      end
+  def to_halfwidth(str = nil)
+    str ||= self
+    matches = str.scan(/([０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞])/u).uniq.flatten
-      str
+    matches.each do |match|
+      replacement = FW_HW[match]
+      str = str.gsub(match, replacement)
     end
+    str
   end
 end

data/lib/zhongwen_tools/string/ruby19.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-#encoding: utf-8
+# encoding: utf-8
 class String
   define_method(:chars) do
     self.scan(/./mu).to_a

data/lib/zhongwen_tools/string.rb CHANGED Viewed

@@ -1,12 +1,14 @@
 # encoding: utf-8
 #$:.unshift File.join(File.dirname(__FILE__),'..','lib','zhongwen_tools', 'string')
 require 'uri'
+require File.expand_path("../regex", __FILE__)
 require File.expand_path("../string/fullwidth", __FILE__)
 require File.expand_path("../string/caps", __FILE__)
 class String
   alias_method :_downcase, :downcase
   alias_method :_upcase, :upcase
+  alias_method :gsub_with_hash, :gsub
   def downcase
     self._downcase.gsub(/(#{ZhongwenTools::UNICODE_CAPS.keys.join('|')})/){
@@ -35,10 +37,12 @@ module ZhongwenTools
   module String
     extend self
+    # Deprecated: a Hash of unicode Regexes. Use ZhongwenTools::Regex.zh and ZhongwenTools::Regex.zh_punc instead.
     UNICODE_REGEX = {
-      :zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005|\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
-      :punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
+      :zh => Regex.zh,
+      :punc => Regex.zh_punc
     }
     def to_utf8(str = nil)
       (str || self).force_encoding('utf-8')
       #TODO: better conversion methods can be extracted from categories service
@@ -47,13 +51,13 @@ module ZhongwenTools
     def has_zh?(str = nil)
       str ||= self
-      !str[/(#{UNICODE_REGEX[:zh]}|#{UNICODE_REGEX[:punc]})/].nil?
+      !str[/(#{Regex.zh}|#{Regex.zh_punc})/].nil?
     end
     def zh?(str = nil)
       str ||= self
-      str.scan(/(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/).join == str
+      str.scan(/(#{Regex.zh}+|#{Regex.zh_punc}+|\s+)/).join == str
     end
     def downcase(str = nil)
@@ -77,13 +81,13 @@ module ZhongwenTools
     def has_zh_punctuation?(str = nil)
       str ||= self
-      !str[UNICODE_REGEX[:punc]].nil?
+      !str[Regex.zh_punc].nil?
     end
     def strip_zh_punctuation(str = nil)
       str ||= self
-      str.gsub(UNICODE_REGEX[:punc], '')
+      str.gsub(Regex.zh_punc, '')
     end
     def size(str = nil)
@@ -122,7 +126,7 @@ module ZhongwenTools
     def halfwidth?(str = nil)
       str ||= self
-      str[/[０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞]/].nil?
+      str[Regex.fullwidth].nil?
     end
     def fullwidth?(str = nil)
@@ -133,7 +137,7 @@ module ZhongwenTools
     def to_halfwidth(str = nil)
       str ||= self
-      str.gsub(/([０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞])/){  ZhongwenTools::FW_HW[$1] }
+      str.gsub(/(#{Regex.fullwidth})/){  ZhongwenTools::FW_HW[$1] }
     end
     def to_codepoint(str = nil)

data/lib/zhongwen_tools/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module ZhongwenTools
-  VERSION = "0.7.2"
+  VERSION = "0.9.0"
 end

data/test/test_numbers.rb CHANGED Viewed

@@ -41,14 +41,26 @@ class TestNumbers < Minitest::Test
     pyn = self.number_to_pyn num
     assert_equal 'yi1-bai2-san1-shi2-liu4', pyn
+    num = '一千五百四十二'
+    pyn = self.number_to_pyn num
+    assert_equal 'yi1-qian2-wu3-bai2-si4-shi2-er4', pyn
+  end
+  def test_is_number
+    @numbers.map{ |n| n[:zh]}.each do |zh|
+     assert self.number? zh
+    end
+    assert self.number? '一'
   end
   def setup
     @numbers = [
-      {:zh =>'一万两千七', :en => 12007},
-      {:zh => '三千六十三', :en => 3063},
+      {:zh =>'一万两千七', :en => 12_007},
+      {:zh => '三千六十三', :en => 3_063},
       {:zh => '一百五十', :en => 150 },
-      {:zh => '三千亿', :en => 300000000000},
+      {:zh => '三千亿', :en => 300_000_000_000},
       {:zh => '一九六六', :en => 1966},
       {:zh => '二零零八', :en => 2008},
     ]

data/test/test_romanization.rb CHANGED Viewed

@@ -24,6 +24,9 @@ class TestRomanization < Minitest::Test
   def test_pyn
     assert_equal 'ni3 hao3', @py.to_pyn(:py)
     assert_equal 'tian1an1men2', 'tian1an1men2'.to_py.to_pyn(:py)
+    #assert_equal 'Wūlúhānuòfū'.to_pyn, 'Wu1-lu2-ha1-nuo4-fu1'
+    #"007：Dàpò Liàngzǐ Wēijī", "007: Da4po4 Liang4zi3 Wei1ji1"
   end
   def test_zhuyin_fuhao
@@ -33,6 +36,7 @@ class TestRomanization < Minitest::Test
      assert_equal 'ㄇㄠ2 ㄗㄜ2 ㄉㄨㄥ1', @mzd.to_zhuyin_fuhao
      assert_equal 'ㄑㄧㄥ3 ㄏㄨㄟ2ㄉㄚ2 ㄨㄛ3 ㄉㄜ5 ㄨㄣ4ㄊㄧ2 .', @sent.to_zhuyin
      assert_equal 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1', @mzd2.to_zhuyin_fuhao
+     assert 'ㄋㄧ3 ㄏㄠ3'.zyfh?
   end
   def test_wade_giles
@@ -48,11 +52,16 @@ class TestRomanization < Minitest::Test
     #assert_equal '', @str.to_mspy2
   #end
-  #def test_typy
+  def test_typy
     #skip
-    #assert_equal '', @str.to_typy
-    #assert_equal '', @str.to_tongyong
-  #end
+    pyn = 'chui1 niu3'
+    typy = 'chuei1 niou3'
+    assert_equal typy, pyn.to_typy
+    # FIXME: to_typy doesn't work with non-spaced pinyin.
+    #assert_equal typy, typy.to_pyn(:typy)
+    assert typy.typy?
+    refute pyn.typy?
+  end
   def test_yale
     assert_equal 'ni3 hau3', @str.to_yale
@@ -68,7 +77,18 @@ class TestRomanization < Minitest::Test
     refute @py.pyn?
     assert 'chung1 kuo2'.wg?
-    assert @py.py?
+    # Travis CI is having trouble with this using Ruby 1.8.7, but it works locally.
+    # I'll probably end up dropping full 1.8.7 support.
+    assert @py.py?, "#{@py} should be pinyin. (#{@py.py?})" unless RUBY_VERSION < '1.9'
+  end
+  def test_split_pyn
+    assert_equal 'zhong1guo2'.split_pyn, %w(zhong1 guo2)
+    assert_equal 'dong1xi'.split_pyn, %w(dong1 xi)
+    assert_equal 'zhongguo'.split_pyn, %w(zhong guo)
+    assert_equal 'dong1 xi1 '.split_pyn, %w(dong1 xi1)
+    assert_equal @mzd2.split_pyn, %w(Mao2 Ze2 dong1)
   end
   def setup

data/test/test_string.rb CHANGED Viewed

@@ -75,6 +75,7 @@ class TestString < Minitest::Test
     assert @str.has_zh?
     refute @hw.has_zh?
     refute @fw.has_zh?
+    refute 'zhong1-guo'.has_zh?
     assert ZhongwenTools::String.has_zh? @str
     refute ZhongwenTools::String.has_zh? @hw

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: zhongwen_tools
 version: !ruby/object:Gem::Version
-  version: 0.7.2
+  version: 0.9.0
 platform: ruby
 authors:
 - Steven Daniels
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-04 00:00:00.000000000 Z
+date: 2014-05-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -137,10 +137,13 @@ files:
 - lib/zhongwen_tools/conversion/string.rb
 - lib/zhongwen_tools/integer.rb
 - lib/zhongwen_tools/numbers.rb
+- lib/zhongwen_tools/regex.rb
+- lib/zhongwen_tools/regex/ruby18.rb
 - lib/zhongwen_tools/romanization.rb
 - lib/zhongwen_tools/romanization/conversion_table.rb
 - lib/zhongwen_tools/romanization/detect.rb
 - lib/zhongwen_tools/romanization/pyn_to_py.rb
+- lib/zhongwen_tools/romanization/string.rb
 - lib/zhongwen_tools/string.rb
 - lib/zhongwen_tools/string/caps.rb
 - lib/zhongwen_tools/string/fullwidth.rb
@@ -174,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project: zhongwen_tools
-rubygems_version: 2.2.0
+rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
 summary: Zhongwen Tools provide romanization conversions and helper methods for Chinese.