RubyGems - zhongwen_tools - Versions diffs - 0.12.4 → 0.15.1 - Mend

zhongwen_tools 0.12.4 → 0.15.1

Files changed (57) hide show

checksums.yaml +4 -4
data/Gemfile +1 -1
data/README.md +74 -165
data/Rakefile +0 -1
data/lib/zhongwen_tools/{string/caps.rb → caps.rb} +19 -1
data/lib/zhongwen_tools/core.rb +19 -0
data/lib/zhongwen_tools/core_ext/integer.rb +8 -0
data/lib/zhongwen_tools/core_ext/string.rb +10 -0
data/lib/zhongwen_tools/fullwidth.rb +102 -0
data/lib/zhongwen_tools/integer_extension.rb +31 -0
data/lib/zhongwen_tools/number/number_table.rb +44 -0
data/lib/zhongwen_tools/number.rb +221 -0
data/lib/zhongwen_tools/regex.rb +38 -22
data/lib/zhongwen_tools/romanization/pinyin.rb +231 -0
data/lib/zhongwen_tools/romanization/{pyn_to_py.rb → pinyin_table.rb} +2 -1
data/lib/zhongwen_tools/romanization/romanization_table.rb +425 -0
data/lib/zhongwen_tools/romanization.rb +199 -136
data/lib/zhongwen_tools/{string/ruby19.rb → ruby_19.rb} +1 -2
data/lib/zhongwen_tools/{conversion → script}/conversion_data +0 -0
data/lib/zhongwen_tools/{conversion.rb → script.rb} +21 -34
data/lib/zhongwen_tools/string_extension.rb +136 -0
data/lib/zhongwen_tools/unicode.rb +25 -0
data/lib/zhongwen_tools/uri.rb +14 -0
data/lib/zhongwen_tools/version.rb +1 -1
data/lib/zhongwen_tools/zhongwen.rb +29 -0
data/lib/zhongwen_tools.rb +2 -3
data/test/test_caps.rb +26 -0
data/test/test_core.rb +13 -0
data/test/test_fullwidth.rb +30 -0
data/test/test_helper.rb +4 -12
data/test/test_helpers/unload_zhongwen_tools_script.rb +5 -0
data/test/test_integer_extension.rb +34 -0
data/test/test_number.rb +79 -0
data/test/test_pinyin.rb +68 -0
data/test/test_regex.rb +41 -0
data/test/test_romanization.rb +110 -133
data/test/{test_conversion.rb → test_script.rb} +41 -44
data/test/test_string_extension.rb +94 -0
data/test/test_unicode.rb +27 -0
data/test/test_uri.rb +16 -0
data/test/test_zhongwen.rb +37 -0
data/zhongwen_tools.gemspec +1 -1
metadata +93 -52
data/Gemfile.1.8.7 +0 -8
data/lib/zhongwen_tools/conversion/string.rb +0 -19
data/lib/zhongwen_tools/integer.rb +0 -28
data/lib/zhongwen_tools/numbers.rb +0 -195
data/lib/zhongwen_tools/regex/ruby18.rb +0 -15
data/lib/zhongwen_tools/romanization/conversion_table.rb +0 -425
data/lib/zhongwen_tools/romanization/detect.rb +0 -141
data/lib/zhongwen_tools/romanization/string.rb +0 -36
data/lib/zhongwen_tools/string/fullwidth.rb +0 -85
data/lib/zhongwen_tools/string/ruby18.rb +0 -96
data/lib/zhongwen_tools/string.rb +0 -164
data/test/test_integer.rb +0 -31
data/test/test_numbers.rb +0 -68
data/test/test_string.rb +0 -133

data/lib/zhongwen_tools/romanization.rb CHANGED Viewed

@@ -1,111 +1,120 @@
 # encoding: utf-8
-require 'zhongwen_tools/string'
-require 'zhongwen_tools/romanization/conversion_table'
-require 'zhongwen_tools/romanization/string'
-require 'zhongwen_tools/romanization/pyn_to_py'
-# TODO: follow tone conventions for different systems.
-#       IPA	mä˥˥	mä˧˥	mä˨˩˦	mä˥˩	mä
-#       Pinyin	mā	má	mǎ	mà	ma
-#       Tongyong Pinyin	ma	má	mǎ	mà	må # this will be difficult.
-#       Wade–Giles	ma¹	ma²	ma³	ma⁴	ma⁰
-#       Zhuyin	ㄇㄚ	ㄇㄚˊ	ㄇㄚˇ	ㄇㄚˋ	•ㄇㄚ
+require 'zhongwen_tools/romanization/pinyin'
+require 'zhongwen_tools/romanization/pinyin_table'
+require 'zhongwen_tools/romanization/romanization_table'
+# NOTE: Creates several dynamic Modules and their associated methods.
+#       e.g. ZhongwenTools::Romanization::ZhuyinFuhao.to_bpmf
+#            ZhongwenTools::Romanization::WadeGiles.to_wg
 module ZhongwenTools
   module Romanization
-    extend self
+    def self.convert(str, to, from)
+      # NOTE: don't convert if it already is converted.
+      return str if to == from
-    %w(pinyin py pyn bopomofo bpmf zhuyin zyfh zhyfh zhuyin_fuhao yale wade_giles wg typy tongyong mps2).each do |type|
-      define_method("to_#{type}") do |*args|
-        str, from = _romanization_options(args)
-        _convert_romanization str, _set_type(type.to_sym), _set_type(from)
+      if to == :py
+        convert_to_py(str, from)
+      elsif to == :pyn
+        convert_to_pyn(str, from)
+      else
+        convert_to_other(str, from, to)
       end
     end
-    private
-    # Private: Provides romanization options for romanization methods. If no :from argument is given, then
-    #          the method will try to guess the romanization. This can sometimes provide sub-optimal
-    #          romanization suggestions. See lib/zhongwen_tools/romanization/detect.rb#romanization? for details.
+    # Public: Checks the romanization type for the string.
+    #         Romanization types are like ducks. If it walks, talks, and acts
+    #         like a duck, it is a duck. Therefore, where a String is both
+    #         pinyin and another romanization system, it will be identified
+    #         as pinyin. If you need to determine whether a py/pyn string
+    #         also belongs to another romanization system, use that
+    #         romanization module's specific function.
     #
-    # args - an Array of arguments. If the Object is a String, then the first argument should be the :from option.
-    #        Otherwise, the first argument is a String and the second argument is the :from option.
-    #
-    # Examples:
+    # str - a String to test.
     #
+    # Examples
+    #    romanization?('hao3') #=> :pyn
+    #    romanization?('zzzz')   #=> nil
     #
-    #   _romanization_options('hao3', :pyn) #=> 'hao3' :pyn
-    #   _romanization_options('hao3') #=> 'hao3', :pyn
     #
-    # Returns an Array. The first item is a String to be converted. The second item is a Symbol for the :from option.
-    def _romanization_options(args)
-      if self.class.to_s != 'String'
-        str = args[0]
-        from = args[1] || str.romanization? || :pyn
-      else
-        str = self
-        from = args[0] || str.romanization? || :pyn
+    # Returns a Symbol for the romanization system or nil if the string is not
+    # a romanization.
+    def self.romanization?(str)
+      if ZhongwenTools::Romanization::Pinyin.py?(str)
+        :py
+      elsif ZhongwenTools::Romanization::Pinyin.pyn?(str)
+        :pyn
+      elsif ZhongwenTools::Romanization::ZhuyinFuhao.bpmf?(str)
+        :bpmf
+      elsif ZhongwenTools::Romanization::WadeGiles.wg?(str)
+        :wg
+      elsif ZhongwenTools::Romanization::TongyongPinyin.typy?(str)
+        :typy
+      elsif ZhongwenTools::Romanization::Yale.yale?(str)
+        :yale
+      elsif ZhongwenTools::Romanization::MPS2.mps2?(str)
+        :mps2
       end
-      [str, from.to_sym]
     end
-    #  Private: Replaces numbered pinyin with actual pinyin. Pinyin separated with hyphens are combined as one word.
-    #
-    #  str - A String to replace with actual pinyin
-    #
-    #  Examples
-    #    _to_pinyin 'Ni3 hao3 ma5?'
-    #    # => "Nǐ hǎo ma?"
-    #    # => 'Zhong1-guo2-ren2'
-    #
-    #
-    #  Returns a string with actual pinyin
-    def _to_pinyin str
-      regex = Regex.pinyin_num
-      # Using gsub is ~8x faster than using scan and each.
-      # Explanation: if it's pinyin without vowels, e.g. m, ng, then convert,
-      #              otherwise, check if it needs an apostrophe (http://www.pinyin.info/romanization/hanyu/apostrophes.html).
-      #              If it does, add it and then convert. Otherwise, just convert.
-      #              Oh, and if double hyphens are used, replace them with one hyphen.
-      #              And finally, correct those apostrophes at the very end.
-      str.gsub(regex) do
-        ($3.nil? ? "#{PYN_PY[$1]}" : ($2 == '' && ['a','e','o'].include?($3[0,1]))? "'#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}" : "#{$2}#{PYN_PY["#{$3}#{$6}"]}#{$4}#{$5}") + (($7.to_s.length > 1) ? '-' : '')
-      end.gsub("-'","-").sub(/^'/,'')
-    end
+    def split(str, type = nil)
+      type ||= romanization?(str)
-    def _to_romanization str, to, from
-      # NOTE: extract/refactor tokens cause tests to fail.
-      begin
-        tokens = str.send("split_#{from}").uniq
-      rescue
-        tokens = str.split(/[ \-]/).uniq
+      if type == :py
+      elsif type == :pyn
       end
-      tokens.collect do |t|
-        search, replace = _token_search_replace(t, str, to, from)
+    end
+    private
+    def self.convert_romanization(str, from, to)
+        # NOTE: extract/refactor tokens cause tests to fail.
+        if from == :pyn
+          tokens = ZhongwenTools::Romanization::Pinyin.split_pyn(str).uniq
+        else
+          tokens = romanization_module(from).send(:split, str).uniq
+        end
+     tokens.collect do |t|
+        search, replace = find_token_replacement(t, str, to, from)
         str =  str.gsub(search, replace)
       end
       str
     end
-    def _token_search_replace(token, str, to, from)
+    def self.convert_to_other(str, from, to)
+      if from == :py
+        str =  ZhongwenTools::Romanization::Pinyin.convert_pinyin_to_pyn(str)
+        from = :pyn
+      end
+      str = convert_romanization(str, from, to)
+      if to == :bpmf
+        str.gsub('-', '')
+      else
+        str
+      end
+    end
+    def self.find_token_replacement(token, str, to, from)
       search = token.gsub(/[1-5].*/,'')
-      replace = _replacement(token, from).fetch(to){ search }
-      replace = _fix_capitalization(str, token, replace)
+      replace = token_replacement(token, from).fetch(to){ search }
+      replace = fix_capitalization(str, token, replace)
       [search, replace]
     end
-    def _fix_capitalization(str, token, replace)
+    def self.fix_capitalization(str, token, replace)
       replace = replace.capitalize  if(token.downcase != token)
       replace
     end
-    def _replacement(token, from = nil)
+    def self.token_replacement(token, from = nil)
       token = token.downcase.gsub(/[1-5].*/,'')
       result = ROMANIZATIONS_TABLE.find do |x|
         if from.nil?
@@ -118,90 +127,144 @@ module ZhongwenTools
       result || {}
     end
-    def _convert_romanization str, to, from
-      return str if to == from
-      result =
-        if to == :py
-          str = _to_romanization str, :pyn, from if from != :pyn
-          _to_pinyin str
-        elsif to == :pyn
-          if from == :py
-            _convert_pinyin_to_pyn(str)
-          else
-            _to_romanization str, :pyn, from
-          end
-        else
-          if from == :py
-            str = _convert_pinyin_to_pyn(str)
-            from = :pyn
-          end
-          _to_romanization str, to, from
-        end
-      # TODO: check to see if wade giles, yale etc. can have hyphens.
-      result = result.gsub('-','') if to == :zyfh
-      result
+    # <module_name>::<romanization_type>?(str)
+    #
+    # Public: Checks if a String is a romanization:
+    #         Zhuyin Fuhao, Tongyong Pinyin, Wade Giles, MPS2 or Yale.
+    #         http://en.wikipedia.org/wiki/Tongyong_Pinyin
+    #         http://pinyin.info/romanization/tongyong/
+    #         http://en.wikipedia.org/wiki/Wade%E2%80%93Giles
+    #         http://en.wikipedia.org/wiki/Bopomofo
+    #         http://pinyin.info/romanization/bopomofo/index.html
+    #
+    # str - a String to test.
+    #
+    # Examples
+    #
+    #   typy?('chuei niou')     #=> true
+    #   wg?('Mao2 Tse2 Tung1')  #=> true
+    #   bpmf?('ㄊㄥ')           #=> true
+    #
+    # Returns a boolean.
+    def self.create_detect_method(romanization_module, name)
+      romanization_module.define_singleton_method("#{name}?") do |str|
+        regex = romanization_module == :ZhuyinFuhao ? ZhongwenTools::Regex.bopomofo : ZhongwenTools::Romanization.detect_regex(name.to_sym)
+        normalized_str = str.downcase.gsub(ZhongwenTools::Regex.punc,'').gsub(/[1-5\s\-']/,'')
+        #TODO: ignore tonal marks from other systems wade giles, tongyong etc.
+        normalized_str.scan(regex).join == normalized_str
+      end
     end
-    def _convert_pinyin_to_pyn(pinyin)
-      # TODO: should method check to make sure pinyin is accurate?
-      words =  pinyin.split(' ')
+    # <module_name>::to_<romanization_type>(str)
+    # Public: Converts to the given romanization from pyn (pinyin using numbers instead of tone marks).
+    #
+    # str - a String to be converted
+    #
+    # Examples:
+    #
+    #
+    #
+    #   ZhongwenTools::Romanization::ZhuyinFuhao.to_zyfh('Mao2 Ze2-dong1') # => 'ㄇㄠ2 ㄗㄜ2ㄉㄨㄥ1'
+    #
+    # Returns a String.
+    def self.create_convert_method(romanization_module, romanization_name, name)
+      romanization_module.define_singleton_method("to_#{ name }") do |*args|
+        str, from = args
+        from ||= ZhongwenTools::Romanization.romanization?(str)
-      pyn = words.map do |word|
-        pys = word.split(/['\-]/).flatten.map{|x| x.scan(Regex.py).map{|x| (x - [nil])[0]}}.flatten
-        _current_pyn(word, pys)
+        ZhongwenTools::Romanization.convert str, romanization_name, from.to_sym
       end
-      pyn.join(' ')
     end
-    def _current_pyn(pyn, pinyin_arr)
-      replacements = []
-      pinyin_arr.each do |pinyin|
-        replace =  pinyin_replacement(pinyin)
-        match = pinyin
-        pyn = pyn.sub(/(#{replacements.join('.*')}.*)#{match}/){ $1 + replace}
-        replacements << replace
+    # <module_name>::split(str)
+    # Public: splits the romanization's string.
+    #
+    # str - a String to be split
+    #
+    # Examples
+    #
+    #
+    #   split('zhong1guo2')
+    #   # => ['zhong1', 'guo2']
+    #
+    # Returns an Array of Strings.
+    def self.create_split_method(romanization_module, name)
+      regex = romanization_module == :ZhuyinFuhao ? /([#{ZhongwenTools::Regex.bopomofo}]*)/ : /(#{ZhongwenTools::Romanization.detect_regex(name.to_sym)}*)/
+      romanization_module.define_singleton_method("split") do |str|
+        # TODO: ignore tonal marks from other systems wade giles, tongyong etc.
+        results = str.scan(regex).map do |arr|
+          arr[0].strip.gsub('-','')
+        end
+        results.flatten - ['']
       end
+    end
-      pyn.gsub("'",'')
+    # Internal: Produces a Regexp for a romanization type.
+    #
+    # type - a Symbol for the romanization type.
+    #
+    # Examples:
+    #
+    #
+    #   detect_regex(:typy) #=> <Regexp>
+    #
+    # Returns a Regexp.
+    def self.detect_regex(type)
+      /#{romanization_values(type).sort{|x,y| x.size <=> y.size}.reverse.join('|')}/
     end
-    def pinyin_replacement(py)
-      matches = PYN_PY.values.select do |x|
-        py.include? x
+    # Internal: Selects the romanization values for a particular romanization type.
+    #
+    # type - a Symbol for the romanization type.
+    #
+    # Examples:
+    #
+    #
+    #   romanization_values(:typy) #=> ['a', ..., 'r']
+    #
+    # Returns an Array that contains the romanization's values.
+    def self.romanization_values(type)
+      results = ZhongwenTools::Romanization::ROMANIZATIONS_TABLE.map do |r|
+        "[#{r[type][0]}#{r[type][0].upcase}]#{r[type][1..-1]}" || r[:pyn]
       end
-      match = select_pinyin_match(matches)
-      replace = PYN_PY.find{|k,v| k if v == match}[0]
-      py.gsub(match, replace).gsub(/([^\d ]*)(\d)([^\d ]*)/){$1 + $3 + $2}
+      results.flatten
     end
-    def select_pinyin_match(matches)
-      # take the longest pinyin match. Use bytes because 'è' is prefered over 'n' or 'r' or 'm'
-      match = matches.sort{|x,y| x.bytes.to_a.length <=> y.bytes.to_a.length}[-1]
+    def self.romanization_module(type)
+      module_name = RomanizationTypes.find{ |k,v| v.include?(type.to_s) }.first
+      ZhongwenTools::Romanization.const_get(module_name)
+    end
-      # Edge case.. en/eng pyn -> py conversion is one way only.
-      match[/^(ē|é|ě|è|e)n?g?/].nil? ? match : match.chars[0]
+    def self.hyphenated?(str)
+      !str[/\-/].nil?
     end
+    # Internal: Creates romanization modules and their methods.
+    RomanizationTypes = {
+      ZhuyinFuhao: %w(bpmf zhuyin_fuhao zhuyinfuhao zyfh zhyfh bopomofo),
+      WadeGiles: %w(wg wade_giles),
+      Yale: ['yale'],
+      TongyongPinyin: %w(typy tongyong tongyong_pinyin),
+      MPS2: ['mps2']
+    }
-    def _set_type(type)
-      type = type.to_s.downcase.to_sym
-      return type if [:zyfh, :wg, :typy, :py, :mps2, :yale, :pyn].include? type
+    RomanizationTypes.each do |module_name, names|
+      romanization_module = self.const_set(module_name, Module.new) unless self.const_defined?(module_name)
+      romanization_module ||= self.const_get(module_name)
-      if [:zhuyinfuhao, :zhuyin, :zhuyin_fuhao, :bopomofo, :bpmf, :zhyfh].include? type
-        :zyfh
-      elsif [:wade_giles, 'wade-giles'.to_sym].include? type
-        :wg
-      elsif [:tongyong, :typy, :ty].include? type
-        :typy
-      elsif type == :pinyin
-        :py
+      romanization_name = names.first.to_sym
+      names.each do |name|
+        create_convert_method(romanization_module, romanization_name, name)
       end
+      create_detect_method(romanization_module, romanization_name)
+      create_split_method(romanization_module, romanization_name)
     end
   end
 end
-require 'zhongwen_tools/romanization/detect'

data/lib/zhongwen_tools/{string/ruby19.rb → ruby_19.rb} RENAMED Viewed

@@ -1,6 +1,5 @@
-# encoding: utf-8
 class String
-  define_method(:chars) do
+  def chars
     self.scan(/./mu).to_a
   end
 end

data/lib/zhongwen_tools/{conversion → script}/conversion_data RENAMED Viewed

File without changes

data/lib/zhongwen_tools/{conversion.rb → script.rb} RENAMED Viewed

@@ -1,39 +1,27 @@
 # encoding: utf-8
-require 'zhongwen_tools/string'
 module ZhongwenTools
-  module Conversion
-    extend self
-    def to_zhs(str = nil)
-      str ||= self
-      convert(:zhs, str)
+  module Script
+    def self.zht?(str)
+      str == convert(:zht, str) ||  str == convert(:zhhk, str)
     end
-    def to_zht(str = nil)
-      str ||= self
-      convert(:zht, str)
+    def self.zhs?(str)
+      str == convert(:zhs, str)
     end
-    def to_zhtw(str = nil)
-      str ||= self
+    def self.to_zhs(str, type)
+      type = type.to_sym
+      fail ArgumentError unless [:zhs, :zhcn].include? type
-      convert(:zhtw, str)
+      convert(type, str)
     end
-    def to_zhhk(str = nil)
-      str ||= self
+    def self.to_zht(str, type)
+      type = type.to_sym
+      fail ArgumentError unless [:zht, :zhtw, :zhhk].include? type
-      convert(:zhhk, str)
-    end
-    def to_zhcn(str = nil)
-      str ||= self
-      convert(:zhcn, str)
+      convert(type, str)
     end
     ZH_TYPES = {
@@ -42,9 +30,10 @@ module ZhongwenTools
       :zhtw => [2,0],
       :zhhk => [3,0],
       :zhcn => [4,1]
-    }
+    } unless defined?(ZH_TYPES)
+    ZH_CONVERSION_TABLE = [] unless defined?(ZH_CONVERSION_TABLE)
-    ZH_CONVERSION_TABLE = []
     private
     # Conversion data and algorithm shamelessly stolen from chinese_convt gem.
@@ -56,8 +45,8 @@ module ZhongwenTools
     #     Zhongwen Tools is  ~12X faster.
     #   + Zhongwen Tools uses Ruby's nifty str[/regex/] = replacement
     #     instead of indices. Conversion tests using indices fail with Ruby 1.8.
-    def load_table
-      filename = File.expand_path('../conversion/conversion_data', __FILE__)
+    def self.load_table
+      filename = File.expand_path('../script/conversion_data', __FILE__)
       File.open(filename).read.split("\n&\n").each do |group|
         ZH_CONVERSION_TABLE << group.split("\n").map do |type|
           Hash[ type.split(',').map{ |term| term.split(':') } ]
@@ -67,12 +56,12 @@ module ZhongwenTools
       nil
     end
-    def convert(type, str)
+    def self.convert(type, str)
       load_table if ZH_CONVERSION_TABLE.length == 0
       types = ZH_TYPES[type] || ZH_TYPES[:zht]
       begin
-        str_len = ZhongwenTools::String.size(str)
+        str_len = str.chars.to_a.size
         n = (str_len < 6)? str_len : 6
         convert_zhongwen(str.dup, str.dup, types, n)
@@ -81,7 +70,7 @@ module ZhongwenTools
       end
     end
-    def convert_zhongwen(str0, str1, types, n)
+    def self.convert_zhongwen(str0, str1, types, n)
       ZH_CONVERSION_TABLE.last(n).each do |group|
         types.each do |t|
           group[t].each do |key , value|
@@ -97,5 +86,3 @@ module ZhongwenTools
     end
   end
 end
-require 'zhongwen_tools/conversion/string'

data/lib/zhongwen_tools/string_extension.rb ADDED Viewed

@@ -0,0 +1,136 @@
+# encoding: utf-8
+module ZhongwenTools
+  module StringExtension
+    def capitalize
+      ZhongwenTools::Caps.capitalize(self)
+    end
+    def zh_downcase
+      ZhongwenTools::Caps.downcase(self)
+    end
+    def zh_upcase
+      ZhongwenTools::Caps.upcase(self)
+    end
+    def has_zh?
+      ZhongwenTools::Zhongwen.has_zh?(self)
+    end
+    def has_zh_punctuation?
+      ZhongwenTools::Zhongwen.has_zh_punctuation?(self)
+    end
+    def zh?
+      ZhongwenTools::Zhongwen.zh?(self)
+    end
+    def strip_zh_punctuation
+      ZhongwenTools::Zhongwen.strip_zh_punctuation(self)
+    end
+    def uri_encode
+      ZhongwenTools::URI.encode(self)
+    end
+    def uri_escape
+      ZhongwenTools::URI.escape(self)
+    end
+    def ascii?
+      ZhongwenTools::Unicode.ascii?(self)
+    end
+    def multibyte?
+      ZhongwenTools::Unicode.multibyte?(self)
+    end
+    def halfwidth?
+      ZhongwenTools::Fullwidth.halfwidth?(self)
+    end
+    def fullwidth?
+      ZhongwenTools::Fullwidth.fullwidth?(self)
+    end
+    def to_halfwidth
+      ZhongwenTools::Fullwidth.to_halfwidth(self)
+    end
+    def to_codepoint
+      ZhongwenTools::Unicode.to_codepoint(self)
+    end
+    def from_codepoint
+      ZhongwenTools::Unicode.from_codepoint(self)
+    end
+    def to_pinyin(from = nil)
+      ZhongwenTools::Romanization::Pinyin::to_py(self, from)
+    end
+    alias_method :to_py, :to_pinyin
+    def to_pyn(from = nil)
+      ZhongwenTools::Romanization::Pinyin::to_pyn(self, from)
+    end
+    def to_bpmf(from = nil)
+      ZhongwenTools::Romanization::ZhuyinFuhao::to_bpmf(self, from)
+    end
+    alias_method :to_zyfh, :to_bpmf
+    alias_method :to_zhyfh, :to_bpmf
+    alias_method :to_bopomofo, :to_bpmf
+    def to_wg(from = nil)
+      ZhongwenTools::Romanization::WadeGiles::to_wg(self, from)
+    end
+    alias_method :to_wade_giles, :to_wg
+    def to_yale(from = nil)
+      ZhongwenTools::Romanization::Yale::to_yale(self, from)
+    end
+    def to_typy(from = nil)
+      ZhongwenTools::Romanization::TongyongPinyin::to_typy(self, from)
+    end
+    alias_method :to_tongyong, :to_typy
+    alias_method :to_tongyong_pinyin, :to_typy
+    def to_mps2(from = nil)
+      ZhongwenTools::Romanization::MPS2::to_mps2(self, from)
+    end
+    def zhs?
+      ZhongwenTools::Script.zhs?(self)
+    end
+    def zht?
+      ZhongwenTools::Script.zht?(self)
+    end
+    def to_zhcn
+      ZhongwenTools::Script.to_zhs(self, :zhcn)
+    end
+    def to_zhhk
+      ZhongwenTools::Script.to_zht(self, :zhhk)
+    end
+    def to_zhs
+      ZhongwenTools::Script.to_zhs(self, :zhs)
+    end
+    def to_zht
+      ZhongwenTools::Script.to_zht(self, :zht)
+    end
+    def to_zhtw
+      ZhongwenTools::Script.to_zht(self, :zhtw)
+    end
+  end
+end

data/lib/zhongwen_tools/unicode.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# encoding: utf-8
+module ZhongwenTools
+  module Unicode
+    def self.to_codepoint(str)
+      str.chars.map{ |c| "\\u%04x" % c.unpack("U")[0] }.join
+    end
+    def self.from_codepoint(str)
+      results = (str.split(/\\?u/) - ['']).map do |s|
+        [s.hex].pack("U")
+      end
+      results.join
+    end
+    def self.ascii?(str)
+      str.chars.to_a.size == str.bytes.to_a.size
+    end
+    def self.multibyte?(str)
+      !ascii?(str)
+    end
+  end
+end