RubyGems - hanzi-converter - Versions diffs - 0.1.1 → 0.2.0 - Mend

hanzi-converter 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/README.rdoc +4 -0
data/VERSION +1 -1
data/hanzi-converter.gemspec +1 -1
data/lib/hanzi-converter.rb +41 -2
data/test/test_hanzi-converter.rb +10 -0
metadata +2 -2

data/README.rdoc CHANGED

@@ -6,6 +6,10 @@ Example usage:
   HanziConverter.load_data
   HanziConverter.to_pinyin('走红') # zou3hong2
+  HanziConverter.to_pinyin('簡單') # jian3dan1
+  HanziConverter.to_pinyin('为什么') # wei4shen2me5
+  HanziConverter.to_pinyin('no! 为什么！') # no! wei4shen2me5！
+  HanziConverter.to_pinyin('你好， 我是康昱辰。') # ni3hao3， wo3shi4kang1yu4chen2。
 To run tests:

data/VERSION CHANGED

@@ -1 +1 @@
-0.1.1
+0.2.0

data/hanzi-converter.gemspec CHANGED

@@ -5,7 +5,7 @@
 Gem::Specification.new do |s|
   s.name = "hanzi-converter"
-  s.version = "0.1.1"
+  s.version = "0.2.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Steve Jackson"]

data/lib/hanzi-converter.rb CHANGED

@@ -22,7 +22,7 @@ class HanziConverter
         line_data[:simplified] = line[0, line.index(' ')]
         line = line[line.index('['), line.length]
-        line_data[:pinyin] = line[1, line.index(']') - 1]
+        line_data[:pinyin] = line[1, line.index(']') - 1].downcase
         line = line[line.index('/'), line.rindex('/')]
         line_data[:english] = line[1, line.rindex('/') - 1]
@@ -34,10 +34,49 @@ class HanziConverter
     def to_pinyin(text, options={})
       load_data if @data.nil?
+      result = ''
+      pos = 0
+      loop do
+        char = text[pos]
+        break if !char
+        if char.ord < 0x4E00 || char.ord > 0x9FFF
+          # it's not a chinese character.
+          result << char
+          pos += 1
+        else
+          # it's a chinese character. start by trying to find a long word match,
+          # and if it fails, all the way down to a single hanzi.
+          match = nil
+          match_length = 0
+          4.downto(1) do |length|
+            match = find_match(text[pos, length])
+            match_length = length
+            break if match
+          end
+          if match
+            result << match[:pinyin].gsub("\s", '')
+            pos += match_length
+            next
+          else
+            # if we're still here, we didn't find a match at all.
+            result << char
+            pos += 1
+          end
+        end
+      end
+      result
+    end
+    private
+    def find_match(text)
       entry = @data.find do |word|
         word[:simplified] == text || word[:traditional] == text
       end
-      entry[:pinyin].gsub("\s", '') if entry
     end
   end
 end

data/test/test_hanzi-converter.rb CHANGED

@@ -24,4 +24,14 @@ class TestHanziConverter < Test::Unit::TestCase
     assert_equal 'jian3dan1', result
   end
+  def test_can_convert_with_surrounding_english
+    result = HanziConverter.to_pinyin('no! 为什么！')
+    assert_equal 'no! wei4shen2me5！', result
+  end
+  def test_can_convert_sentence_of_hanzi
+    result = HanziConverter.to_pinyin('你好， 我是康昱辰。')
+    assert_equal 'ni3hao3， wo3shi4kang1yu4chen2。', result
+  end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: hanzi-converter
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
   prerelease:
 platform: ruby
 authors:
@@ -78,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -3181578378699029479
+      hash: 4343939069972924356
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: