hanzi-converter 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,10 @@ Example usage:
6
6
 
7
7
  HanziConverter.load_data
8
8
  HanziConverter.to_pinyin('走红') # zou3hong2
9
+ HanziConverter.to_pinyin('簡單') # jian3dan1
10
+ HanziConverter.to_pinyin('为什么') # wei4shen2me5
11
+ HanziConverter.to_pinyin('no! 为什么!') # no! wei4shen2me5!
12
+ HanziConverter.to_pinyin('你好, 我是康昱辰。') # ni3hao3, wo3shi4kang1yu4chen2。
9
13
 
10
14
  To run tests:
11
15
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.1
1
+ 0.2.0
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "hanzi-converter"
8
- s.version = "0.1.1"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Steve Jackson"]
@@ -22,7 +22,7 @@ class HanziConverter
22
22
  line_data[:simplified] = line[0, line.index(' ')]
23
23
 
24
24
  line = line[line.index('['), line.length]
25
- line_data[:pinyin] = line[1, line.index(']') - 1]
25
+ line_data[:pinyin] = line[1, line.index(']') - 1].downcase
26
26
 
27
27
  line = line[line.index('/'), line.rindex('/')]
28
28
  line_data[:english] = line[1, line.rindex('/') - 1]
@@ -34,10 +34,49 @@ class HanziConverter
34
34
 
35
35
  def to_pinyin(text, options={})
36
36
  load_data if @data.nil?
37
+
38
+ result = ''
39
+ pos = 0
40
+
41
+ loop do
42
+ char = text[pos]
43
+ break if !char
44
+
45
+ if char.ord < 0x4E00 || char.ord > 0x9FFF
46
+ # it's not a chinese character.
47
+ result << char
48
+ pos += 1
49
+ else
50
+ # it's a chinese character. start by trying to find a long word match,
51
+ # and if it fails, all the way down to a single hanzi.
52
+ match = nil
53
+ match_length = 0
54
+ 4.downto(1) do |length|
55
+ match = find_match(text[pos, length])
56
+ match_length = length
57
+ break if match
58
+ end
59
+
60
+ if match
61
+ result << match[:pinyin].gsub("\s", '')
62
+ pos += match_length
63
+ next
64
+ else
65
+ # if we're still here, we didn't find a match at all.
66
+ result << char
67
+ pos += 1
68
+ end
69
+ end
70
+ end
71
+
72
+ result
73
+ end
74
+
75
+ private
76
# Looks up a single dictionary entry by its exact written form.
#
# text - candidate word (String), compared against each entry's
#        :simplified and :traditional values.
#
# Returns the first entry in @data whose :simplified or :traditional value
# equals text, or nil when no entry matches.
def find_match(text)
  @data.find do |word|
    word[:simplified] == text || word[:traditional] == text
  end
end
42
81
  end
43
82
  end
@@ -24,4 +24,14 @@ class TestHanziConverter < Test::Unit::TestCase
24
24
  assert_equal 'jian3dan1', result
25
25
  end
26
26
 
27
# Non-hanzi text around a word must come through unchanged.
def test_can_convert_with_surrounding_english
  assert_equal 'no! wei4shen2me5!', HanziConverter.to_pinyin('no! 为什么!')
end
31
+
32
# A full sentence is converted word by word; punctuation is kept as-is.
def test_can_convert_sentence_of_hanzi
  expected = 'ni3hao3, wo3shi4kang1yu4chen2。'
  assert_equal expected, HanziConverter.to_pinyin('你好, 我是康昱辰。')
end
36
+
27
37
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hanzi-converter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -78,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
78
78
  version: '0'
79
79
  segments:
80
80
  - 0
81
- hash: -3181578378699029479
81
+ hash: 4343939069972924356
82
82
  required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements: