hanzi-converter 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,6 +6,10 @@ Example usage:
6
6
 
7
7
  HanziConverter.load_data
8
8
  HanziConverter.to_pinyin('走红') # zou3hong2
9
+ HanziConverter.to_pinyin('簡單') # jian3dan1
10
+ HanziConverter.to_pinyin('为什么') # wei4shen2me5
11
+ HanziConverter.to_pinyin('no! 为什么!') # no! wei4shen2me5!
12
+ HanziConverter.to_pinyin('你好, 我是康昱辰。') # ni3hao3, wo3shi4kang1yu4chen2。
9
13
 
10
14
  To run tests:
11
15
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.1
1
+ 0.2.0
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "hanzi-converter"
8
- s.version = "0.1.1"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Steve Jackson"]
@@ -22,7 +22,7 @@ class HanziConverter
22
22
  line_data[:simplified] = line[0, line.index(' ')]
23
23
 
24
24
  line = line[line.index('['), line.length]
25
- line_data[:pinyin] = line[1, line.index(']') - 1]
25
+ line_data[:pinyin] = line[1, line.index(']') - 1].downcase
26
26
 
27
27
  line = line[line.index('/'), line.rindex('/')]
28
28
  line_data[:english] = line[1, line.rindex('/') - 1]
@@ -34,10 +34,49 @@ class HanziConverter
34
34
 
35
35
  def to_pinyin(text, options={})
36
36
  load_data if @data.nil?
37
+
38
+ result = ''
39
+ pos = 0
40
+
41
+ loop do
42
+ char = text[pos]
43
+ break if !char
44
+
45
+ if char.ord < 0x4E00 || char.ord > 0x9FFF
46
+ # It's not a Chinese character; copy it through unchanged.
47
+ result << char
48
+ pos += 1
49
+ else
50
+ # It's a Chinese character. Start by trying to find a long word match,
51
+ # and if that fails, try progressively shorter candidates, down to a single hanzi.
52
+ match = nil
53
+ match_length = 0
54
+ 4.downto(1) do |length|
55
+ match = find_match(text[pos, length])
56
+ match_length = length
57
+ break if match
58
+ end
59
+
60
+ if match
61
+ result << match[:pinyin].gsub("\s", '')
62
+ pos += match_length
63
+ next
64
+ else
65
+ # if we're still here, we didn't find a match at all.
66
+ result << char
67
+ pos += 1
68
+ end
69
+ end
70
+ end
71
+
72
+ result
73
+ end
74
+
75
+ private
76
+ def find_match(text)
37
77
  entry = @data.find do |word|
38
78
  word[:simplified] == text || word[:traditional] == text
39
79
  end
40
- entry[:pinyin].gsub("\s", '') if entry
41
80
  end
42
81
  end
43
82
  end
@@ -24,4 +24,14 @@ class TestHanziConverter < Test::Unit::TestCase
24
24
  assert_equal 'jian3dan1', result
25
25
  end
26
26
 
27
+ def test_can_convert_with_surrounding_english
28
+ result = HanziConverter.to_pinyin('no! 为什么!')
29
+ assert_equal 'no! wei4shen2me5!', result
30
+ end
31
+
32
+ def test_can_convert_sentence_of_hanzi
33
+ result = HanziConverter.to_pinyin('你好, 我是康昱辰。')
34
+ assert_equal 'ni3hao3, wo3shi4kang1yu4chen2。', result
35
+ end
36
+
27
37
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hanzi-converter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -78,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
78
78
  version: '0'
79
79
  segments:
80
80
  - 0
81
- hash: -3181578378699029479
81
+ hash: 4343939069972924356
82
82
  required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements: