hanzi-converter 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +4 -0
- data/VERSION +1 -1
- data/hanzi-converter.gemspec +1 -1
- data/lib/hanzi-converter.rb +41 -2
- data/test/test_hanzi-converter.rb +10 -0
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -6,6 +6,10 @@ Example usage:
|
|
6
6
|
|
7
7
|
HanziConverter.load_data
|
8
8
|
HanziConverter.to_pinyin('走红') # zou3hong2
|
9
|
+
HanziConverter.to_pinyin('簡單') # jian3dan1
|
10
|
+
HanziConverter.to_pinyin('为什么') # wei4shen2me5
|
11
|
+
HanziConverter.to_pinyin('no! 为什么!') # no! wei4shen2me5!
|
12
|
+
HanziConverter.to_pinyin('你好, 我是康昱辰。') # ni3hao3, wo3shi4kang1yu4chen2。
|
9
13
|
|
10
14
|
To run tests:
|
11
15
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.1
|
1
|
+
0.2.0
|
data/hanzi-converter.gemspec
CHANGED
data/lib/hanzi-converter.rb
CHANGED
@@ -22,7 +22,7 @@ class HanziConverter
|
|
22
22
|
line_data[:simplified] = line[0, line.index(' ')]
|
23
23
|
|
24
24
|
line = line[line.index('['), line.length]
|
25
|
-
line_data[:pinyin] = line[1, line.index(']') - 1]
|
25
|
+
line_data[:pinyin] = line[1, line.index(']') - 1].downcase
|
26
26
|
|
27
27
|
line = line[line.index('/'), line.rindex('/')]
|
28
28
|
line_data[:english] = line[1, line.rindex('/') - 1]
|
@@ -34,10 +34,49 @@ class HanziConverter
|
|
34
34
|
|
35
35
|
def to_pinyin(text, options={})
|
36
36
|
load_data if @data.nil?
|
37
|
+
|
38
|
+
result = ''
|
39
|
+
pos = 0
|
40
|
+
|
41
|
+
loop do
|
42
|
+
char = text[pos]
|
43
|
+
break if !char
|
44
|
+
|
45
|
+
if char.ord < 0x4E00 || char.ord > 0x9FFF
|
46
|
+
# it's not a chinese character.
|
47
|
+
result << char
|
48
|
+
pos += 1
|
49
|
+
else
|
50
|
+
# it's a chinese character. start by trying to find a long word match,
|
51
|
+
# and if it fails, all the way down to a single hanzi.
|
52
|
+
match = nil
|
53
|
+
match_length = 0
|
54
|
+
4.downto(1) do |length|
|
55
|
+
match = find_match(text[pos, length])
|
56
|
+
match_length = length
|
57
|
+
break if match
|
58
|
+
end
|
59
|
+
|
60
|
+
if match
|
61
|
+
result << match[:pinyin].gsub("\s", '')
|
62
|
+
pos += match_length
|
63
|
+
next
|
64
|
+
else
|
65
|
+
# if we're still here, we didn't find a match at all.
|
66
|
+
result << char
|
67
|
+
pos += 1
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
result
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
def find_match(text)
|
37
77
|
entry = @data.find do |word|
|
38
78
|
word[:simplified] == text || word[:traditional] == text
|
39
79
|
end
|
40
|
-
entry[:pinyin].gsub("\s", '') if entry
|
41
80
|
end
|
42
81
|
end
|
43
82
|
end
|
data/test/test_hanzi-converter.rb
CHANGED
@@ -24,4 +24,14 @@ class TestHanziConverter < Test::Unit::TestCase
|
|
24
24
|
assert_equal 'jian3dan1', result
|
25
25
|
end
|
26
26
|
|
27
|
+
def test_can_convert_with_surrounding_english
|
28
|
+
result = HanziConverter.to_pinyin('no! 为什么!')
|
29
|
+
assert_equal 'no! wei4shen2me5!', result
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_can_convert_sentence_of_hanzi
|
33
|
+
result = HanziConverter.to_pinyin('你好, 我是康昱辰。')
|
34
|
+
assert_equal 'ni3hao3, wo3shi4kang1yu4chen2。', result
|
35
|
+
end
|
36
|
+
|
27
37
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hanzi-converter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -78,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
78
78
|
version: '0'
|
79
79
|
segments:
|
80
80
|
- 0
|
81
|
-
hash:
|
81
|
+
hash: 4343939069972924356
|
82
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|