hanzi-converter 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +4 -0
- data/VERSION +1 -1
- data/hanzi-converter.gemspec +1 -1
- data/lib/hanzi-converter.rb +41 -2
- data/test/test_hanzi-converter.rb +10 -0
- metadata +2 -2
data/README.rdoc
CHANGED
|
@@ -6,6 +6,10 @@ Example usage:
|
|
|
6
6
|
|
|
7
7
|
HanziConverter.load_data
|
|
8
8
|
HanziConverter.to_pinyin('走红') # zou3hong2
|
|
9
|
+
HanziConverter.to_pinyin('簡單') # jian3dan1
|
|
10
|
+
HanziConverter.to_pinyin('为什么') # wei4shen2me5
|
|
11
|
+
HanziConverter.to_pinyin('no! 为什么!') # no! wei4shen2me5!
|
|
12
|
+
HanziConverter.to_pinyin('你好, 我是康昱辰。') # ni3hao3, wo3shi4kang1yu4chen2。
|
|
9
13
|
|
|
10
14
|
To run tests:
|
|
11
15
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.1.1
|
|
1
|
+
0.2.0
|
data/hanzi-converter.gemspec
CHANGED
data/lib/hanzi-converter.rb
CHANGED
|
@@ -22,7 +22,7 @@ class HanziConverter
|
|
|
22
22
|
line_data[:simplified] = line[0, line.index(' ')]
|
|
23
23
|
|
|
24
24
|
line = line[line.index('['), line.length]
|
|
25
|
-
line_data[:pinyin] = line[1, line.index(']') - 1]
|
|
25
|
+
line_data[:pinyin] = line[1, line.index(']') - 1].downcase
|
|
26
26
|
|
|
27
27
|
line = line[line.index('/'), line.rindex('/')]
|
|
28
28
|
line_data[:english] = line[1, line.rindex('/') - 1]
|
|
@@ -34,10 +34,49 @@ class HanziConverter
|
|
|
34
34
|
|
|
35
35
|
def to_pinyin(text, options={})
|
|
36
36
|
load_data if @data.nil?
|
|
37
|
+
|
|
38
|
+
result = ''
|
|
39
|
+
pos = 0
|
|
40
|
+
|
|
41
|
+
loop do
|
|
42
|
+
char = text[pos]
|
|
43
|
+
break if !char
|
|
44
|
+
|
|
45
|
+
if char.ord < 0x4E00 || char.ord > 0x9FFF
|
|
46
|
+
# it's not a chinese character.
|
|
47
|
+
result << char
|
|
48
|
+
pos += 1
|
|
49
|
+
else
|
|
50
|
+
# it's a chinese character. start by trying to find a long word match,
|
|
51
|
+
# and if it fails, all the way down to a single hanzi.
|
|
52
|
+
match = nil
|
|
53
|
+
match_length = 0
|
|
54
|
+
4.downto(1) do |length|
|
|
55
|
+
match = find_match(text[pos, length])
|
|
56
|
+
match_length = length
|
|
57
|
+
break if match
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
if match
|
|
61
|
+
result << match[:pinyin].gsub("\s", '')
|
|
62
|
+
pos += match_length
|
|
63
|
+
next
|
|
64
|
+
else
|
|
65
|
+
# if we're still here, we didn't find a match at all.
|
|
66
|
+
result << char
|
|
67
|
+
pos += 1
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
result
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
def find_match(text)
|
|
37
77
|
entry = @data.find do |word|
|
|
38
78
|
word[:simplified] == text || word[:traditional] == text
|
|
39
79
|
end
|
|
40
|
-
entry[:pinyin].gsub("\s", '') if entry
|
|
41
80
|
end
|
|
42
81
|
end
|
|
43
82
|
end
|
|
@@ -24,4 +24,14 @@ class TestHanziConverter < Test::Unit::TestCase
|
|
|
24
24
|
assert_equal 'jian3dan1', result
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
def test_can_convert_with_surrounding_english
|
|
28
|
+
result = HanziConverter.to_pinyin('no! 为什么!')
|
|
29
|
+
assert_equal 'no! wei4shen2me5!', result
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def test_can_convert_sentence_of_hanzi
|
|
33
|
+
result = HanziConverter.to_pinyin('你好, 我是康昱辰。')
|
|
34
|
+
assert_equal 'ni3hao3, wo3shi4kang1yu4chen2。', result
|
|
35
|
+
end
|
|
36
|
+
|
|
27
37
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: hanzi-converter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -78,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
78
78
|
version: '0'
|
|
79
79
|
segments:
|
|
80
80
|
- 0
|
|
81
|
-
hash:
|
|
81
|
+
hash: 4343939069972924356
|
|
82
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
83
|
none: false
|
|
84
84
|
requirements:
|