hanzi-converter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,42 @@
# encoding: utf-8

# Converts Chinese words (simplified or traditional hanzi) to numbered pinyin
# by looking them up in a CC-CEDICT dictionary file.
class HanziConverter
  # Default dictionary location, resolved relative to this source file so that
  # loading does not depend on the process's current working directory.
  DATA_PATH = File.expand_path('../data/cedict_ts.u8', __FILE__)

  # CC-CEDICT entry format:
  #   Traditional Simplified [pin1 yin1] /English equivalent 1/equivalent 2/
  # Captures: traditional, simplified, pinyin (text between the brackets) and
  # english (everything between the first '/' after the pinyin and the last
  # '/' on the line).
  ENTRY_PATTERN = %r{\A(\S+)\s+(\S+)\s+\[([^\]]*)\][^/]*/(.*)/[^/]*\z}

  class << self
    # Parsed dictionary entries, or nil until load_data has run. Each entry is
    # a Hash with the keys :traditional, :simplified, :pinyin and :english.
    attr_accessor :data

    # Reads the dictionary into memory. Does nothing when entries are already
    # present.
    #
    # Comment lines (starting with '#') and lines that do not match the
    # CC-CEDICT entry format (blank lines, truncated entries) are skipped.
    #
    # @param path [String] dictionary file to read; defaults to DATA_PATH
    # @return [Array<Hash>] the loaded entries
    # @raise [SystemCallError] when the file cannot be opened or read; in that
    #   case no data is cached, so a later call can retry
    def load_data(path = DATA_PATH)
      return @data if @data

      # Collect into a local first: assigning @data only after a complete read
      # avoids caching an empty dictionary when opening the file fails.
      entries = []
      # The block form of File.foreach closes the file when iteration ends.
      File.foreach(path, encoding: 'utf-8') do |line|
        next if line.start_with?('#')

        entry = parse_entry(line)
        entries << entry if entry
      end

      @data = entries
    end

    # Looks up a word and returns its pinyin with the spaces between
    # syllables removed (e.g. "jian3 dan1" becomes "jian3dan1").
    #
    # @param text [String] the word, in simplified or traditional characters
    # @param options [Hash] accepted for interface compatibility; not used
    # @return [String, nil] pinyin of the first entry whose simplified or
    #   traditional form equals +text+, or nil when there is no such entry
    def to_pinyin(text, options = {})
      load_data if @data.nil?

      entry = @data.find do |word|
        word[:simplified] == text || word[:traditional] == text
      end
      entry[:pinyin].delete(' ') if entry
    end

    private

    # Turns one dictionary line into an entry Hash, or returns nil when the
    # line is not a well-formed CC-CEDICT entry.
    def parse_entry(line)
      # Matching a regexp against a string with invalid bytes would raise.
      return nil unless line.valid_encoding?

      fields = ENTRY_PATTERN.match(line)
      return nil unless fields

      {
        :traditional => fields[1],
        :simplified => fields[2],
        :pinyin => fields[3],
        :english => fields[4]
      }
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,17 @@
# Test bootstrap: sets up Bundler, loads test/unit and makes the library
# under test requirable.
require 'rubygems'
require 'bundler'

# Set up the :default and :development gem groups. On a Bundler error, print
# the message plus a hint and exit with the error's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit error.status_code
end

require 'test/unit'

# Load path order: this test directory first, then the sibling lib directory.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir, File.join(test_dir, '..', 'lib'))
require 'hanzi-converter'

# Reopened without additions; a place for helpers shared by all test cases.
class Test::Unit::TestCase
end
@@ -0,0 +1,27 @@
# encoding: utf-8

require 'helper'

# Checks dictionary loading and pinyin lookup through HanziConverter.
class TestHanziConverter < Test::Unit::TestCase
  # Loading must produce at least one dictionary entry.
  def test_should_init_data
    HanziConverter.load_data
    entry_count = HanziConverter.data.count
    assert entry_count > 0
  end

  # The result keeps tone numbers and has no spaces between syllables.
  def test_convert_with_tones
    assert_equal 'wei4shen2me5', HanziConverter.to_pinyin('为什么')
  end

  def test_second_word
    assert_equal 'zou3hong2', HanziConverter.to_pinyin('走红')
  end

  # Traditional characters are accepted as input as well.
  def test_can_convert_traditional
    assert_equal 'jian3dan1', HanziConverter.to_pinyin('簡單')
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hanzi-converter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Steve Jackson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rdoc
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '3.12'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '3.12'
30
+ - !ruby/object:Gem::Dependency
31
+ name: jeweler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.8.4
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.8.4
46
+ description: Convert Hanzi to pinyin. Unlike other similar gems, this includes tones
47
+ and can accurately translate common words.
48
+ email: steven.j.jackson@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files:
52
+ - LICENSE.txt
53
+ - README.rdoc
54
+ files:
55
+ - .document
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.rdoc
59
+ - Rakefile
60
+ - VERSION
61
+ - hanzi-converter.gemspec
62
+ - lib/data/cedict_ts.u8
63
+ - lib/hanzi-converter.rb
64
+ - test/helper.rb
65
+ - test/test_hanzi-converter.rb
66
+ homepage: http://github.com/stevejackson/hanzi-converter
67
+ licenses:
68
+ - MIT
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ segments:
80
+ - 0
81
+ hash: 614009522307965857
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.24
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Convert Hanzi to pinyin. Unlike other similar gems, this includes tones and
94
+ can accurately translate common words.
95
+ test_files: []