hanzi-converter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+
3
# Converts Chinese words (simplified or traditional Hanzi) to tone-numbered
# pinyin by looking them up in a CC-CEDICT dictionary file.
class HanziConverter
  # Dictionary shipped with the library. Resolved from this file's location
  # rather than the process's working directory, so lookups work no matter
  # where the calling program is started from.
  DEFAULT_DATA_PATH = File.expand_path('../data/cedict_ts.u8', __FILE__).freeze

  # CC-CEDICT entry format:
  #   Traditional Simplified [pin1 yin1] /English equivalent 1/equivalent 2/
  # Captures: traditional, simplified, pinyin, and everything between the
  # first and the last slash of the definitions.
  ENTRY_PATTERN = %r{\A(\S+)\s+(\S+)\s+\[([^\]]*)\][^/]*/(.*)/}

  class << self
    # Array of entry hashes with the keys :traditional, :simplified, :pinyin
    # and :english; nil until the dictionary has been loaded or assigned.
    attr_accessor :data

    # Reads the dictionary into +data+. Does nothing when +data+ is already
    # set.
    #
    # Comment lines (starting with '#') and lines that do not match the
    # CC-CEDICT entry format are skipped instead of aborting the load.
    #
    # @param path [String] dictionary file to read; defaults to the bundled one
    # @raise [SystemCallError] if the file cannot be opened
    def load_data(path = DEFAULT_DATA_PATH)
      return if @data

      entries = []
      # Block form guarantees the file handle is closed, even on error.
      File.open(path, 'r:utf-8') do |file|
        file.each_line do |line|
          entry = parse_line(line)
          entries << entry if entry
        end
      end

      # Assigned only after a complete read so that a failure part-way
      # through never leaves a truncated dictionary marked as loaded.
      @data = entries
    end

    # Looks up +text+ as a whole word and returns its pinyin with the spaces
    # between syllables removed (e.g. "jian3 dan1" becomes "jian3dan1").
    #
    # @param text [String] word in simplified or traditional characters
    # @param options [Hash] currently unused; kept for interface compatibility
    # @return [String, nil] pinyin of the first matching entry, or nil when
    #   no entry matches
    def to_pinyin(text, options = {})
      load_data if @data.nil?

      entry = @data.find do |word|
        word[:simplified] == text || word[:traditional] == text
      end
      entry[:pinyin].delete(' ') if entry
    end

    private

    # Turns one dictionary line into an entry hash, or returns nil for
    # comment lines, lines with invalid encoding and lines that do not match
    # ENTRY_PATTERN.
    def parse_line(line)
      return nil if line.start_with?('#')
      # Matching a regexp against a string with broken bytes would raise.
      return nil unless line.valid_encoding?

      fields = ENTRY_PATTERN.match(line)
      return nil unless fields

      {
        traditional: fields[1],
        simplified: fields[2],
        pinyin: fields[3],
        english: fields[4]
      }
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,17 @@
1
# Test bootstrap: activates the bundle, loads test/unit and makes both the
# library and the test directory requirable before loading the library.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report the problem and exit with the status code Bundler attached to it.
  $stderr.puts(bundler_error.message, "Run `bundle install` to install missing gems")
  exit bundler_error.status_code
end

require 'test/unit'

test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'hanzi-converter'

# Reopened with an empty body; nothing is added to the base test case here.
class Test::Unit::TestCase
end
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+
3
+ require 'helper'
4
+
5
# Exercises HanziConverter through its public class methods: loading the
# dictionary and converting simplified and traditional words to pinyin.
class TestHanziConverter < Test::Unit::TestCase
  # Loading must leave at least one entry in the dictionary.
  def test_should_init_data
    HanziConverter.load_data
    entry_total = HanziConverter.data.count
    assert entry_total > 0
  end

  # The result keeps the tone numbers of the matched entry.
  def test_convert_with_tones
    assert_equal 'wei4shen2me5', HanziConverter.to_pinyin('为什么')
  end

  def test_second_word
    assert_equal 'zou3hong2', HanziConverter.to_pinyin('走红')
  end

  # Traditional characters are accepted as lookup keys too.
  def test_can_convert_traditional
    assert_equal 'jian3dan1', HanziConverter.to_pinyin('簡單')
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hanzi-converter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Steve Jackson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rdoc
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '3.12'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '3.12'
30
+ - !ruby/object:Gem::Dependency
31
+ name: jeweler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.8.4
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.8.4
46
+ description: Convert Hanzi to pinyin. Unlike other similar gems, this includes tones
47
+ and can accurately translate common words.
48
+ email: steven.j.jackson@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files:
52
+ - LICENSE.txt
53
+ - README.rdoc
54
+ files:
55
+ - .document
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.rdoc
59
+ - Rakefile
60
+ - VERSION
61
+ - hanzi-converter.gemspec
62
+ - lib/data/cedict_ts.u8
63
+ - lib/hanzi-converter.rb
64
+ - test/helper.rb
65
+ - test/test_hanzi-converter.rb
66
+ homepage: http://github.com/stevejackson/hanzi-converter
67
+ licenses:
68
+ - MIT
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ segments:
80
+ - 0
81
+ hash: 614009522307965857
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.24
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Convert Hanzi to pinyin. Unlike other similar gems, this includes tones and
94
+ can accurately translate common words.
95
+ test_files: []