chinese_pinyin 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,13 +6,19 @@ require 'chinese_pinyin/version'
6
6
  class Pinyin
7
7
  class <<self
8
8
  attr_accessor :table
9
+ attr_accessor :ruby2
9
10
 
10
11
  def init_table
11
12
  return if @table
12
- @table = {}
13
- open(File.dirname(__FILE__) + '/../data/Mandarin.dat') do |file|
13
+
14
+ # Ruby 2.0以后默认即为UTF-8编码,使用新的码表以提升效率
15
+ @ruby2 = !!(RUBY_VERSION =~ /^2/)
16
+ datfile = @ruby2 ? 'pinyin-utf8.dat' : 'Mandarin.dat'
17
+ @table = {}
18
+
19
+ File.open(File.dirname(__FILE__) + "/../data/#{datfile}") do |file|
14
20
  while line = file.gets
15
- key, value = line.split(' ', 2)
21
+ key, value = line.split(' ', 2)
16
22
  @table[key] = value
17
23
  end
18
24
  end
@@ -20,11 +26,13 @@ class Pinyin
20
26
 
21
27
  def init_word_table
22
28
  return if @words_table
29
+
23
30
  @words_table = {}
31
+
24
32
  if ENV["WORDS_FILE"]
25
- open(ENV["WORDS_FILE"]) do |file|
33
+ File.open(ENV["WORDS_FILE"]) do |file|
26
34
  while line = file.gets
27
- key, value = line.sub("\n", "").split('|', 2)
35
+ key, value = line.sub("\n", "").split('|', 2)
28
36
  @words_table[key] = value
29
37
  end
30
38
  end
@@ -32,24 +40,41 @@ class Pinyin
32
40
  end
33
41
 
34
42
  def translate(chars, options={})
35
- splitter = options[:splitter] || ' '
36
- tone = options[:tone] || false
43
+ splitter = options.fetch(:splitter, ' ')
44
+ tone = options.fetch(:tone, false)
45
+ camel = options.fetch(:camelcase, false)
37
46
 
38
47
  init_word_table
39
- return @words_table[chars].gsub(' ', splitter) if @words_table[chars]
48
+ results = @words_table[chars]
49
+ if results
50
+ results = results.split
51
+ results.map!(&:downcase)
52
+ results.map!(&:capitalize) if camel
53
+ results.map! { |x| (48..57).include?(x[-1].ord) ? x.chop! : x } unless tone
54
+
55
+ return results.join(splitter)
56
+ end
40
57
 
41
58
  init_table
42
- results = []
59
+ results = []
43
60
  is_english = false
61
+
44
62
  chars.scan(/./).each do |char|
45
- key = sprintf("%X", char.unpack("U").first)
63
+ key = @ruby2 ? char : sprintf("%X", char.unpack("U").first)
64
+
46
65
  if @table[key]
47
66
  results << splitter if is_english
48
- pinyin = @table[key].chomp.split(' ', 2)[0].downcase
67
+
68
+ is_english = false
69
+ pinyin = @table[key].chomp.split(' ', 2)[0]
70
+
71
+ pinyin.downcase! unless @ruby2
49
72
  pinyin.chop! unless tone
73
+ pinyin.capitalize! if camel
74
+
50
75
  results << pinyin
51
76
  results << splitter
52
- is_english = false
77
+
53
78
  else
54
79
  results << char
55
80
  is_english = true
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module ChinesePinyin
3
- VERSION = "0.5.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/test/Words.dat CHANGED
@@ -1 +1,2 @@
1
1
  广州|guang zhou
2
+ 上海|ShANg4 hAi3
@@ -7,16 +7,31 @@ class PinyinTest < Test::Unit::TestCase
7
7
  def test_t
8
8
  assert_equal("zhong guo", Pinyin.t('中国'))
9
9
  assert_equal("zhong guo english ri", Pinyin.t('中国english日'))
10
+ assert_equal("shang hai very good o ye", Pinyin.t('上海very good哦耶'))
10
11
  end
11
12
 
12
13
  def test_t_with_splitter
13
14
  assert_equal("zhong-guo", Pinyin.t('中国', splitter: '-'))
14
15
  assert_equal("huangzhimin", Pinyin.t('黄志敏', splitter: ''))
15
16
  assert_equal("guang-zhou", Pinyin.t('广州', splitter: '-'))
17
+ assert_equal("shang-hai", Pinyin.t('上海', splitter: '-'))
16
18
  end
17
19
 
18
20
  def test_t_with_tone
19
21
  assert_equal("zhong1 guo2", Pinyin.t('中国', tone: true))
20
22
  assert_equal("huang2 zhi4 min3", Pinyin.t('黄志敏', tone: true))
23
+ assert_equal("shang4 hai3", Pinyin.t('上海', tone: true))
24
+ end
25
+
26
+ def test_t_with_camelcase
27
+ assert_equal("Zhong Guo", Pinyin.t('中国', camelcase: true))
28
+ assert_equal("Huang Zhi Min", Pinyin.t('黄志敏', camelcase: true))
29
+ assert_equal("Zhong1 Guo2", Pinyin.t('中国', camelcase: true, tone: true))
30
+ assert_equal("Huang2 Zhi4 Min3", Pinyin.t('黄志敏', camelcase: true, tone: true))
31
+ assert_equal("Zhong-Guo", Pinyin.t('中国', camelcase: true, splitter: '-'))
32
+ assert_equal("HuangZhiMin", Pinyin.t('黄志敏', camelcase: true, splitter: ''))
33
+ assert_equal("Guang-Zhou", Pinyin.t('广州', camelcase: true, splitter: '-'))
34
+ assert_equal("Shang-Hai", Pinyin.t('上海', camelcase: true, splitter: '-'))
35
+ assert_equal("Shang4-Hai3", Pinyin.t('上海', camelcase: true, tone:true, splitter: '-'))
21
36
  end
22
37
  end
metadata CHANGED
@@ -1,18 +1,20 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chinese_pinyin
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Huang
8
+ - Hong, Liang
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-06-27 00:00:00.000000000 Z
12
+ date: 2013-09-04 00:00:00.000000000 Z
12
13
  dependencies: []
13
14
  description: translate chinese hanzi to pinyin.
14
15
  email:
15
16
  - flyerhzm@gmail.com
17
+ - hongliang@bamajia.com
16
18
  executables: []
17
19
  extensions: []
18
20
  extra_rdoc_files: []
@@ -26,6 +28,7 @@ files:
26
28
  - Rakefile
27
29
  - chinese_pinyin.gemspec
28
30
  - data/Mandarin.dat
31
+ - data/pinyin-utf8.dat
29
32
  - lib/chinese_pinyin.rb
30
33
  - lib/chinese_pinyin/version.rb
31
34
  - test/Words.dat
@@ -50,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
53
  version: 1.3.6
51
54
  requirements: []
52
55
  rubyforge_project:
53
- rubygems_version: 2.0.3
56
+ rubygems_version: 2.0.5
54
57
  signing_key:
55
58
  specification_version: 4
56
59
  summary: translate chinese hanzi to pinyin.