hanzi_to_pinyin 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. data/README.rdoc +5 -5
  2. data/VERSION +1 -1
  3. data/lib/hanzi_to_pinyin.rb +39 -35
  4. metadata +26 -13
data/README.rdoc CHANGED
@@ -1,16 +1,16 @@
1
1
  = hanzi_to_pinyin
2
2
 
3
- translate chinese to pinyin , fetch first letter OR full pinyin
3
+ translate chinese to pinyin, fetch first letter OR full pinyin
4
4
 
5
5
  == Install
6
6
 
7
7
  rails3
8
8
 
9
- $ gem 'hanzi_to_pinyin' , '0.6.1' , require: 'hanzi_to_pinyin'
9
+ $ gem 'hanzi_to_pinyin', '0.8.0', require: 'hanzi_to_pinyin'
10
10
  or latest develop version
11
- $ gem 'hanzi_to_pinyin' , git: 'git://github.com/wxianfeng/hanzi_to_pinyin.git'
11
+ $ gem 'hanzi_to_pinyin', git: 'git://github.com/wxianfeng/hanzi_to_pinyin.git'
12
12
  or freeze ref version
13
- $ gem 'hanzi_to_pinyin' , git: 'git://github.com/wxianfeng/hanzi_to_pinyin.git' , ref: 5fa43b0
13
+ $ gem 'hanzi_to_pinyin', git: 'git://github.com/wxianfeng/hanzi_to_pinyin.git', ref: 5fa43b0
14
14
 
15
15
  == Usage
16
16
 
@@ -22,7 +22,7 @@ or freeze ref version
22
22
  $ HanziToPinyin.is_hanzi?("你") => true
23
23
  $ HanziToPinyin.is_hanzi?("a") => false
24
24
 
25
- # 只处理汉字和数字 多音字,分隔 字字之间;分隔
25
+ # 多音字,分隔 字字之间;分隔,字母丢弃
26
26
  $ HanziToPinyin.hanzi_2_py("我们") => "wo;men"
27
27
  $ HanziToPinyin.hanzi_2_py("查理Smith") => "cha,zha;li"
28
28
  $ HanziToPinyin.hanzi_2_py("测试1") => "ce;shi;1"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.0
1
+ 0.8.0
data/lib/hanzi_to_pinyin.rb CHANGED
@@ -11,6 +11,12 @@ class HanziToPinyin
11
11
  # Unicode中汉字的结束点
12
12
  @@hanzi_unicode_end = 40869
13
13
 
14
+ # 字母(10进制)
15
+ @@letter_upcase_start = 65
16
+ @@letter_upcase_end = 90
17
+ @@letter_downcase_start = 97
18
+ @@letter_downcase_end = 122
19
+
14
20
  # 数字(10进制)
15
21
  @@number_unicode_start = 48
16
22
  @@number_unicode_end = 57
@@ -48,55 +54,49 @@ class HanziToPinyin
48
54
  end
49
55
 
50
56
  ##
51
- # 只处理汉字和数字和_,- 多音字,分隔 字字之间;分隔
52
- # 查理Smith => "cha,zha;li"
53
- # 郭轶 => "guo;yi,die"
54
- # 我们 => "wo;men"
55
- # 宗志强 => "zong;zhi;qiang,jiang"
57
+ # 多音字,分隔 字字之间;分隔,字母丢弃
58
+ # 查理Smith => "cha,zha;li"
59
+ # 郭轶 => "guo;yi,die"
60
+ # 我们 => "wo;men"
61
+ # 宗志强 => "zong;zhi;qiang,jiang"
56
62
  def self.hanzi_2_py(hanzi)
57
63
  hanzi = hanzi.force_encoding("utf-8")
58
- str = ''
64
+ @str = ''
59
65
  hanzi.each_char do |hz|
60
- if is_number?(hz.ord) or is_underline?(hz.ord) or is_dash?(hz.ord)
61
- if str.length == 0
62
- str << hz.chr
63
- else
64
- if str[-1] == ";"
65
- str << hz.chr
66
- else
67
- str << ";#{hz.chr}"
68
- end
69
- end
70
- elsif is_hanzi?(hz.ord)
66
+ if is_hanzi?(hz.ord)
71
67
  values = @@py[hz]
72
- if values.size > 1
73
- if str.length == 0
74
- str << "#{values.join(',')}"
75
- else
76
- if str[-1] == ";"
77
- str << "#{values.join(',')}"
78
- else
79
- str << ";#{values.join(',')}"
80
- end
81
- end
68
+ append(values)
69
+ elsif is_letter?(hz.ord)
70
+ next
71
+ else
72
+ if @str.length == 0
73
+ @str << hz.chr
82
74
  else
83
- if str.length == 0
84
- str << "#{values.join};"
75
+ if @str[-1] == ";"
76
+ @str << hz.chr
85
77
  else
86
- if str[-1] == ";"
87
- str << "#{values.join}"
88
- else
89
- str << ";#{values.join}"
90
- end
78
+ @str << ";#{hz.chr}"
91
79
  end
92
80
  end
93
81
  end
94
82
  end
95
- str
83
+ @str
96
84
  end
97
85
  class << self
98
86
  alias_method :hanzi_to_py , :hanzi_2_py
99
87
  end
88
+
89
+ def self.append(values)
90
+ if @str.length == 0
91
+ @str << "#{values.join(',')}"
92
+ else
93
+ if @str[-1] == ";"
94
+ @str << "#{values.join(',')}"
95
+ else
96
+ @str << ";#{values.join(',')}"
97
+ end
98
+ end
99
+ end
100
100
 
101
101
  def self.is_hanzi?(hanzi_codepoint)
102
102
  hanzi_codepoint >= @@hanzi_unicode_start && hanzi_codepoint <= @@hanzi_unicode_end
@@ -113,5 +113,9 @@ class HanziToPinyin
113
113
  def self.is_dash?(codepoint)
114
114
  codepoint == @@dash
115
115
  end
116
+
117
+ def self.is_letter?(codepoint)
118
+ codepoint >= @@letter_upcase_start && codepoint <= @@letter_upcase_end or codepoint >= @@letter_downcase_start && codepoint <= @@letter_downcase_end
119
+ end
116
120
 
117
121
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hanzi_to_pinyin
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-17 00:00:00.000000000 +08:00
13
- default_executable:
12
+ date: 2012-05-14 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: json
17
- requirement: &87605570 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
18
17
  none: false
19
18
  requirements:
20
19
  - - ! '>='
@@ -22,10 +21,15 @@ dependencies:
22
21
  version: '0'
23
22
  type: :runtime
24
23
  prerelease: false
25
- version_requirements: *87605570
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
26
30
  - !ruby/object:Gem::Dependency
27
31
  name: yamler
28
- requirement: &87604140 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
29
33
  none: false
30
34
  requirements:
31
35
  - - ! '>='
@@ -33,10 +37,15 @@ dependencies:
33
37
  version: '0'
34
38
  type: :runtime
35
39
  prerelease: false
36
- version_requirements: *87604140
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
37
46
  - !ruby/object:Gem::Dependency
38
47
  name: rspec
39
- requirement: &87603270 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
40
49
  none: false
41
50
  requirements:
42
51
  - - ! '>='
@@ -44,7 +53,12 @@ dependencies:
44
53
  version: '0'
45
54
  type: :development
46
55
  prerelease: false
47
- version_requirements: *87603270
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
48
62
  description: chinese hanzi to pinyin , fetch first letter OR full pinyin, written
49
63
  in Ruby.
50
64
  email:
@@ -57,10 +71,9 @@ files:
57
71
  - VERSION
58
72
  - LICENSE.txt
59
73
  - Rakefile
74
+ - lib/hanzi_to_pinyin.rb
60
75
  - lib/data/unicode_to_pinyin.yml
61
76
  - lib/data/hz2py.json
62
- - lib/hanzi_to_pinyin.rb
63
- has_rdoc: true
64
77
  homepage: http://github.com/wxianfeng/hanzi_to_pinyin
65
78
  licenses: []
66
79
  post_install_message: ! " hanzi_to_pinyin is a tool for chinese hanzi to pinyin ,fetch
@@ -77,7 +90,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
77
90
  version: '0'
78
91
  segments:
79
92
  - 0
80
- hash: -743416909
93
+ hash: 292557973814545694
81
94
  required_rubygems_version: !ruby/object:Gem::Requirement
82
95
  none: false
83
96
  requirements:
@@ -86,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
99
  version: 1.3.6
87
100
  requirements: []
88
101
  rubyforge_project:
89
- rubygems_version: 1.6.2
102
+ rubygems_version: 1.8.24
90
103
  signing_key:
91
104
  specification_version: 3
92
105
  summary: chinese hanzi to pinyin , fetch first letter OR full pinyin