hanzi_to_pinyin 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,102 @@
1
+ # encoding:utf-8
2
+ require "json"
3
+
4
# YAML is used below to load the code-point table; require it explicitly
# instead of relying on some other library having loaded it first.
require "yaml"

# Converts Chinese characters (hanzi) into pinyin using two lookup tables
# that are read from disk once, when the class is loaded:
#
# * data/unicode_to_pinyin.yml - looked up by the uppercase hexadecimal
#   code point of a character (see hanzi_2_pinyin)
# * data/hz2py.json            - looked up by the character itself; each
#   entry is treated as a list of readings (see hanzi_2_py)
class HanziToPinyin

  VERSION = File.read File.expand_path("../../VERSION", __FILE__)

  # First and last code points (decimal) of the range treated as hanzi.
  @@hanzi_unicode_start = 19968 # 0x4E00
  @@hanzi_unicode_end = 40869   # 0x9FA5

  # Code points (decimal) of the ASCII digits "0".."9".
  @@number_unicode_start = 48
  @@number_unicode_end = 57

  # Table keyed by uppercase hexadecimal code point strings.
  @@unicode = YAML.load(File.read(File.expand_path("../data/unicode_to_pinyin.yml", __FILE__)))
  # Table keyed by the hanzi character itself.
  @@py = ::JSON.parse(File.read(File.expand_path("../data/hz2py.json", __FILE__)))

  ##
  # Replaces every hanzi in +hanzi+ with its entry from the code-point table.
  # (Original author's note: "first letter only" - what is emitted per hanzi
  # is whatever the table stores for it.)
  #
  # Every other character is kept, with two adjustments: "-" becomes "_" and
  # the character is downcased. A hanzi that has no table entry is passed
  # through unchanged instead of raising.
  #
  # The argument is not modified; a UTF-8 tagged copy is processed.
  #
  # Returns a new String.
  def self.hanzi_2_pinyin(hanzi)
    # dup: force_encoding would otherwise re-tag the caller's string and
    # fail on frozen strings.
    text = hanzi.dup.force_encoding("utf-8")
    out = ''
    text.each_char do |char|
      code = char.ord
      if is_hanzi?(code)
        pinyin = @@unicode[code.to_s(16).upcase]
        out << (pinyin || char)
      elsif code == 45 # "-"
        out << "_"
      else
        # Work on the character itself rather than Integer#chr, which
        # raises RangeError for code points above 255 when no encoding is
        # given.
        out << char.downcase
      end
    end
    out
  end
  class << self
    alias_method :hanzi_to_pinyin , :hanzi_2_pinyin
  end

  ##
  # Handles hanzi and ASCII digits only; every other character is dropped.
  # Alternative readings of one character are separated by ",", and
  # consecutive characters are separated by ";".
  #
  #   查理Smith => "cha,zha;li"
  #   郭轶 => "guo;yi,die"
  #   我们 => "wo;men"
  #   宗志强 => "zong;zhi;qiang,jiang"
  #
  # A hanzi with no entry (or an empty entry) in the table is skipped.
  # The argument is not modified; a UTF-8 tagged copy is processed.
  #
  # Returns a new String.
  def self.hanzi_2_py(hanzi)
    text = hanzi.dup.force_encoding("utf-8")
    tokens = []
    text.each_char do |char|
      code = char.ord
      if is_number?(code)
        tokens << char
      elsif is_hanzi?(code)
        readings = @@py[char]
        tokens << readings.join(',') if readings && !readings.empty?
      end
    end
    # Joining once at the end guarantees ";" only ever appears between
    # tokens, never as a trailing separator.
    tokens.join(';')
  end
  class << self
    alias_method :hanzi_to_py , :hanzi_2_py
  end

  # True when the given Integer code point lies inside the hanzi range.
  def self.is_hanzi?(hanzi_codepoint)
    hanzi_codepoint.between?(@@hanzi_unicode_start, @@hanzi_unicode_end)
  end

  # True when the given Integer code point is an ASCII digit.
  def self.is_number?(number_codepoint)
    number_codepoint.between?(@@number_unicode_start, @@number_unicode_end)
  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hanzi_to_pinyin
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -14,7 +14,7 @@ default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rspec
17
- requirement: &80933510 !ruby/object:Gem::Requirement
17
+ requirement: &86066550 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,7 +22,7 @@ dependencies:
22
22
  version: '0'
23
23
  type: :development
24
24
  prerelease: false
25
- version_requirements: *80933510
25
+ version_requirements: *86066550
26
26
  description: chinese hanzi to pinyin , fetch first letter OR full pinyin, written
27
27
  in Ruby.
28
28
  email:
@@ -30,7 +30,14 @@ email:
30
30
  executables: []
31
31
  extensions: []
32
32
  extra_rdoc_files: []
33
- files: []
33
+ files:
34
+ - README.rdoc
35
+ - VERSION
36
+ - LICENSE.txt
37
+ - Rakefile
38
+ - lib/data/unicode_to_pinyin.yml
39
+ - lib/data/hz2py.json
40
+ - lib/hanzi_to_pinyin.rb
34
41
  has_rdoc: true
35
42
  homepage: http://github.com/wxianfeng/hanzi_to_pinyin
36
43
  licenses: []
@@ -48,7 +55,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
48
55
  version: '0'
49
56
  segments:
50
57
  - 0
51
- hash: 898937167
58
+ hash: 26390223
52
59
  required_rubygems_version: !ruby/object:Gem::Requirement
53
60
  none: false
54
61
  requirements: