hanzi_to_pinyin 0.8.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -0
- data/VERSION +1 -1
- data/lib/data/hz2py.json +1796 -1795
- data/lib/data/unicode_to_pinyin.yml +1 -0
- data/lib/hanzi_to_pinyin.rb +34 -8
- metadata +14 -15
- data/README.rdoc +0 -47
data/lib/hanzi_to_pinyin.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:utf-8
|
2
2
|
require 'json'
|
3
3
|
require 'yaml'
|
4
|
+
require 'cgi'
|
4
5
|
|
5
6
|
class HanziToPinyin
|
6
7
|
|
@@ -50,40 +51,65 @@ class HanziToPinyin
|
|
50
51
|
u_str
|
51
52
|
end
|
52
53
|
class << self
|
53
|
-
alias_method :hanzi_to_pinyin
|
54
|
+
alias_method :hanzi_to_pinyin, :hanzi_2_pinyin
|
54
55
|
end
|
55
56
|
|
56
57
|
##
|
57
|
-
# 多音字,分隔
|
58
|
-
# 查理Smith => "cha,zha;li"
|
58
|
+
# 多音字,分隔 字字之间;分隔,字母原样保留
|
59
|
+
# 查理Smith => "cha,zha;li;Smith"
|
59
60
|
# 郭轶 => "guo;yi,die"
|
60
61
|
# 我们 => "wo;men"
|
61
62
|
# 宗志强 => "zong;zhi;qiang,jiang"
|
62
63
|
def self.hanzi_2_py(hanzi)
|
63
|
-
hanzi = hanzi.force_encoding("utf-8")
|
64
|
+
hanzi = hanzi.to_s.force_encoding("utf-8")
|
64
65
|
@str = ''
|
66
|
+
index = 0
|
65
67
|
hanzi.each_char do |hz|
|
66
68
|
if is_hanzi?(hz.ord)
|
67
69
|
values = @@py[hz]
|
68
70
|
append(values)
|
69
|
-
elsif is_letter?(hz.ord)
|
70
|
-
next
|
71
71
|
else
|
72
72
|
if @str.length == 0
|
73
73
|
@str << hz.chr
|
74
74
|
else
|
75
75
|
if @str[-1] == ";"
|
76
76
|
@str << hz.chr
|
77
|
+
elsif @str[-1] =~ /[a-z]/i
|
78
|
+
if is_hanzi?(hanzi[index-1].ord)
|
79
|
+
@str << ";#{hz.chr}"
|
80
|
+
else
|
81
|
+
@str << hz.chr
|
82
|
+
end
|
77
83
|
else
|
78
84
|
@str << ";#{hz.chr}"
|
79
85
|
end
|
80
86
|
end
|
81
87
|
end
|
88
|
+
index += 1
|
82
89
|
end
|
83
90
|
@str
|
84
91
|
end
|
85
92
|
class << self
|
86
|
-
alias_method :hanzi_to_py
|
93
|
+
alias_method :hanzi_to_py, :hanzi_2_py
|
94
|
+
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# 汉字转化为安全的 url
|
98
|
+
def self.hanzi_2_url(hanzi)
|
99
|
+
hanzi = hanzi.to_s.force_encoding("utf-8")
|
100
|
+
arr = []
|
101
|
+
hanzi.each_char do |hz|
|
102
|
+
if is_hanzi?(hz.ord)
|
103
|
+
value = @@py[hz].first
|
104
|
+
else
|
105
|
+
value = hz.chr
|
106
|
+
end
|
107
|
+
arr << value
|
108
|
+
end
|
109
|
+
::CGI.escape arr.join('-')
|
110
|
+
end
|
111
|
+
class << self
|
112
|
+
alias_method :hanzi_to_url, :hanzi_2_url
|
87
113
|
end
|
88
114
|
|
89
115
|
def self.append(values)
|
@@ -115,7 +141,7 @@ class HanziToPinyin
|
|
115
141
|
end
|
116
142
|
|
117
143
|
def self.is_letter?(codepoint)
|
118
|
-
codepoint >= @@letter_upcase_start && codepoint <= @@letter_upcase_end
|
144
|
+
codepoint >= @@letter_upcase_start && codepoint <= @@letter_upcase_end or codepoint >= @@letter_downcase_start && codepoint <= @@letter_downcase_end
|
119
145
|
end
|
120
146
|
|
121
147
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hanzi_to_pinyin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 2.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,14 +9,14 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-10-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ">="
|
20
20
|
- !ruby/object:Gem::Version
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
@@ -24,7 +24,7 @@ dependencies:
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
-
- -
|
35
|
+
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
37
|
version: '0'
|
38
38
|
type: :runtime
|
@@ -40,7 +40,7 @@ dependencies:
|
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ">="
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
46
|
- !ruby/object:Gem::Dependency
|
@@ -48,7 +48,7 @@ dependencies:
|
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
49
|
none: false
|
50
50
|
requirements:
|
51
|
-
- -
|
51
|
+
- - ">="
|
52
52
|
- !ruby/object:Gem::Version
|
53
53
|
version: '0'
|
54
54
|
type: :development
|
@@ -56,7 +56,7 @@ dependencies:
|
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
none: false
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
description: chinese hanzi to pinyin , fetch first letter OR full pinyin, written
|
@@ -67,7 +67,6 @@ executables: []
|
|
67
67
|
extensions: []
|
68
68
|
extra_rdoc_files: []
|
69
69
|
files:
|
70
|
-
- README.rdoc
|
71
70
|
- VERSION
|
72
71
|
- LICENSE.txt
|
73
72
|
- Rakefile
|
@@ -76,30 +75,30 @@ files:
|
|
76
75
|
- lib/data/hz2py.json
|
77
76
|
homepage: http://github.com/wxianfeng/hanzi_to_pinyin
|
78
77
|
licenses: []
|
79
|
-
post_install_message:
|
78
|
+
post_install_message: " hanzi_to_pinyin is a tool for chinese hanzi to pinyin ,fetch
|
80
79
|
first letter OR full pinyin, writen in Ruby.\n \n http://github.com/wxianfeng/hanzi_to_pinyin\n\n
|
81
|
-
\ Enjoy!\n\n
|
80
|
+
\ Enjoy!\n\n wxianfeng (522096432@qq.com)\n\n"
|
82
81
|
rdoc_options: []
|
83
82
|
require_paths:
|
84
83
|
- lib
|
85
84
|
required_ruby_version: !ruby/object:Gem::Requirement
|
86
85
|
none: false
|
87
86
|
requirements:
|
88
|
-
- -
|
87
|
+
- - ">="
|
89
88
|
- !ruby/object:Gem::Version
|
90
89
|
version: '0'
|
91
90
|
segments:
|
92
91
|
- 0
|
93
|
-
hash:
|
92
|
+
hash: -866964413225385331
|
94
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
94
|
none: false
|
96
95
|
requirements:
|
97
|
-
- -
|
96
|
+
- - ">="
|
98
97
|
- !ruby/object:Gem::Version
|
99
98
|
version: 1.3.6
|
100
99
|
requirements: []
|
101
100
|
rubyforge_project:
|
102
|
-
rubygems_version: 1.8.
|
101
|
+
rubygems_version: 1.8.25
|
103
102
|
signing_key:
|
104
103
|
specification_version: 3
|
105
104
|
summary: chinese hanzi to pinyin , fetch first letter OR full pinyin
|
data/README.rdoc
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
= hanzi_to_pinyin
|
2
|
-
|
3
|
-
translate chinese to pinyin, fetch first letter OR full pinyin
|
4
|
-
|
5
|
-
== Install
|
6
|
-
|
7
|
-
rails3
|
8
|
-
|
9
|
-
$ gem 'hanzi_to_pinyin', '0.8.0', require: 'hanzi_to_pinyin'
|
10
|
-
or latest develop version
|
11
|
-
$ gem 'hanzi_to_pinyin', git: 'git://github.com/wxianfeng/hanzi_to_pinyin.git'
|
12
|
-
or freeze ref version
|
13
|
-
$ gem 'hanzi_to_pinyin', git: 'git://github.com/wxianfeng/hanzi_to_pinyin.git', ref: 5fa43b0
|
14
|
-
|
15
|
-
== Usage
|
16
|
-
|
17
|
-
$ HanziToPinyin.hanzi_to_pinyin("中华人民共和国") => "zhrmghg"
|
18
|
-
$ HanziToPinyin.hanzi_2_pinyin("中华人民共和国") => "zhrmghg"
|
19
|
-
$ HanziToPinyin.hanzi_to_pinyin("喜欢Ruby") => "xhruby"
|
20
|
-
$ HanziToPinyin.hanzi_2_pinyin("喜欢Ruby") => "xhruby"
|
21
|
-
|
22
|
-
$ HanziToPinyin.is_hanzi?("你") => true
|
23
|
-
$ HanziToPinyin.is_hanzi?("a") => false
|
24
|
-
|
25
|
-
# 多音字,分隔 字字之间;分隔,字母丢弃
|
26
|
-
$ HanziToPinyin.hanzi_2_py("我们") => "wo;men"
|
27
|
-
$ HanziToPinyin.hanzi_2_py("查理Smith") => "cha,zha;li"
|
28
|
-
$ HanziToPinyin.hanzi_2_py("测试1") => "ce;shi;1"
|
29
|
-
$ HanziToPinyin.hanzi_2_py("测_试") => "ce;_;shi"
|
30
|
-
$ HanziToPinyin.hanzi_2_py("测-试").should == "ce;-;shi"
|
31
|
-
|
32
|
-
$ HanziToPinyin.is_number?("1".ord) => true
|
33
|
-
$ HanziToPinyin.is_number?("a".ord) => false
|
34
|
-
|
35
|
-
$ HanziToPinyin.is_underline?("_".ord).should be_true
|
36
|
-
$ HanziToPinyin.is_underline?("豆豆").should be_false
|
37
|
-
$ HanziToPinyin.is_dash?("-".ord).should be_true
|
38
|
-
|
39
|
-
== Test
|
40
|
-
$ rake spec
|
41
|
-
or spec one case
|
42
|
-
$ rspec spec/hanzi_to_pinyin/hanzi_to_pinyin_spec.rb -l 21
|
43
|
-
|
44
|
-
== Copyright
|
45
|
-
|
46
|
-
Copyright (c) 2011 wxianfeng. See LICENSE.txt for
|
47
|
-
further details.
|