hz2py 0.0.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +8 -1
- data/hz2py.gemspec +1 -1
- data/lib/elvuel/hz2py.rb +38 -69
- data/spec/spec_helper.rb +1 -2
- metadata +32 -53
data/README.rdoc
CHANGED
@@ -13,27 +13,34 @@
|
|
13
13
|
As GEM
|
14
14
|
|
15
15
|
rails3: gem 'hz2py'
|
16
|
+
|
16
17
|
rails2: config.gem 'hz2py'
|
17
18
|
|
18
19
|
As PLUGIN
|
19
20
|
|
20
21
|
rails3: rails plugin install git://github.com/elvuel/hz2py.git
|
22
|
+
|
21
23
|
rails2: ./script/plugin install git://github.com/elvuel/hz2py.git
|
22
24
|
|
23
25
|
|
24
26
|
== Usage & Example
|
25
27
|
|
26
28
|
# in rails app add config/initializers/hz2py.rb
|
27
|
-
|
29
|
+
|
30
|
+
String.class_eval do
|
28
31
|
def to_pinyin
|
29
32
|
Hz2py.do(self)
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
36
|
Hz2py.do("你好") => "ni hao"
|
37
|
+
|
34
38
|
TraditionalAndSimplified.conv_t2s("鳳凰") => "凤凰"
|
39
|
+
|
35
40
|
TraditionalAndSimplified.conv_s2t("无心插柳柳成阴") => "無心插柳柳成陰"
|
41
|
+
|
36
42
|
Hz2py.do("你好", :join_with => '-') => "ni-hao"
|
43
|
+
|
37
44
|
Hz2py.do("愛你", :join_with => '-', :to_simplified => true) => "ai-ni"
|
38
45
|
|
39
46
|
== Test
|
data/hz2py.gemspec
CHANGED
data/lib/elvuel/hz2py.rb
CHANGED
@@ -196,12 +196,12 @@ module Elvuel
|
|
196
196
|
'mo' => %w( 6478 6479 8611 6a21 819c 78e8 6469 9b54 62b9 672b 83ab 58a8 9ed8 6cab 6f20 5bde 964c 8c1f 8309 84e6 998d 5aeb 9546 79e3 763c 8031 87c6 8c8a 8c98 ),
|
197
197
|
'mou' => %w( 8c0b 725f 67d0 53b6 54de 5a7a 7738 936a ),
|
198
198
|
'mu' => %w( 62c7 7261 4ea9 59c6 6bcd 5893 66ae 5e55 52df 6155 6728 76ee 7766 7267 7a46 4eeb 82dc 5452 6c90 6bea 94bc ),
|
199
|
-
'na' => %w( 62ff 54ea 5450 94a0 90a3 5a1c 7eb3
|
199
|
+
'na' => %w( 62ff 54ea 5450 94a0 90a3 5a1c 7eb3 637a 80ad 954e 8872 7bac ),
|
200
200
|
'nai' => %w( 6c16 4e43 5976 8010 5948 9f10 827f 8418 67f0 ),
|
201
201
|
'nan' => %w( 5357 7537 96be 56ca 5583 56e1 6960 8169 877b 8d67 ),
|
202
202
|
'nao' => %w( 6320 8111 607c 95f9 5b6c 57b4 7331 7459 7847 94d9 86f2 ),
|
203
203
|
'ne' => %w( 6dd6 5462 8bb7 ),
|
204
|
-
'nei' => %w( 9981 ),
|
204
|
+
'nei' => %w( 9981 5185 ),
|
205
205
|
'nen' => %w( 5ae9 80fd 6798 6041 ),
|
206
206
|
'ni' => %w( 59ae 9713 502a 6ce5 5c3c 62df 4f60 533f 817b 9006 6eba 4f32 576d 730a 6029 6ee0 6635 65ce 7962 615d 7768 94cc 9cb5 ),
|
207
207
|
'nian' => %w( 852b 62c8 5e74 78be 64b5 637b 5ff5 5eff 8f87 9ecf 9c87 9cb6 ),
|
@@ -215,8 +215,7 @@ module Elvuel
|
|
215
215
|
'nu' => %w( 5974 52aa 6012 5476 5e11 5f29 80ec 5b65 9a7d ),
|
216
216
|
'nv' => %w( 5973 6067 9495 8844 ),
|
217
217
|
'nuan' => %w( 6696 ),
|
218
|
-
'
|
219
|
-
'nue' => %w( 759f 8c11 ),
|
218
|
+
'nue' => %w( 8650 759f 8c11 ),
|
220
219
|
'nuo' => %w( 632a 61e6 7cef 8bfa 50a9 6426 558f 9518 ),
|
221
220
|
'ou' => %w( 54e6 6b27 9e25 6bb4 85d5 5455 5076 6ca4 6004 74ef 8026 ),
|
222
221
|
'pa' => %w( 556a 8db4 722c 5e15 6015 7436 8469 7b62 ),
|
@@ -253,8 +252,8 @@ module Elvuel
|
|
253
252
|
'ran' => %w( 7136 71c3 5189 67d3 82d2 9aef ),
|
254
253
|
'rang' => %w( 74e4 58e4 6518 56b7 8ba9 79b3 7a70 ),
|
255
254
|
'rao' => %w( 9976 6270 7ed5 835b 5a06 6861 ),
|
256
|
-
'ruo' => %w( 60f9 82e5 5f31 ),
|
257
|
-
're' => %w( 70ed
|
255
|
+
'ruo' => %w( 60f9 82e5 5f31 504c ),
|
256
|
+
're' => %w( 70ed ),
|
258
257
|
'ren' => %w( 58ec 4ec1 4eba 5fcd 97e7 4efb 8ba4 5203 598a 7eab 4ede 834f 845a 996a 8f6b 7a14 887d ),
|
259
258
|
'reng' => %w( 6254 4ecd ),
|
260
259
|
'ri' => %w( 65e5 ),
|
@@ -412,46 +411,6 @@ module Elvuel
|
|
412
411
|
module Hz2py
|
413
412
|
|
414
413
|
class << self
|
415
|
-
def diff_uni_asc(s)
|
416
|
-
return "" unless block_given?
|
417
|
-
str = s.gsub(/#{@@sbc_hash.keys.join("|")}/){ |c| @@sbc_hash[c] }
|
418
|
-
bytes = str.each_byte.collect{|b| b }
|
419
|
-
while bytes.length > 0
|
420
|
-
num = bytes[0]
|
421
|
-
if (num >= 224 and num <= 239) or (num >= 128 and num <= 191)
|
422
|
-
yield bytes[0..2]
|
423
|
-
# u 3 times
|
424
|
-
bytes.shift
|
425
|
-
bytes.shift
|
426
|
-
bytes.shift
|
427
|
-
else
|
428
|
-
yield bytes[0]
|
429
|
-
bytes.shift
|
430
|
-
end
|
431
|
-
end
|
432
|
-
end
|
433
|
-
|
434
|
-
def utf8_to_unicode(utf8chr)
|
435
|
-
bins = utf8chr.to_i(16).to_s(2)
|
436
|
-
s1, s2, s3 = bins[0..7], bins[8..15], bins[16..23]
|
437
|
-
s1, s2, s3 = s1[4..7], s2[2..7], s3[2..7]
|
438
|
-
result = s1 + s2 + s3
|
439
|
-
while result[0].chr == "0"
|
440
|
-
result = result[1..result.length]
|
441
|
-
end
|
442
|
-
result.to_i(2).to_s(16)
|
443
|
-
end
|
444
|
-
|
445
|
-
def unicode_to_utf8(unichr)
|
446
|
-
bins = unichr.to_i(16).to_s(2)
|
447
|
-
(16 - bins.length).times{ bins = "0" + bins }
|
448
|
-
s1 = "1110" + bins[0..3]
|
449
|
-
s2 = "10" + bins[4..9]
|
450
|
-
s3 = "10" + bins[10..15]
|
451
|
-
result = s1 + s2 + s3
|
452
|
-
result.to_i(2).to_s(16)
|
453
|
-
end
|
454
|
-
|
455
414
|
def fetch_py(u)
|
456
415
|
@@dic.each do |k,v|
|
457
416
|
return k if v.index u
|
@@ -461,35 +420,45 @@ module Elvuel
|
|
461
420
|
|
462
421
|
def do(s, options={})
|
463
422
|
return "" if s.to_s.empty?
|
464
|
-
str = s.to_s
|
423
|
+
str = s.to_s.strip
|
465
424
|
delimiter = ' '
|
466
425
|
to_simplified = false
|
467
426
|
if options.is_a?(Hash)
|
468
|
-
delimiter = ' '
|
469
|
-
delimiter = options[:join_with] if options[:join_with]
|
427
|
+
delimiter = options[:join_with] || ' '
|
470
428
|
delimiter = ' ' if delimiter.length > 1
|
471
429
|
to_simplified = options[:to_simplified]
|
472
430
|
end
|
473
|
-
str =
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
if
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
431
|
+
str = TraditionalAndSimplified.conv_t2s(str) if to_simplified
|
432
|
+
str.gsub!(/#{@@sbc_hash.keys.join("|")}/){ |c| @@sbc_hash[c] }
|
433
|
+
utf8_chrs = str.unpack("U*")
|
434
|
+
chrs_size = utf8_chrs.size
|
435
|
+
result = utf8_chrs.each_with_index.collect do |num, index|
|
436
|
+
if num > 255
|
437
|
+
py = ""
|
438
|
+
if index == 0
|
439
|
+
py << fetch_py(num.to_s(16))
|
440
|
+
unless (index + 1) >= chrs_size
|
441
|
+
py << delimiter if utf8_chrs[index+1] <= 255
|
442
|
+
end
|
443
|
+
elsif index == chrs_size - 1
|
444
|
+
py << delimiter
|
445
|
+
py << fetch_py(num.to_s(16))
|
446
|
+
else
|
447
|
+
py << delimiter
|
448
|
+
py << fetch_py(num.to_s(16))
|
449
|
+
unless (index + 1) >= chrs_size
|
450
|
+
py << delimiter if utf8_chrs[index+1] <= 255
|
451
|
+
end
|
452
|
+
end
|
453
|
+
py
|
484
454
|
else
|
485
|
-
|
486
|
-
end
|
487
|
-
end
|
488
|
-
result[-1] = "" if result[-1].chr == delimiter
|
455
|
+
num.chr
|
456
|
+
end # if num
|
457
|
+
end.join
|
489
458
|
result.strip
|
490
|
-
end
|
491
|
-
|
492
|
-
end
|
459
|
+
end # do
|
460
|
+
|
461
|
+
end # module function
|
493
462
|
|
494
|
-
end
|
495
|
-
end
|
463
|
+
end # Hz2py
|
464
|
+
end # Elvuel
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,46 +1,35 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: hz2py
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- elvuel
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-11-10 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: rspec
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &10464800 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
18
|
+
requirements:
|
26
19
|
- - ~>
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 1
|
30
|
-
- 3
|
31
|
-
- 0
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 1.3.0
|
33
22
|
type: :development
|
34
|
-
|
35
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *10464800
|
25
|
+
description: ! '汉字转拼音实现, Chinese pinyin conversion. Chinese Traditional and Simplified
|
26
|
+
conversion '
|
36
27
|
email: elvuel@gmail.com
|
37
|
-
executables:
|
28
|
+
executables:
|
38
29
|
- hz2py
|
39
30
|
extensions: []
|
40
|
-
|
41
31
|
extra_rdoc_files: []
|
42
|
-
|
43
|
-
files:
|
32
|
+
files:
|
44
33
|
- .gitignore
|
45
34
|
- Gemfile
|
46
35
|
- README.rdoc
|
@@ -54,39 +43,29 @@ files:
|
|
54
43
|
- spec/hz2py_spec.rb
|
55
44
|
- spec/spec_helper.rb
|
56
45
|
- spec/ts_spec.rb
|
57
|
-
has_rdoc: true
|
58
46
|
homepage: http://github.com/elvuel
|
59
47
|
licenses: []
|
60
|
-
|
61
48
|
post_install_message:
|
62
49
|
rdoc_options: []
|
63
|
-
|
64
|
-
require_paths:
|
50
|
+
require_paths:
|
65
51
|
- lib
|
66
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
53
|
none: false
|
68
|
-
requirements:
|
69
|
-
- -
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
|
72
|
-
|
73
|
-
version: "0"
|
74
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
59
|
none: false
|
76
|
-
requirements:
|
77
|
-
- -
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
|
80
|
-
- 0
|
81
|
-
version: "0"
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
82
64
|
requirements: []
|
83
|
-
|
84
65
|
rubyforge_project: hz2py
|
85
|
-
rubygems_version: 1.
|
66
|
+
rubygems_version: 1.8.10
|
86
67
|
signing_key:
|
87
68
|
specification_version: 3
|
88
|
-
summary:
|
89
|
-
test_files:
|
90
|
-
|
91
|
-
- spec/spec_helper.rb
|
92
|
-
- spec/ts_spec.rb
|
69
|
+
summary: 汉字转拼音,汉字繁简转换-Chinese-Pinyin Conversion
|
70
|
+
test_files: []
|
71
|
+
has_rdoc: false
|