hz2py 0.0.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +8 -1
- data/hz2py.gemspec +1 -1
- data/lib/elvuel/hz2py.rb +38 -69
- data/spec/spec_helper.rb +1 -2
- metadata +32 -53
data/README.rdoc
CHANGED
@@ -13,27 +13,34 @@
|
|
13
13
|
As GEM
|
14
14
|
|
15
15
|
rails3: gem 'hz2py'
|
16
|
+
|
16
17
|
rails2: config.gem 'hz2py'
|
17
18
|
|
18
19
|
As PLUGIN
|
19
20
|
|
20
21
|
rails3: rails plugin install git://github.com/elvuel/hz2py.git
|
22
|
+
|
21
23
|
rails2: ./script/plugin install git://github.com/elvuel/hz2py.git
|
22
24
|
|
23
25
|
|
24
26
|
== Usage & Example
|
25
27
|
|
26
28
|
# in rails app add config/initializers/hz2py.rb
|
27
|
-
|
29
|
+
|
30
|
+
String.class_eval do
|
28
31
|
def to_pinyin
|
29
32
|
Hz2py.do(self)
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
36
|
Hz2py.do("你好") => "ni hao"
|
37
|
+
|
34
38
|
TraditionalAndSimplified.conv_t2s("鳳凰") => "凤凰"
|
39
|
+
|
35
40
|
TraditionalAndSimplified.conv_s2t("无心插柳柳成阴") => "無心插柳柳成陰"
|
41
|
+
|
36
42
|
Hz2py.do("你好", :join_with => '-') => "ni-hao"
|
43
|
+
|
37
44
|
Hz2py.do("愛你", :join_with => '-', :to_simplified => true) => "ai-ni"
|
38
45
|
|
39
46
|
== Test
|
data/hz2py.gemspec
CHANGED
data/lib/elvuel/hz2py.rb
CHANGED
@@ -196,12 +196,12 @@ module Elvuel
|
|
196
196
|
'mo' => %w( 6478 6479 8611 6a21 819c 78e8 6469 9b54 62b9 672b 83ab 58a8 9ed8 6cab 6f20 5bde 964c 8c1f 8309 84e6 998d 5aeb 9546 79e3 763c 8031 87c6 8c8a 8c98 ),
|
197
197
|
'mou' => %w( 8c0b 725f 67d0 53b6 54de 5a7a 7738 936a ),
|
198
198
|
'mu' => %w( 62c7 7261 4ea9 59c6 6bcd 5893 66ae 5e55 52df 6155 6728 76ee 7766 7267 7a46 4eeb 82dc 5452 6c90 6bea 94bc ),
|
199
|
-
'na' => %w( 62ff 54ea 5450 94a0 90a3 5a1c 7eb3
|
199
|
+
'na' => %w( 62ff 54ea 5450 94a0 90a3 5a1c 7eb3 637a 80ad 954e 8872 7bac ),
|
200
200
|
'nai' => %w( 6c16 4e43 5976 8010 5948 9f10 827f 8418 67f0 ),
|
201
201
|
'nan' => %w( 5357 7537 96be 56ca 5583 56e1 6960 8169 877b 8d67 ),
|
202
202
|
'nao' => %w( 6320 8111 607c 95f9 5b6c 57b4 7331 7459 7847 94d9 86f2 ),
|
203
203
|
'ne' => %w( 6dd6 5462 8bb7 ),
|
204
|
-
'nei' => %w( 9981 ),
|
204
|
+
'nei' => %w( 9981 5185 ),
|
205
205
|
'nen' => %w( 5ae9 80fd 6798 6041 ),
|
206
206
|
'ni' => %w( 59ae 9713 502a 6ce5 5c3c 62df 4f60 533f 817b 9006 6eba 4f32 576d 730a 6029 6ee0 6635 65ce 7962 615d 7768 94cc 9cb5 ),
|
207
207
|
'nian' => %w( 852b 62c8 5e74 78be 64b5 637b 5ff5 5eff 8f87 9ecf 9c87 9cb6 ),
|
@@ -215,8 +215,7 @@ module Elvuel
|
|
215
215
|
'nu' => %w( 5974 52aa 6012 5476 5e11 5f29 80ec 5b65 9a7d ),
|
216
216
|
'nv' => %w( 5973 6067 9495 8844 ),
|
217
217
|
'nuan' => %w( 6696 ),
|
218
|
-
'
|
219
|
-
'nue' => %w( 759f 8c11 ),
|
218
|
+
'nue' => %w( 8650 759f 8c11 ),
|
220
219
|
'nuo' => %w( 632a 61e6 7cef 8bfa 50a9 6426 558f 9518 ),
|
221
220
|
'ou' => %w( 54e6 6b27 9e25 6bb4 85d5 5455 5076 6ca4 6004 74ef 8026 ),
|
222
221
|
'pa' => %w( 556a 8db4 722c 5e15 6015 7436 8469 7b62 ),
|
@@ -253,8 +252,8 @@ module Elvuel
|
|
253
252
|
'ran' => %w( 7136 71c3 5189 67d3 82d2 9aef ),
|
254
253
|
'rang' => %w( 74e4 58e4 6518 56b7 8ba9 79b3 7a70 ),
|
255
254
|
'rao' => %w( 9976 6270 7ed5 835b 5a06 6861 ),
|
256
|
-
'ruo' => %w( 60f9 82e5 5f31 ),
|
257
|
-
're' => %w( 70ed
|
255
|
+
'ruo' => %w( 60f9 82e5 5f31 504c ),
|
256
|
+
're' => %w( 70ed ),
|
258
257
|
'ren' => %w( 58ec 4ec1 4eba 5fcd 97e7 4efb 8ba4 5203 598a 7eab 4ede 834f 845a 996a 8f6b 7a14 887d ),
|
259
258
|
'reng' => %w( 6254 4ecd ),
|
260
259
|
'ri' => %w( 65e5 ),
|
@@ -412,46 +411,6 @@ module Elvuel
|
|
412
411
|
module Hz2py
|
413
412
|
|
414
413
|
class << self
|
415
|
-
def diff_uni_asc(s)
|
416
|
-
return "" unless block_given?
|
417
|
-
str = s.gsub(/#{@@sbc_hash.keys.join("|")}/){ |c| @@sbc_hash[c] }
|
418
|
-
bytes = str.each_byte.collect{|b| b }
|
419
|
-
while bytes.length > 0
|
420
|
-
num = bytes[0]
|
421
|
-
if (num >= 224 and num <= 239) or (num >= 128 and num <= 191)
|
422
|
-
yield bytes[0..2]
|
423
|
-
# u 3 times
|
424
|
-
bytes.shift
|
425
|
-
bytes.shift
|
426
|
-
bytes.shift
|
427
|
-
else
|
428
|
-
yield bytes[0]
|
429
|
-
bytes.shift
|
430
|
-
end
|
431
|
-
end
|
432
|
-
end
|
433
|
-
|
434
|
-
def utf8_to_unicode(utf8chr)
|
435
|
-
bins = utf8chr.to_i(16).to_s(2)
|
436
|
-
s1, s2, s3 = bins[0..7], bins[8..15], bins[16..23]
|
437
|
-
s1, s2, s3 = s1[4..7], s2[2..7], s3[2..7]
|
438
|
-
result = s1 + s2 + s3
|
439
|
-
while result[0].chr == "0"
|
440
|
-
result = result[1..result.length]
|
441
|
-
end
|
442
|
-
result.to_i(2).to_s(16)
|
443
|
-
end
|
444
|
-
|
445
|
-
def unicode_to_utf8(unichr)
|
446
|
-
bins = unichr.to_i(16).to_s(2)
|
447
|
-
(16 - bins.length).times{ bins = "0" + bins }
|
448
|
-
s1 = "1110" + bins[0..3]
|
449
|
-
s2 = "10" + bins[4..9]
|
450
|
-
s3 = "10" + bins[10..15]
|
451
|
-
result = s1 + s2 + s3
|
452
|
-
result.to_i(2).to_s(16)
|
453
|
-
end
|
454
|
-
|
455
414
|
def fetch_py(u)
|
456
415
|
@@dic.each do |k,v|
|
457
416
|
return k if v.index u
|
@@ -461,35 +420,45 @@ module Elvuel
|
|
461
420
|
|
462
421
|
def do(s, options={})
|
463
422
|
return "" if s.to_s.empty?
|
464
|
-
str = s.to_s
|
423
|
+
str = s.to_s.strip
|
465
424
|
delimiter = ' '
|
466
425
|
to_simplified = false
|
467
426
|
if options.is_a?(Hash)
|
468
|
-
delimiter = ' '
|
469
|
-
delimiter = options[:join_with] if options[:join_with]
|
427
|
+
delimiter = options[:join_with] || ' '
|
470
428
|
delimiter = ' ' if delimiter.length > 1
|
471
429
|
to_simplified = options[:to_simplified]
|
472
430
|
end
|
473
|
-
str =
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
if
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
431
|
+
str = TraditionalAndSimplified.conv_t2s(str) if to_simplified
|
432
|
+
str.gsub!(/#{@@sbc_hash.keys.join("|")}/){ |c| @@sbc_hash[c] }
|
433
|
+
utf8_chrs = str.unpack("U*")
|
434
|
+
chrs_size = utf8_chrs.size
|
435
|
+
result = utf8_chrs.each_with_index.collect do |num, index|
|
436
|
+
if num > 255
|
437
|
+
py = ""
|
438
|
+
if index == 0
|
439
|
+
py << fetch_py(num.to_s(16))
|
440
|
+
unless (index + 1) >= chrs_size
|
441
|
+
py << delimiter if utf8_chrs[index+1] <= 255
|
442
|
+
end
|
443
|
+
elsif index == chrs_size - 1
|
444
|
+
py << delimiter
|
445
|
+
py << fetch_py(num.to_s(16))
|
446
|
+
else
|
447
|
+
py << delimiter
|
448
|
+
py << fetch_py(num.to_s(16))
|
449
|
+
unless (index + 1) >= chrs_size
|
450
|
+
py << delimiter if utf8_chrs[index+1] <= 255
|
451
|
+
end
|
452
|
+
end
|
453
|
+
py
|
484
454
|
else
|
485
|
-
|
486
|
-
end
|
487
|
-
end
|
488
|
-
result[-1] = "" if result[-1].chr == delimiter
|
455
|
+
num.chr
|
456
|
+
end # if num
|
457
|
+
end.join
|
489
458
|
result.strip
|
490
|
-
end
|
491
|
-
|
492
|
-
end
|
459
|
+
end # do
|
460
|
+
|
461
|
+
end # module function
|
493
462
|
|
494
|
-
end
|
495
|
-
end
|
463
|
+
end # Hz2py
|
464
|
+
end # Elvuel
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,46 +1,35 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: hz2py
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- elvuel
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-11-10 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: rspec
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &10464800 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
18
|
+
requirements:
|
26
19
|
- - ~>
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 1
|
30
|
-
- 3
|
31
|
-
- 0
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 1.3.0
|
33
22
|
type: :development
|
34
|
-
|
35
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *10464800
|
25
|
+
description: ! '汉字转拼音实现, Chinese pinyin conversion. Chinese Traditional and Simplified
|
26
|
+
conversion '
|
36
27
|
email: elvuel@gmail.com
|
37
|
-
executables:
|
28
|
+
executables:
|
38
29
|
- hz2py
|
39
30
|
extensions: []
|
40
|
-
|
41
31
|
extra_rdoc_files: []
|
42
|
-
|
43
|
-
files:
|
32
|
+
files:
|
44
33
|
- .gitignore
|
45
34
|
- Gemfile
|
46
35
|
- README.rdoc
|
@@ -54,39 +43,29 @@ files:
|
|
54
43
|
- spec/hz2py_spec.rb
|
55
44
|
- spec/spec_helper.rb
|
56
45
|
- spec/ts_spec.rb
|
57
|
-
has_rdoc: true
|
58
46
|
homepage: http://github.com/elvuel
|
59
47
|
licenses: []
|
60
|
-
|
61
48
|
post_install_message:
|
62
49
|
rdoc_options: []
|
63
|
-
|
64
|
-
require_paths:
|
50
|
+
require_paths:
|
65
51
|
- lib
|
66
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
53
|
none: false
|
68
|
-
requirements:
|
69
|
-
- -
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
|
72
|
-
|
73
|
-
version: "0"
|
74
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
59
|
none: false
|
76
|
-
requirements:
|
77
|
-
- -
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
|
80
|
-
- 0
|
81
|
-
version: "0"
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
82
64
|
requirements: []
|
83
|
-
|
84
65
|
rubyforge_project: hz2py
|
85
|
-
rubygems_version: 1.
|
66
|
+
rubygems_version: 1.8.10
|
86
67
|
signing_key:
|
87
68
|
specification_version: 3
|
88
|
-
summary:
|
89
|
-
test_files:
|
90
|
-
|
91
|
-
- spec/spec_helper.rb
|
92
|
-
- spec/ts_spec.rb
|
69
|
+
summary: 汉字转拼音,汉字繁简转换-Chinese-Pinyin Conversion
|
70
|
+
test_files: []
|
71
|
+
has_rdoc: false
|