hz2py 0.0.4 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,27 +13,34 @@
13
13
  As GEM
14
14
 
15
15
  rails3: gem 'hz2py'
16
+
16
17
  rails2: config.gem 'hz2py'
17
18
 
18
19
  As PLUGIN
19
20
 
20
21
  rails3: rails plugin install git://github.com/elvuel/hz2py.git
22
+
21
23
  rails2: ./script/plugin install git://github.com/elvuel/hz2py.git
22
24
 
23
25
 
24
26
  == Usage & Example
25
27
 
26
28
  # in rails app add config/initializers/hz2py.rb
27
- class String
29
+
30
+ String.class_eval do
28
31
  def to_pinyin
29
32
  Hz2py.do(self)
30
33
  end
31
34
  end
32
35
 
33
36
  Hz2py.do("你好") => "ni hao"
37
+
34
38
  TraditionalAndSimplified.conv_t2s("鳳凰") => "凤凰"
39
+
35
40
  TraditionalAndSimplified.conv_s2t("无心插柳柳成阴") => "無心插柳柳成陰"
41
+
36
42
  Hz2py.do("你好", :join_with => '-') => "ni-hao"
43
+
37
44
  Hz2py.do("愛你", :join_with => '-', :to_simplified => true) => "ai-ni"
38
45
 
39
46
  == Test
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "hz2py"
6
- s.version = "0.0.4"
6
+ s.version = "1.0.0"
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = "elvuel"
9
9
  s.email = "elvuel@gmail.com"
@@ -196,12 +196,12 @@ module Elvuel
196
196
  'mo' => %w( 6478 6479 8611 6a21 819c 78e8 6469 9b54 62b9 672b 83ab 58a8 9ed8 6cab 6f20 5bde 964c 8c1f 8309 84e6 998d 5aeb 9546 79e3 763c 8031 87c6 8c8a 8c98 ),
197
197
  'mou' => %w( 8c0b 725f 67d0 53b6 54de 5a7a 7738 936a ),
198
198
  'mu' => %w( 62c7 7261 4ea9 59c6 6bcd 5893 66ae 5e55 52df 6155 6728 76ee 7766 7267 7a46 4eeb 82dc 5452 6c90 6bea 94bc ),
199
- 'na' => %w( 62ff 54ea 5450 94a0 90a3 5a1c 7eb3 5185 637a 80ad 954e 8872 7bac ),
199
+ 'na' => %w( 62ff 54ea 5450 94a0 90a3 5a1c 7eb3 637a 80ad 954e 8872 7bac ),
200
200
  'nai' => %w( 6c16 4e43 5976 8010 5948 9f10 827f 8418 67f0 ),
201
201
  'nan' => %w( 5357 7537 96be 56ca 5583 56e1 6960 8169 877b 8d67 ),
202
202
  'nao' => %w( 6320 8111 607c 95f9 5b6c 57b4 7331 7459 7847 94d9 86f2 ),
203
203
  'ne' => %w( 6dd6 5462 8bb7 ),
204
- 'nei' => %w( 9981 ),
204
+ 'nei' => %w( 9981 5185 ),
205
205
  'nen' => %w( 5ae9 80fd 6798 6041 ),
206
206
  'ni' => %w( 59ae 9713 502a 6ce5 5c3c 62df 4f60 533f 817b 9006 6eba 4f32 576d 730a 6029 6ee0 6635 65ce 7962 615d 7768 94cc 9cb5 ),
207
207
  'nian' => %w( 852b 62c8 5e74 78be 64b5 637b 5ff5 5eff 8f87 9ecf 9c87 9cb6 ),
@@ -215,8 +215,7 @@ module Elvuel
215
215
  'nu' => %w( 5974 52aa 6012 5476 5e11 5f29 80ec 5b65 9a7d ),
216
216
  'nv' => %w( 5973 6067 9495 8844 ),
217
217
  'nuan' => %w( 6696 ),
218
- 'nuenue' => %w( 8650 ),
219
- 'nue' => %w( 759f 8c11 ),
218
+ 'nue' => %w( 8650 759f 8c11 ),
220
219
  'nuo' => %w( 632a 61e6 7cef 8bfa 50a9 6426 558f 9518 ),
221
220
  'ou' => %w( 54e6 6b27 9e25 6bb4 85d5 5455 5076 6ca4 6004 74ef 8026 ),
222
221
  'pa' => %w( 556a 8db4 722c 5e15 6015 7436 8469 7b62 ),
@@ -253,8 +252,8 @@ module Elvuel
253
252
  'ran' => %w( 7136 71c3 5189 67d3 82d2 9aef ),
254
253
  'rang' => %w( 74e4 58e4 6518 56b7 8ba9 79b3 7a70 ),
255
254
  'rao' => %w( 9976 6270 7ed5 835b 5a06 6861 ),
256
- 'ruo' => %w( 60f9 82e5 5f31 ),
257
- 're' => %w( 70ed 504c ),
255
+ 'ruo' => %w( 60f9 82e5 5f31 504c ),
256
+ 're' => %w( 70ed ),
258
257
  'ren' => %w( 58ec 4ec1 4eba 5fcd 97e7 4efb 8ba4 5203 598a 7eab 4ede 834f 845a 996a 8f6b 7a14 887d ),
259
258
  'reng' => %w( 6254 4ecd ),
260
259
  'ri' => %w( 65e5 ),
@@ -412,46 +411,6 @@ module Elvuel
412
411
  module Hz2py
413
412
 
414
413
  class << self
415
- def diff_uni_asc(s)
416
- return "" unless block_given?
417
- str = s.gsub(/#{@@sbc_hash.keys.join("|")}/){ |c| @@sbc_hash[c] }
418
- bytes = str.each_byte.collect{|b| b }
419
- while bytes.length > 0
420
- num = bytes[0]
421
- if (num >= 224 and num <= 239) or (num >= 128 and num <= 191)
422
- yield bytes[0..2]
423
- # u 3 times
424
- bytes.shift
425
- bytes.shift
426
- bytes.shift
427
- else
428
- yield bytes[0]
429
- bytes.shift
430
- end
431
- end
432
- end
433
-
434
- def utf8_to_unicode(utf8chr)
435
- bins = utf8chr.to_i(16).to_s(2)
436
- s1, s2, s3 = bins[0..7], bins[8..15], bins[16..23]
437
- s1, s2, s3 = s1[4..7], s2[2..7], s3[2..7]
438
- result = s1 + s2 + s3
439
- while result[0].chr == "0"
440
- result = result[1..result.length]
441
- end
442
- result.to_i(2).to_s(16)
443
- end
444
-
445
- def unicode_to_utf8(unichr)
446
- bins = unichr.to_i(16).to_s(2)
447
- (16 - bins.length).times{ bins = "0" + bins }
448
- s1 = "1110" + bins[0..3]
449
- s2 = "10" + bins[4..9]
450
- s3 = "10" + bins[10..15]
451
- result = s1 + s2 + s3
452
- result.to_i(2).to_s(16)
453
- end
454
-
455
414
  def fetch_py(u)
456
415
  @@dic.each do |k,v|
457
416
  return k if v.index u
@@ -461,35 +420,45 @@ module Elvuel
461
420
 
462
421
  def do(s, options={})
463
422
  return "" if s.to_s.empty?
464
- str = s.to_s
423
+ str = s.to_s.strip
465
424
  delimiter = ' '
466
425
  to_simplified = false
467
426
  if options.is_a?(Hash)
468
- delimiter = ' '
469
- delimiter = options[:join_with] if options[:join_with]
427
+ delimiter = options[:join_with] || ' '
470
428
  delimiter = ' ' if delimiter.length > 1
471
429
  to_simplified = options[:to_simplified]
472
430
  end
473
- str = ::TraditionalAndSimplified.conv_t2s(str) if to_simplified
474
- result = ""
475
- chrs = []
476
- diff_uni_asc(str){ |out| chrs << out }
477
- chrs.each_with_index do |item, index|
478
- if item.is_a? Array
479
- utf8chr = item.collect { |n| n.to_s(16) }.join("")
480
- uni_hex = utf8_to_unicode(utf8chr)
481
- result << delimiter if chrs[index-1].is_a?(Fixnum) if index >= 1
482
- result << fetch_py(uni_hex)
483
- result << delimiter
431
+ str = TraditionalAndSimplified.conv_t2s(str) if to_simplified
432
+ str.gsub!(/#{@@sbc_hash.keys.join("|")}/){ |c| @@sbc_hash[c] }
433
+ utf8_chrs = str.unpack("U*")
434
+ chrs_size = utf8_chrs.size
435
+ result = utf8_chrs.each_with_index.collect do |num, index|
436
+ if num > 255
437
+ py = ""
438
+ if index == 0
439
+ py << fetch_py(num.to_s(16))
440
+ unless (index + 1) >= chrs_size
441
+ py << delimiter if utf8_chrs[index+1] <= 255
442
+ end
443
+ elsif index == chrs_size - 1
444
+ py << delimiter
445
+ py << fetch_py(num.to_s(16))
446
+ else
447
+ py << delimiter
448
+ py << fetch_py(num.to_s(16))
449
+ unless (index + 1) >= chrs_size
450
+ py << delimiter if utf8_chrs[index+1] <= 255
451
+ end
452
+ end
453
+ py
484
454
  else
485
- result << item.chr
486
- end
487
- end
488
- result[-1] = "" if result[-1].chr == delimiter
455
+ num.chr
456
+ end # if num
457
+ end.join
489
458
  result.strip
490
- end
491
-
492
- end
459
+ end # do
460
+
461
+ end # module function
493
462
 
494
- end
495
- end
463
+ end # Hz2py
464
+ end # Elvuel
@@ -2,7 +2,6 @@
2
2
  $:.unshift File.expand_path('..', __FILE__)
3
3
  $:.unshift File.expand_path('../../lib', __FILE__)
4
4
 
5
- require 'rubygems'
6
- require 'bundler'
5
+ require 'rubygems' unless defined? Gem
7
6
  require 'rspec'
8
7
  require 'hz2py'
metadata CHANGED
@@ -1,46 +1,35 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: hz2py
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 4
9
- version: 0.0.4
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - elvuel
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-01-19 00:00:00 +08:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-11-10 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: rspec
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &10464800 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
18
+ requirements:
26
19
  - - ~>
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 3
31
- - 0
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.3.0
33
22
  type: :development
34
- version_requirements: *id001
35
- description: "\xE6\xB1\x89\xE5\xAD\x97\xE8\xBD\xAC\xE6\x8B\xBC\xE9\x9F\xB3\xE5\xAE\x9E\xE7\x8E\xB0, Chinese pinyin conversion. Chinese Traditional and Simplified conversion "
23
+ prerelease: false
24
+ version_requirements: *10464800
25
+ description: ! '汉字转拼音实现, Chinese pinyin conversion. Chinese Traditional and Simplified
26
+ conversion '
36
27
  email: elvuel@gmail.com
37
- executables:
28
+ executables:
38
29
  - hz2py
39
30
  extensions: []
40
-
41
31
  extra_rdoc_files: []
42
-
43
- files:
32
+ files:
44
33
  - .gitignore
45
34
  - Gemfile
46
35
  - README.rdoc
@@ -54,39 +43,29 @@ files:
54
43
  - spec/hz2py_spec.rb
55
44
  - spec/spec_helper.rb
56
45
  - spec/ts_spec.rb
57
- has_rdoc: true
58
46
  homepage: http://github.com/elvuel
59
47
  licenses: []
60
-
61
48
  post_install_message:
62
49
  rdoc_options: []
63
-
64
- require_paths:
50
+ require_paths:
65
51
  - lib
66
- required_ruby_version: !ruby/object:Gem::Requirement
52
+ required_ruby_version: !ruby/object:Gem::Requirement
67
53
  none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- segments:
72
- - 0
73
- version: "0"
74
- required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
59
  none: false
76
- requirements:
77
- - - ">="
78
- - !ruby/object:Gem::Version
79
- segments:
80
- - 0
81
- version: "0"
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
82
64
  requirements: []
83
-
84
65
  rubyforge_project: hz2py
85
- rubygems_version: 1.3.7
66
+ rubygems_version: 1.8.10
86
67
  signing_key:
87
68
  specification_version: 3
88
- summary: "\xE6\xB1\x89\xE5\xAD\x97\xE8\xBD\xAC\xE6\x8B\xBC\xE9\x9F\xB3,\xE6\xB1\x89\xE5\xAD\x97\xE7\xB9\x81\xE7\xAE\x80\xE8\xBD\xAC\xE6\x8D\xA2\xEF\xBC\x8DChinese-Pinyin Conversion"
89
- test_files:
90
- - spec/hz2py_spec.rb
91
- - spec/spec_helper.rb
92
- - spec/ts_spec.rb
69
+ summary: 汉字转拼音,汉字繁简转换-Chinese-Pinyin Conversion
70
+ test_files: []
71
+ has_rdoc: false