toPinyin 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ require 'benchmark'
2
+ require 'toPinyin'
3
+
4
+
5
+ Benchmark.bm(15) do |x|
6
+
7
+ pinyins=[]
8
+ words = "
9
+ 人人
10
+ 没有
11
+ 理想
12
+ 跟跟
13
+ 咸鱼
14
+ 有有
15
+ 什么
16
+ 区别".split("\n")
17
+
18
+ #take 4 seconds
19
+ x.report("convert80") { pinyins = (words * 10).map {|w| w.pinyin} }
20
+ x.report("convert800") { pinyins = (words * 100).map {|w| w.pinyin} }
21
+ x.report("sort:") { pinyins.sort {|a , b| a <=> b}}
22
+ end
23
+
24
+
data/lib/toPinyin.rb CHANGED
@@ -16,6 +16,7 @@ class String
16
16
  | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
17
17
  )*\z/mnx
18
18
 
19
+ @@mm = uniToPyMap
19
20
 
20
21
  def utf8?
21
22
  self =~ UTF8REGEX
@@ -41,17 +42,19 @@ def validate_utf8
41
42
  Iconv.iconv('UTF-8//IGNORE', 'UTF-8', (self + ' ') ).first[0..-2]
42
43
  end
43
44
 
44
- def pinyin
45
+ def pinyin
46
+
45
47
  scan(/./mu).map do |c|
46
48
  #convert to unicode
47
49
  #u = Iconv.iconv("UNICODEBIG","utf-8",c)[0].each_byte.map {|b| b.to_s(16)}.join
48
50
  u=sprintf("%04X", c.unpack("U*").first)
49
51
  #handle a-z, A-Z
50
- if u =~ /^00/
52
+ if u =~ /^00/
51
53
  #return c as it is
52
54
  c
53
55
  else
54
- uniToPyMap[u.upcase].chop unless uniToPyMap[u.upcase].nil?
56
+ m = @@mm[u]
57
+ m.chop unless m.nil?
55
58
  end
56
59
  end
57
60
  end
@@ -1,3 +1,3 @@
1
1
  module Topinyin
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: toPinyin
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - pierr.chen
@@ -34,6 +34,7 @@ files:
34
34
  - README
35
35
  - Rakefile
36
36
  - lib/Uni2Pinyin
37
+ - lib/performance.rb
37
38
  - lib/test.rb
38
39
  - lib/toPinyin.rb
39
40
  - lib/toPinyin/uniToPyMap.rb