toPinyin 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,24 @@
1
+ require 'benchmark'
2
+ require 'toPinyin'
3
+
4
+
5
+ Benchmark.bm(15) do |x|
6
+
7
+ pinyins=[]
8
+ words = "
9
+ 人人
10
+ 没有
11
+ 理想
12
+ 跟跟
13
+ 咸鱼
14
+ 有有
15
+ 什么
16
+ 区别".split("\n")
17
+
18
+ #takes 4 seconds
19
+ x.report("convert80") { pinyins = (words * 10).map {|w| w.pinyin} }
20
+ x.report("convert800") { pinyins = (words * 100).map {|w| w.pinyin} }
21
+ x.report("sort:") { pinyins.sort {|a , b| a <=> b}}
22
+ end
23
+
24
+
data/lib/toPinyin.rb CHANGED
@@ -16,6 +16,7 @@ class String
16
16
  | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
17
17
  )*\z/mnx
18
18
 
19
+ @@mm = uniToPyMap
19
20
 
20
21
  def utf8?
21
22
  self =~ UTF8REGEX
@@ -41,17 +42,19 @@ def validate_utf8
41
42
  Iconv.iconv('UTF-8//IGNORE', 'UTF-8', (self + ' ') ).first[0..-2]
42
43
  end
43
44
 
44
- def pinyin
45
+ def pinyin
46
+
45
47
  scan(/./mu).map do |c|
46
48
  #convert to unicode
47
49
  #u = Iconv.iconv("UNICODEBIG","utf-8",c)[0].each_byte.map {|b| b.to_s(16)}.join
48
50
  u=sprintf("%04X", c.unpack("U*").first)
49
51
  #handle a-z, A-Z
50
- if u =~ /^00/
52
+ if u =~ /^00/
51
53
  #return c as it is
52
54
  c
53
55
  else
54
- uniToPyMap[u.upcase].chop unless uniToPyMap[u.upcase].nil?
56
+ m = @@mm[u]
57
+ m.chop unless m.nil?
55
58
  end
56
59
  end
57
60
  end
@@ -1,3 +1,3 @@
1
1
  module Topinyin
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: toPinyin
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - pierr.chen
@@ -34,6 +34,7 @@ files:
34
34
  - README
35
35
  - Rakefile
36
36
  - lib/Uni2Pinyin
37
+ - lib/performance.rb
37
38
  - lib/test.rb
38
39
  - lib/toPinyin.rb
39
40
  - lib/toPinyin/uniToPyMap.rb