toPinyin 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/performance.rb +24 -0
- data/lib/toPinyin.rb +6 -3
- data/lib/toPinyin/version.rb +1 -1
- metadata +4 -3
data/lib/performance.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'toPinyin'
|
3
|
+
|
4
|
+
|
5
|
+
Benchmark.bm(15) do |x|
|
6
|
+
|
7
|
+
pinyins=[]
|
8
|
+
words = "
|
9
|
+
人人
|
10
|
+
没有
|
11
|
+
理想
|
12
|
+
跟跟
|
13
|
+
咸鱼
|
14
|
+
有有
|
15
|
+
什么
|
16
|
+
区别".split("\n")
|
17
|
+
|
18
|
+
#take 4 seconds
|
19
|
+
x.report("convert80") { pinyins = (words * 10).map {|w| w.pinyin} }
|
20
|
+
x.report("convert800") { pinyins = (words * 100).map {|w| w.pinyin} }
|
21
|
+
x.report("sort:") { pinyins.sort {|a , b| a <=> b}}
|
22
|
+
end
|
23
|
+
|
24
|
+
|
data/lib/toPinyin.rb
CHANGED
@@ -16,6 +16,7 @@ class String
|
|
16
16
|
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
17
17
|
)*\z/mnx
|
18
18
|
|
19
|
+
@@mm = uniToPyMap
|
19
20
|
|
20
21
|
def utf8?
|
21
22
|
self =~ UTF8REGEX
|
@@ -41,17 +42,19 @@ def validate_utf8
|
|
41
42
|
Iconv.iconv('UTF-8//IGNORE', 'UTF-8', (self + ' ') ).first[0..-2]
|
42
43
|
end
|
43
44
|
|
44
|
-
def pinyin
|
45
|
+
def pinyin
|
46
|
+
|
45
47
|
scan(/./mu).map do |c|
|
46
48
|
#conver to unicode
|
47
49
|
#u = Iconv.iconv("UNICODEBIG","utf-8",c)[0].each_byte.map {|b| b.to_s(16)}.join
|
48
50
|
u=sprintf("%04X", c.unpack("U*").first)
|
49
51
|
#handle a-z, A-Z
|
50
|
-
if
|
52
|
+
if u =~ /^00/
|
51
53
|
#return c as it is
|
52
54
|
c
|
53
55
|
else
|
54
|
-
|
56
|
+
m = @@mm[u]
|
57
|
+
m.chop unless m.nil?
|
55
58
|
end
|
56
59
|
end
|
57
60
|
end
|
data/lib/toPinyin/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toPinyin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 0.0.3
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- pierr.chen
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- README
|
35
35
|
- Rakefile
|
36
36
|
- lib/Uni2Pinyin
|
37
|
+
- lib/performance.rb
|
37
38
|
- lib/test.rb
|
38
39
|
- lib/toPinyin.rb
|
39
40
|
- lib/toPinyin/uniToPyMap.rb
|