pinyin_split 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/pinyin_split/slipt.rb +97 -0
- data/lib/pinyin_split.rb +28 -0
- data/pinyin_split.gemspec +17 -0
- metadata +4 -1
@@ -0,0 +1,97 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
class PinyinSplit
  # Greedy longest-match splitter that cuts an unspaced run of Pinyin into
  # syllables using a fixed syllable table.
  class Slipt
    # Longest candidate (in characters) tried at each position.
    MAX_SYLLABLE_LENGTH = 6

    # Known syllables, grouped by their first character. Characters mapped to
    # an empty list (and any character that is not a key at all) are always
    # emitted as a single-character piece.
    PINYINHASH = {
      'a' => %w[a ai an ang ao],
      'b' => %w[ba bai ban bang bao
                bei ben beng bi bian biao bie bin bing bo bu],
      'c' => %w[ca cai can cang cao ce ceng cha chai
                chan chang chao che chen cheng chi chong chou chu
                chuai chuan chuang chui chun chuo ci cong cou cu
                cuan cui cun cuo],
      'd' => %w[da dai dan dang dao de
                deng di dian diao die ding diu
                dong dou du duan dui dun duo dia],
      'e' => %w[e en er],
      'f' => %w[fa fan fang fei fen feng fu fou],
      'g' => %w[ga gai gan gang gao ge gen geng
                gong gou gu gua guai guan guang gui gun guo],
      'h' => %w[ha hai han hang hao he
                hei hen heng hong hou hu
                hua huai huan huang hui hun huo],
      'i' => [],
      # 'qiao' used to be listed here by mistake; it now lives under 'q'.
      'j' => %w[ji jia jian jiang jiao jie jin jing jiong
                jiu ju juan jue jun jv],
      'k' => %w[ka kai kan kang kao ke ken keng kong kou ku kua kuai
                kuan kuang kui kun kuo],
      'l' => %w[la lai lan lang lao
                le lei leng li lia lian liang liao lie lin
                ling liu long lou lu luan lue lun luo lv],
      'm' => %w[ma mai man mang mao me mei men meng mi mian
                miao mie min ming miu mo mou mu],
      'n' => %w[na nai nan nang nao ne nei nen neng ni nian niang
                niao nie nin ning niu nong nu nuan nue nuo nv],
      'o' => %w[o ou],
      'p' => %w[pa pai pan pang pao pei pen
                peng pi pian piao pie pin ping po pou pu],
      'q' => %w[qi qia qian qiang qiao qie qin qing qiong qiu qu
                quan que qun qv],
      'r' => %w[ran rang rao re ren reng ri
                rong rou ru ruan rui run ruo],
      's' => %w[sa sai san
                sang sao se sen seng sha shai shan shang shao
                she shen sheng shi shou shu shua shuai shuan shuang
                shui shun shuo si song sou su suan sui sun suo],
      't' => %w[ta tai tan tang tao te teng ti tian
                tiao tie ting tong tou tu tuan tui tun tuo],
      'u' => [],
      'v' => [],
      'w' => %w[wa wai wan wang wei wen weng wo wu],
      'x' => %w[xi xia xian xiang xiao xie xin xing xiong xiu xu
                xuan xue xun xv],
      'y' => %w[ya yan yang yao ye yi yin ying
                yo yong you yu yuan yue yun],
      'z' => %w[za zai zan
                zang zao ze zei zen zeng zha zhai zhan zhang
                zhao zhe zhen zheng zhi zhong zhou zhu zhua zhuai
                zhuan zhuang zhui zhun zhuo zi zong zou zu zuan
                zui zun zuo],
      ' ' => [],
      "\n" => [], # double-quoted so the key really is a newline
      ',' => [],
      '.' => [],
      "\t" => [], # double-quoted so the key really is a tab
      '?' => [],
      '!' => [],
      ';' => [],
      # NOTE(review): two-character key, so it can never equal a single input
      # character; presumably ':' was intended - confirm with the author.
      '=>' => [],
      '"' => [],
      # Lookups are keyed by a single character, so this entry is never
      # consulted by .slipt.
      'special' => ['', 'ei', 'm', 'n', 'dia', 'cen', 'nou',
                    'jv', 'qv', 'xv', 'lv', 'nv']
    }

    PINYINKEYS = PINYINHASH.keys

    # Splits +source+ into pieces by taking, at each position, the longest
    # known syllable (at most MAX_SYLLABLE_LENGTH characters) that starts
    # there. A character that starts no known syllable becomes its own piece.
    #
    # Matching ignores case; the returned pieces keep the input's casing.
    #
    # source - anything responding to #to_s (nil yields an empty result).
    #
    # Returns an Array of String pieces which, concatenated, equal source.to_s.
    def self.slipt(source)
      text = source.to_s
      len = text.size
      result = []
      start = 0

      while start < len
        first = text[start]
        # Look up by the downcased character so uppercase input finds its
        # syllable list instead of hitting a nil lookup.
        candidates = PINYINHASH[first.downcase]
        tmp = first
        step = 1

        if candidates && !candidates.empty?
          longest = [MAX_SYLLABLE_LENGTH, len - start].min
          1.upto(longest) do |size|
            piece = text[start, size]
            next unless candidates.include?(piece.downcase)

            # Keep overwriting: the last hit is the longest match.
            tmp = piece
            step = size
          end
        end

        result << tmp
        start += step
      end

      result
    end
  end
end
|
data/lib/pinyin_split.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
3
|
+
class PinyinSplit
  # Entry points for splitting unspaced Pinyin. The actual segmentation is
  # done by PinyinSplit::Slipt.slipt; these methods only shape its result.

  # Splits +source+ and joins the pieces with single spaces.
  #
  # Example:
  #   >> PinyinSplit.split("wolegequ")
  #   => "wo le ge qu"
  #
  # Arguments:
  #   source: (String)
  #
  # Returns a String.
  def self.split(source)
    splitArray(source).join(' ')
  end

  # Splits +source+ and returns the pieces as an Array.
  #
  # Example:
  #   >> PinyinSplit.splitArray("wolegequ")
  #   => ["wo","le","ge","qu"]
  #
  # Arguments:
  #   source: (String)
  #
  # Returns an Array of Strings.
  def self.splitArray(source)
    # .slipt is defined on PinyinSplit::Slipt, not on PinyinSplit itself, so
    # it has to be called through that class.
    PinyinSplit::Slipt.slipt(source)
  end
end
|
27
|
+
|
28
|
+
require 'pinyin_split/slipt.rb'
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem specification for pinyin_split.
Gem::Specification.new do |spec|
  # Identity and release information.
  spec.name    = "pinyin_split"
  spec.version = "1.0.1"
  spec.date    = "2012-11-12"

  # Human-readable metadata.
  spec.summary     = 'PinyinSplit'
  spec.description = 'A Gem for split Chinese Pinyin without space'
  spec.authors     = ['Liber Liu']
  spec.email       = "llb0536@gmail.com"
  spec.homepage    = "https://github.com/llb0536/PinyinSplit"

  # Packaged content: every file tracked by git, one path per line.
  spec.files            = `git ls-files`.split("\n")
  spec.executables      = []
  spec.extra_rdoc_files = ['README.md']
  spec.require_paths    = ['lib']

  # RubyGems tooling information; the version requirement is only set when
  # this RubyGems exposes the setter.
  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new('>= 0')
  end
  spec.rubygems_version = '1.8.23'
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pinyin_split
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -19,6 +19,9 @@ extra_rdoc_files:
|
|
19
19
|
- README.md
|
20
20
|
files:
|
21
21
|
- README.md
|
22
|
+
- lib/pinyin_split.rb
|
23
|
+
- lib/pinyin_split/slipt.rb
|
24
|
+
- pinyin_split.gemspec
|
22
25
|
homepage: https://github.com/llb0536/PinyinSplit
|
23
26
|
licenses: []
|
24
27
|
post_install_message:
|