pinyin_split 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
# -*- encoding : utf-8 -*-
# Reopened in nested form so this file also loads when PinyinSplit has not
# been defined yet (the compact `class PinyinSplit::Slipt` form raises
# NameError in that case).
class PinyinSplit
  # Greedy longest-match segmenter for Pinyin text written without spaces.
  #
  # The syllable table is indexed by the first letter of each syllable; the
  # segmenter looks at the current character, fetches the candidate syllables
  # for that letter and keeps the longest candidate that matches the input at
  # the current position.
  class Slipt
    # Known syllables grouped by their first letter. Keys that map to an empty
    # list (vowels that never start a syllable, whitespace, punctuation) make
    # the character pass through as a single-character token.
    PINYINHASH = {
      'a' => ['a', 'ai', 'an', 'ang', 'ao'],
      'b' => ['ba', 'bai', 'ban', 'bang', 'bao',
              'bei', 'ben', 'beng', 'bi', 'bian', 'biao', 'bie', 'bin', 'bing', 'bo', 'bu'],
      'c' => ['ca', 'cai', 'can', 'cang', 'cao', 'ce', 'ceng', 'cha', 'chai',
              'chan', 'chang', 'chao', 'che', 'chen', 'cheng', 'chi', 'chong', 'chou', 'chu',
              'chuai', 'chuan', 'chuang', 'chui', 'chun', 'chuo', 'ci', 'cong', 'cou', 'cu',
              'cuan', 'cui', 'cun', 'cuo'],
      'd' => ['da', 'dai', 'dan', 'dang', 'dao', 'de',
              'deng', 'di', 'dian', 'diao', 'die', 'ding', 'diu',
              'dong', 'dou', 'du', 'duan', 'dui', 'dun', 'duo', 'dia'],
      'e' => ['e', 'en', 'er'],
      'f' => ['fa', 'fan', 'fang', 'fei', 'fen', 'feng', 'fu', 'fou'],
      'g' => ['ga', 'gai', 'gan', 'gang', 'gao', 'ge', 'gen', 'geng',
              'gong', 'gou', 'gu', 'gua', 'guai', 'guan', 'guang', 'gui', 'gun', 'guo'],
      'h' => ['ha', 'hai', 'han', 'hang', 'hao', 'he',
              'hei', 'hen', 'heng', 'hong', 'hou', 'hu',
              'hua', 'huai', 'huan', 'huang', 'hui', 'hun', 'huo'],
      'i' => [],
      'j' => ['ji', 'jia', 'jian', 'jiang', 'jiao', 'jie', 'jin', 'jing', 'jiong',
              'jiu', 'ju', 'juan', 'jue', 'jun', 'jv'],
      'k' => ['ka', 'kai', 'kan', 'kang', 'kao', 'ke', 'ken', 'keng', 'kong', 'kou', 'ku', 'kua', 'kuai',
              'kuan', 'kuang', 'kui', 'kun', 'kuo'],
      'l' => ['la', 'lai', 'lan', 'lang', 'lao',
              'le', 'lei', 'leng', 'li', 'lia', 'lian', 'liang', 'liao', 'lie', 'lin',
              'ling', 'liu', 'long', 'lou', 'lu', 'luan', 'lue', 'lun', 'luo', 'lv'],
      'm' => ['ma', 'mai', 'man', 'mang', 'mao', 'me', 'mei', 'men', 'meng', 'mi', 'mian',
              'miao', 'mie', 'min', 'ming', 'miu', 'mo', 'mou', 'mu'],
      'n' => ['na', 'nai', 'nan', 'nang', 'nao', 'ne', 'nei', 'nen', 'neng', 'ni', 'nian', 'niang',
              'niao', 'nie', 'nin', 'ning', 'niu', 'nong', 'nu', 'nuan', 'nue', 'nuo', 'nv'],
      'o' => ['o', 'ou'],
      'p' => ['pa', 'pai', 'pan', 'pang', 'pao', 'pei', 'pen',
              'peng', 'pi', 'pian', 'piao', 'pie', 'pin', 'ping', 'po', 'pou', 'pu'],
      # 'qiao' used to be filed under 'j', where it could never match because
      # the lookup is keyed by the first letter of the input.
      'q' => ['qi', 'qia', 'qian', 'qiang', 'qiao', 'qie', 'qin', 'qing', 'qiong', 'qiu', 'qu',
              'quan', 'que', 'qun', 'qv'],
      'r' => ['ran', 'rang', 'rao', 're', 'ren', 'reng', 'ri',
              'rong', 'rou', 'ru', 'ruan', 'rui', 'run', 'ruo'],
      's' => ['sa', 'sai', 'san',
              'sang', 'sao', 'se', 'sen', 'seng', 'sha', 'shai', 'shan', 'shang', 'shao',
              'she', 'shen', 'sheng', 'shi', 'shou', 'shu', 'shua', 'shuai', 'shuan', 'shuang',
              'shui', 'shun', 'shuo', 'si', 'song', 'sou', 'su', 'suan', 'sui', 'sun', 'suo'],
      't' => ['ta', 'tai', 'tan', 'tang', 'tao', 'te', 'teng', 'ti', 'tian',
              'tiao', 'tie', 'ting', 'tong', 'tou', 'tu', 'tuan', 'tui', 'tun', 'tuo'],
      'u' => [],
      'v' => [],
      'w' => ['wa', 'wai', 'wan', 'wang', 'wei', 'wen', 'weng', 'wo', 'wu'],
      'x' => ['xi', 'xia', 'xian', 'xiang', 'xiao', 'xie', 'xin', 'xing', 'xiong', 'xiu', 'xu',
              'xuan', 'xue', 'xun', 'xv'],
      'y' => ['ya', 'yan', 'yang', 'yao', 'ye', 'yi', 'yin', 'ying',
              'yo', 'yong', 'you', 'yu', 'yuan', 'yue', 'yun'],
      'z' => ['za', 'zai', 'zan',
              'zang', 'zao', 'ze', 'zei', 'zen', 'zeng', 'zha', 'zhai', 'zhan', 'zhang',
              'zhao', 'zhe', 'zhen', 'zheng', 'zhi', 'zhong', 'zhou', 'zhu', 'zhua', 'zhuai',
              'zhuan', 'zhuang', 'zhui', 'zhun', 'zhuo', 'zi', 'zong', 'zou', 'zu', 'zuan',
              'zui', 'zun', 'zuo'],
      ' ' => [],
      # Double-quoted so the keys are the actual newline / tab characters
      # (single-quoted '\n' is a backslash followed by the letter n).
      "\n" => [],
      ',' => [],
      '.' => [],
      "\t" => [],
      '?' => [],
      '!' => [],
      ';' => [],
      '=>' => [],
      '"' => [],
      # NOTE(review): this entry is not reachable through .slipt, which only
      # looks up single-character keys; it is kept as reference data.
      'special' => ['', 'ei', 'm', 'n', 'dia', 'cen', 'nou',
                    'jv', 'qv', 'xv', 'lv', 'nv']
    }
    PINYINKEYS = PINYINHASH.keys

    # Longest syllable in the table (e.g. "chuang", "shuang", "zhuang").
    MAX_SYLLABLE_LENGTH = 6

    # Splits +source+ into Pinyin syllables.
    #
    # Matching is case-insensitive, but the returned tokens keep the casing of
    # the input. Characters that do not start a known syllable (spaces,
    # punctuation, non-Latin characters, ...) are returned as single-character
    # tokens.
    #
    # @param source [#to_s] the text to split; nil yields an empty array
    # @return [Array<String>] the tokens, in input order
    def self.slipt(source)
      # Index the same string whose size bounds the loop, so non-String input
      # is handled consistently.
      text = source.to_s
      result = []
      start = 0
      len = text.size
      while start < len
        first = text[start]
        token = first
        step = 1
        # Look up with the downcased letter: the table only has lowercase keys.
        candidates = PINYINHASH[first.downcase]
        if candidates
          longest = [MAX_SYLLABLE_LENGTH, len - start].min
          1.upto(longest) do |width|
            piece = text[start, width]
            # Later (longer) matches overwrite earlier ones: longest wins.
            if candidates.include?(piece.downcase)
              token = piece
              step = width
            end
          end
        end
        result << token
        start += step
      end
      result
    end
  end
end
@@ -0,0 +1,28 @@
# -*- encoding : utf-8 -*-

# Splits Pinyin text that was written without spaces into its syllables.
class PinyinSplit
  # Splits +source+ and joins the resulting tokens with single spaces.
  #
  # Example:
  #   >> PinyinSplit.split("wolegequ")
  #   => "wo le ge qu"
  #
  # @param source [String] the unspaced Pinyin text
  # @return [String] the tokens joined by a space
  def self.split(source)
    # The segmenter lives on PinyinSplit::Slipt; PinyinSplit itself has no
    # .slipt method, so the call must go through the nested class.
    Slipt.slipt(source).join(' ')
  end

  # Splits +source+ and returns the tokens as an array.
  #
  # Example:
  #   >> PinyinSplit.splitArray("wolegequ")
  #   => ["wo", "le", "ge", "qu"]
  #
  # @param source [String] the unspaced Pinyin text
  # @return [Array<String>] the tokens, in input order
  def self.splitArray(source)
    Slipt.slipt(source)
  end
end
27
+
28
+ require 'pinyin_split/slipt.rb'
@@ -0,0 +1,17 @@
# Gem specification for pinyin_split.
Gem::Specification.new do |s|
  s.name        = 'pinyin_split'
  s.version     = '1.0.1'

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.rubygems_version = "1.8.23"
  s.date = '2012-11-12'
  s.summary = "PinyinSplit"
  s.description = "A Gem for split Chinese Pinyin without space"
  s.executables = []
  s.extra_rdoc_files = ["README.md"]
  s.require_paths = ["lib"]
  s.authors = ["Liber Liu"]
  s.email = 'llb0536@gmail.com'
  # List the packaged files from the filesystem instead of `git ls-files`:
  # shelling out to git silently drops files that are not tracked yet and
  # raises when git is not installed or the gem is built outside a checkout.
  # The glob is resolved relative to this gemspec so the result does not
  # depend on the caller's working directory.
  s.files = Dir.chdir(File.expand_path(File.dirname(__FILE__))) do
    Dir['README.md', 'pinyin_split.gemspec', 'lib/**/*.rb'].sort
  end
  s.homepage = 'https://github.com/llb0536/PinyinSplit'
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pinyin_split
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -19,6 +19,9 @@ extra_rdoc_files:
19
19
  - README.md
20
20
  files:
21
21
  - README.md
22
+ - lib/pinyin_split.rb
23
+ - lib/pinyin_split/slipt.rb
24
+ - pinyin_split.gemspec
22
25
  homepage: https://github.com/llb0536/PinyinSplit
23
26
  licenses: []
24
27
  post_install_message: