pinyinator 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/pinyinator.rb +148 -0
  3. metadata +44 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 77bd305dc207cb1d03ab93aa6530d600ef12cefd
4
+ data.tar.gz: ceb991b6df743c73fb64e8fe70bc7c7bc9ffcfe9
5
+ SHA512:
6
+ metadata.gz: 03a7eced05aa224183e5aa9bd862ffad9a9d4c73f0685fd4a915818f092a391aae824bbb7aa9662b7fa559b8408421f8aa24024af9ec021a18f328a3045c2e23
7
+ data.tar.gz: 34a39b41da9daa01eee46b85a8e86cde3f751233d984ceae40d20bfe4f5d036ed432eb74e26b96d4dbe4692fddafaff932a08e9151e7f365f720d3de08dd20a5
@@ -0,0 +1,148 @@
1
# Converts between tone-numbered pinyin ("ni3 hao3") and pinyin written with
# tone-mark diacritics ("nǐ hǎo").
#
# The converters are available as module functions (Pinyin.to_pinyin and
# Pinyin.from_pinyin). The module can also be mixed into any class that
# responds to #to_s, which adds #to_pinyin and #from_pinyin instance methods.
module Pinyin
  # Matches one tone-numbered syllable, case-insensitively. Capture 1 is the
  # syllable, capture 2 is the tone digit (1-5). An optional erhua "r" may sit
  # between them; it is part of the overall match but of neither capture.
  # Longer syllables are listed first so the alternation prefers them.
  Regex = /(shuang|chuang|zhuang|xiang|qiong|shuai|niang|guang|sheng|kuang|shang|jiong|huang|jiang|shuan|xiong|zhang|zheng|zhong|zhuai|zhuan|qiang|chang|liang|chuan|cheng|chong|chuai|hang|peng|chuo|piao|pian|chua|ping|yang|pang|chui|chun|chen|chan|chou|chao|chai|zhun|mang|meng|weng|shai|shei|miao|zhui|mian|yong|ming|wang|zhuo|zhua|shao|yuan|bing|zhen|fang|feng|zhan|zhou|zhao|zhei|zhai|rang|suan|reng|song|seng|dang|deng|dong|xuan|sang|rong|duan|cuan|cong|ceng|cang|diao|ruan|dian|ding|shou|xing|zuan|jiao|zong|zeng|zang|jian|tang|teng|tong|bian|biao|shan|tuan|huan|xian|huai|tiao|tian|hong|xiao|heng|ying|jing|shen|beng|kuan|kuai|nang|neng|nong|juan|kong|nuan|keng|kang|shua|niao|guan|nian|ting|shuo|guai|ning|quan|qiao|shui|gong|geng|gang|qian|bang|lang|leng|long|qing|ling|luan|shun|lian|liao|zhi|lia|liu|qin|lun|lin|luo|lan|lou|qiu|gai|gei|gao|gou|gan|gen|lao|lei|lai|que|gua|guo|nin|gui|niu|nie|gun|qie|qia|jun|kai|kei|kao|kou|kan|ken|qun|nun|nuo|xia|kua|kuo|nen|kui|nan|nou|kun|jue|nao|nei|hai|hei|hao|hou|han|hen|nai|rou|xiu|jin|hua|huo|tie|hui|tun|tui|hun|tuo|tan|jiu|zai|zei|zao|zou|zan|zen|eng|tou|tao|tei|tai|zuo|zui|xin|zun|jie|jia|run|diu|cai|cao|cou|can|cen|die|dia|xue|rui|cuo|cui|dun|cun|cin|ruo|rua|dui|sai|sao|sou|san|sen|duo|den|dan|dou|suo|sui|dao|sun|dei|zha|zhe|dai|xun|ang|ong|wai|fen|fan|fou|fei|zhu|wei|wan|min|miu|mie|wen|men|lie|chi|cha|che|man|mou|mao|mei|mai|yao|you|yan|chu|pin|pie|yin|pen|pan|pou|pao|shi|sha|she|pei|pai|yue|bin|bie|yun|nüe|lve|shu|ben|ban|bao|bei|bai|lüe|nve|ren|ran|rao|xie|re|ri|si|su|se|ru|sa|cu|ce|ca|ji|ci|zi|zu|ze|za|hu|he|ha|ju|ku|ke|qi|ka|gu|ge|ga|li|lu|le|qu|la|ni|xi|nu|ne|na|ti|tu|te|ta|xu|di|du|de|bo|lv|ba|ai|ei|ao|ou|an|en|er|da|wu|wa|wo|fu|fo|fa|nv|mi|mu|yi|ya|ye|me|mo|ma|pi|pu|po|yu|pa|bi|nü|bu|lü|e|o|a)r?([1-5])/i.freeze

  # Matches a single vowel carrying a tone mark (tones 1-4). Plain "ü"/"Ü"
  # are deliberately not matched, so from_pinyin leaves them untouched.
  AntiRegex = /(ā|ē|ī|ō|ū|ǖ|Ā|Ē|Ī|Ō|Ū|Ǖ|á|é|í|ó|ú|ǘ|Á|É|Í|Ó|Ú|Ǘ|ǎ|ě|ǐ|ǒ|ǔ|ǚ|Ǎ|Ě|Ǐ|Ǒ|Ǔ|Ǚ|à|è|ì|ò|ù|ǜ|À|È|Ì|Ò|Ù|Ǜ)/.freeze

  # Maps a starred vowel marker (as used in accent_map values) to its column
  # in the per-tone arrays of the Pinyin table.
  Vowels = {
    'a*' => 0, 'e*' => 1, 'i*' => 2, 'o*' => 3, 'u*' => 4, 'ü*' => 5,
    'A*' => 6, 'E*' => 7, 'I*' => 8, 'O*' => 9, 'U*' => 10, 'Ü*' => 11
  }.freeze

  # Tone digit => accented form of each vowel, in Vowels column order.
  # Tone "5" (neutral) maps every vowel to its unmarked form.
  Pinyin = {
    "1" => ['ā', 'ē', 'ī', 'ō', 'ū', 'ǖ', 'Ā', 'Ē', 'Ī', 'Ō', 'Ū', 'Ǖ'].freeze,
    "2" => ['á', 'é', 'í', 'ó', 'ú', 'ǘ', 'Á', 'É', 'Í', 'Ó', 'Ú', 'Ǘ'].freeze,
    "3" => ['ǎ', 'ě', 'ǐ', 'ǒ', 'ǔ', 'ǚ', 'Ǎ', 'Ě', 'Ǐ', 'Ǒ', 'Ǔ', 'Ǚ'].freeze,
    "4" => ['à', 'è', 'ì', 'ò', 'ù', 'ǜ', 'À', 'È', 'Ì', 'Ò', 'Ù', 'Ǜ'].freeze,
    "5" => ['a', 'e', 'i', 'o', 'u', 'ü', 'A', 'E', 'I', 'O', 'U', 'Ü'].freeze
  }.freeze

  # Accented vowel => ASCII vowel followed by its tone digit (e.g. 'ǎ' => 'a3',
  # 'ǜ' => 'v4'), plus 'ü' => 'v' and 'Ü' => 'V'. Derived from the Pinyin
  # table so the two cannot drift apart; entry order is tone 1..4 in Vowels
  # column order, then the two plain umlaut entries.
  AntiPinyin = begin
    ascii_vowels = Pinyin["5"].map { |vowel| vowel.tr('üÜ', 'vV') }
    table = {}
    %w[1 2 3 4].each do |tone|
      Pinyin[tone].each_with_index do |accented, column|
        table[accented] = ascii_vowels[column] + tone
      end
    end
    table['ü'] = 'v'
    table['Ü'] = 'V'
    table.freeze
  end

  # Returns the vowel-group lookup used to decide which vowel of a syllable
  # receives the tone mark.
  #
  # Keys are vowel groups ("ai", "ou", "a", ...); values are the same group
  # with a "*" placed after the vowel that takes the mark ("a*i", "o*u", "a*").
  # Insertion order matters: replacement_for uses the first key found in the
  # syllable, so multi-vowel groups come before single vowels.
  #
  # The hash is built once and memoized on the module.
  def self.accent_map
    @accent_map ||= begin
      marked_groups = %w[
        a*i a*o e*i ia* ia*o ie* io* iu*
        A*I A*O E*I IA* IA*O IE* IO* IU*
        o*u ua* ua*i ue* ui* uo* üe*
        O*U UA* UA*I UE* UI* UO* ÜE*
        A* E* I* O* U* Ü*
        a* e* i* o* u* ü*
      ]
      marked_groups.each_with_object({}) do |marked, map|
        map[marked.delete('*')] = marked
      end.freeze
    end
  end

  # Returns +word+ (one syllable without its tone digit) with the tone mark
  # for +tone+ applied to the appropriate vowel.
  #
  # word - syllable text; "v"/"V" are treated as "ü"/"Ü".
  # tone - tone digit as a String key of the Pinyin table ("1".."5").
  #
  # If the syllable contains no vowel group known to accent_map, the word is
  # returned unchanged (after the v => ü substitution).
  def self.replacement_for(word, tone)
    word = word.tr('vV', 'üÜ')
    accent_map.each_pair do |base, marked|
      next unless word.include?(base)

      # Any character followed by "*". `\w` cannot be used here because it is
      # ASCII-only in Ruby and would miss "ü*" / "Ü*".
      vowel_char = marked[/.\*/]
      accented_vowel_char = Pinyin[tone][Vowels[vowel_char]]
      return word.sub(base, marked).sub(vowel_char, accented_vowel_char)
    end
    word
  end

  # Returns a copy of +string+ in which every tone-numbered syllable matched
  # by Regex is rewritten with a tone mark ("ni3 hao3" => "nǐ hǎo").
  #
  # An erhua "r" between the syllable and the digit is kept after the
  # converted syllable ("huar1" => "huār"). Text that does not match Regex is
  # left as is.
  def self.to_pinyin(string)
    string.gsub(Regex) do
      matched = Regexp.last_match(0)
      word = Regexp.last_match(1)
      tone = Regexp.last_match(2)
      # Whatever lies between the syllable and the digit: "r" or "".
      erhua = matched[word.length...-tone.length]
      replacement_for(word, tone) + erhua
    end
  end

  # Returns a copy of +string+ in which every tone-marked vowel matched by
  # AntiRegex is replaced by its AntiPinyin entry.
  #
  # The tone digit is emitted directly after the vowel that carried the mark,
  # not at the end of the syllable ("hǎo" => "ha3o").
  def self.from_pinyin(string)
    string.gsub(AntiRegex, AntiPinyin)
  end

  # instance methods

  # Mixin form of Pinyin.to_pinyin, applied to the receiver's #to_s.
  #
  # `::Pinyin` is required: inside this module a bare `Pinyin` resolves to the
  # nested tone-table Hash constant, not to the module.
  def to_pinyin
    ::Pinyin.to_pinyin(to_s)
  end

  # Mixin form of Pinyin.from_pinyin, applied to the receiver's #to_s.
  def from_pinyin
    ::Pinyin.from_pinyin(to_s)
  end
end
139
+
140
# Core extension: exposes the Pinyin module converters directly on String.
class String
  # Returns the result of Pinyin.to_pinyin called with this string.
  def to_pinyin
    Pinyin.to_pinyin(self)
  end

  # Returns the result of Pinyin.from_pinyin called with this string.
  def from_pinyin
    Pinyin.from_pinyin(self)
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pinyinator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Boekhoff
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A pinyin conversion utility
14
+ email: boekhoffa@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/pinyinator.rb
20
+ homepage: http://rubygems.org/gems/pinyinator
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.6
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Pinyin converter
44
+ test_files: []