pinyinator 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/pinyinator.rb +148 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 77bd305dc207cb1d03ab93aa6530d600ef12cefd
|
4
|
+
data.tar.gz: ceb991b6df743c73fb64e8fe70bc7c7bc9ffcfe9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 03a7eced05aa224183e5aa9bd862ffad9a9d4c73f0685fd4a915818f092a391aae824bbb7aa9662b7fa559b8408421f8aa24024af9ec021a18f328a3045c2e23
|
7
|
+
data.tar.gz: 34a39b41da9daa01eee46b85a8e86cde3f751233d984ceae40d20bfe4f5d036ed432eb74e26b96d4dbe4692fddafaff932a08e9151e7f365f720d3de08dd20a5
|
data/lib/pinyinator.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
# Converts between numbered pinyin ("ni3 hao3") and tone-marked pinyin
# ("nǐ hǎo").
#
# The module can be used through its module functions
# (+Pinyin.to_pinyin+, +Pinyin.from_pinyin+) or mixed into any class that
# responds to +to_s+, in which case the instance methods of the same names
# convert the receiver's string form.
module Pinyin
  # Matches one numbered syllable: capture 1 is the syllable (case-insensitive),
  # an optional erhua "r" may follow it (not captured), capture 2 is the tone
  # digit 1-5.
  Regex = /(shuang|chuang|zhuang|xiang|qiong|shuai|niang|guang|sheng|kuang|shang|jiong|huang|jiang|shuan|xiong|zhang|zheng|zhong|zhuai|zhuan|qiang|chang|liang|chuan|cheng|chong|chuai|hang|peng|chuo|piao|pian|chua|ping|yang|pang|chui|chun|chen|chan|chou|chao|chai|zhun|mang|meng|weng|shai|shei|miao|zhui|mian|yong|ming|wang|zhuo|zhua|shao|yuan|bing|zhen|fang|feng|zhan|zhou|zhao|zhei|zhai|rang|suan|reng|song|seng|dang|deng|dong|xuan|sang|rong|duan|cuan|cong|ceng|cang|diao|ruan|dian|ding|shou|xing|zuan|jiao|zong|zeng|zang|jian|tang|teng|tong|bian|biao|shan|tuan|huan|xian|huai|tiao|tian|hong|xiao|heng|ying|jing|shen|beng|kuan|kuai|nang|neng|nong|juan|kong|nuan|keng|kang|shua|niao|guan|nian|ting|shuo|guai|ning|quan|qiao|shui|gong|geng|gang|qian|bang|lang|leng|long|qing|ling|luan|shun|lian|liao|zhi|lia|liu|qin|lun|lin|luo|lan|lou|qiu|gai|gei|gao|gou|gan|gen|lao|lei|lai|que|gua|guo|nin|gui|niu|nie|gun|qie|qia|jun|kai|kei|kao|kou|kan|ken|qun|nun|nuo|xia|kua|kuo|nen|kui|nan|nou|kun|jue|nao|nei|hai|hei|hao|hou|han|hen|nai|rou|xiu|jin|hua|huo|tie|hui|tun|tui|hun|tuo|tan|jiu|zai|zei|zao|zou|zan|zen|eng|tou|tao|tei|tai|zuo|zui|xin|zun|jie|jia|run|diu|cai|cao|cou|can|cen|die|dia|xue|rui|cuo|cui|dun|cun|cin|ruo|rua|dui|sai|sao|sou|san|sen|duo|den|dan|dou|suo|sui|dao|sun|dei|zha|zhe|dai|xun|ang|ong|wai|fen|fan|fou|fei|zhu|wei|wan|min|miu|mie|wen|men|lie|chi|cha|che|man|mou|mao|mei|mai|yao|you|yan|chu|pin|pie|yin|pen|pan|pou|pao|shi|sha|she|pei|pai|yue|bin|bie|yun|nüe|lve|shu|ben|ban|bao|bei|bai|lüe|nve|ren|ran|rao|xie|re|ri|si|su|se|ru|sa|cu|ce|ca|ji|ci|zi|zu|ze|za|hu|he|ha|ju|ku|ke|qi|ka|gu|ge|ga|li|lu|le|qu|la|ni|xi|nu|ne|na|ti|tu|te|ta|xu|di|du|de|bo|lv|ba|ai|ei|ao|ou|an|en|er|da|wu|wa|wo|fu|fo|fa|nv|mi|mu|yi|ya|ye|me|mo|ma|pi|pu|po|yu|pa|bi|nü|bu|lü|e|o|a)r?([1-5])/i

  # Matches a single tone-marked vowel (tones 1-4, either case).
  AntiRegex = /(ā|ē|ī|ō|ū|ǖ|Ā|Ē|Ī|Ō|Ū|Ǖ|á|é|í|ó|ú|ǘ|Á|É|Í|Ó|Ú|Ǘ|ǎ|ě|ǐ|ǒ|ǔ|ǚ|Ǎ|Ě|Ǐ|Ǒ|Ǔ|Ǚ|à|è|ì|ò|ù|ǜ|À|È|Ì|Ò|Ù|Ǜ)/

  # Maps a "starred" vowel (the vowel that carries the tone mark, followed by
  # "*") to its column index in each row of the tone table below.
  Vowels = {
    'a*' => 0,
    'e*' => 1,
    'i*' => 2,
    'o*' => 3,
    'u*' => 4,
    'ü*' => 5,
    'A*' => 6,
    'E*' => 7,
    'I*' => 8,
    'O*' => 9,
    'U*' => 10,
    'Ü*' => 11
  }

  # Tone table: tone digit (as a String) => accented vowels, indexed by the
  # values of Vowels. Tone "5" maps to the unaccented vowels.
  Pinyin = {
    "1" => ['ā','ē','ī','ō','ū','ǖ','Ā','Ē','Ī','Ō','Ū','Ǖ'],
    "2" => ['á','é','í','ó','ú','ǘ','Á','É','Í','Ó','Ú','Ǘ'],
    "3" => ['ǎ','ě','ǐ','ǒ','ǔ','ǚ','Ǎ','Ě','Ǐ','Ǒ','Ǔ','Ǚ'],
    "4" => ['à','è','ì','ò','ù','ǜ','À','È','Ì','Ò','Ù','Ǜ'],
    "5" => ['a','e','i','o','u','ü','A','E','I','O','U','Ü']
  }

  # Reverse table: accented vowel => plain vowel followed by its tone digit
  # (ü is written as "v"). The plain 'ü'/'Ü' entries are not matched by
  # AntiRegex, so from_pinyin never consults them.
  AntiPinyin = {
    'ā' => 'a1', 'ē' => 'e1', 'ī' => 'i1', 'ō' => 'o1', 'ū' => 'u1', 'ǖ' => 'v1',
    'Ā' => 'A1', 'Ē' => 'E1', 'Ī' => 'I1', 'Ō' => 'O1', 'Ū' => 'U1', 'Ǖ' => 'V1',
    'á' => 'a2', 'é' => 'e2', 'í' => 'i2', 'ó' => 'o2', 'ú' => 'u2', 'ǘ' => 'v2',
    'Á' => 'A2', 'É' => 'E2', 'Í' => 'I2', 'Ó' => 'O2', 'Ú' => 'U2', 'Ǘ' => 'V2',
    'ǎ' => 'a3', 'ě' => 'e3', 'ǐ' => 'i3', 'ǒ' => 'o3', 'ǔ' => 'u3', 'ǚ' => 'v3',
    'Ǎ' => 'A3', 'Ě' => 'E3', 'Ǐ' => 'I3', 'Ǒ' => 'O3', 'Ǔ' => 'U3', 'Ǚ' => 'V3',
    'à' => 'a4', 'è' => 'e4', 'ì' => 'i4', 'ò' => 'o4', 'ù' => 'u4', 'ǜ' => 'v4',
    'À' => 'A4', 'È' => 'E4', 'Ì' => 'I4', 'Ò' => 'O4', 'Ù' => 'U4', 'Ǜ' => 'V4',
    'ü' => 'v',
    'Ü' => 'V'
  }

  # Lazily builds and memoizes the vowel-group table.
  #
  # Each key is a vowel group as it appears in a syllable ("iao"); each value
  # is the same group with "*" after the vowel that takes the tone mark
  # ("ia*o"). Insertion order matters: replacement_for uses the first key
  # found in the word, so multi-vowel groups come before single vowels.
  #
  # @return [Hash{String => String}]
  def self.accent_map
    @accent_map ||= begin
      starred_groups = ('a*i a*o e*i ia* ia*o ie* io* iu* ' +
                        'A*I A*O E*I IA* IA*O IE* IO* IU* ' +
                        'o*u ua* ua*i ue* ui* uo* üe* ' +
                        'O*U UA* UA*I UE* UI* UO* ÜE* ' +
                        'A* E* I* O* U* Ü* ' +
                        'a* e* i* o* u* ü*').split(' ')
      starred_groups.each_with_object({}) do |starred, map|
        map[starred.sub('*', '')] = starred
      end
    end
  end

  # Returns +word+ with the tone mark for +tone+ placed on the proper vowel.
  #
  # "v"/"V" in +word+ are first rewritten to "ü"/"Ü". If the word contains no
  # known vowel group it is returned as is (after the v => ü rewrite).
  #
  # @param word [String] a single syllable without its tone digit
  # @param tone [String, Integer] tone number 1-5 (5 = neutral, no mark)
  # @return [String]
  # @raise [ArgumentError] if +tone+ is not one of 1-5
  def self.replacement_for(word, tone)
    accented_vowels = Pinyin.fetch(tone.to_s) do
      raise ArgumentError, "tone must be 1-5, got #{tone.inspect}"
    end
    word = word.gsub('v', 'ü').gsub('V', 'Ü')

    accent_map.each_pair do |base, starred|
      next unless word.include?(base)

      # "." rather than "\w": Ruby's \w is ASCII-only and would miss ü/Ü.
      vowel_char = starred[/.\*/]
      accented_vowel_char = accented_vowels[Vowels[vowel_char]]
      return word.sub(base, starred).sub(vowel_char, accented_vowel_char)
    end

    word
  end

  # Converts every numbered syllable in +string+ to its tone-marked form,
  # e.g. "ni3 hao3" => "nǐ hǎo". An erhua "r" between the syllable and the
  # tone digit is kept ("huar1" => "huār"). Text that is not a numbered
  # syllable is left untouched.
  #
  # @param string [String]
  # @return [String] a new string
  def self.to_pinyin(string)
    string.gsub(Regex) do |matched|
      # Read the captures before calling replacement_for, which runs its own
      # regex operations and would overwrite the last-match data.
      syllable = Regexp.last_match(1)
      tone = Regexp.last_match(2)
      # Whatever sits between the syllable and the digit: "" or the erhua r.
      erhua = matched[syllable.length...-1]
      replacement_for(syllable, tone) + erhua
    end
  end

  # Replaces every tone-marked vowel in +string+ with the plain vowel followed
  # by its tone digit, as listed in AntiPinyin. The digit lands directly after
  # the vowel, not at the end of the syllable ("hǎo" => "ha3o").
  #
  # @param string [String]
  # @return [String] a new string
  def self.from_pinyin(string)
    string.gsub(AntiRegex, AntiPinyin)
  end

  # instance methods

  # Tone-marked form of the receiver's +to_s+.
  #
  # The leading "::" is required: inside this module the bare constant
  # +Pinyin+ resolves to the tone-table Hash, not to the module.
  def to_pinyin
    ::Pinyin.to_pinyin(to_s)
  end

  # Numbered form of the receiver's +to_s+. See the note on #to_pinyin about
  # the leading "::".
  def from_pinyin
    ::Pinyin.from_pinyin(to_s)
  end

end
|
139
|
+
|
140
|
+
# Core extension: lets any String convert itself between numbered and
# tone-marked pinyin by delegating to the Pinyin module functions.
class String
  # Tone-marked form of this string, as produced by Pinyin.to_pinyin.
  def to_pinyin
    ::Pinyin.to_pinyin(self)
  end

  # Numbered form of this string, as produced by Pinyin.from_pinyin.
  def from_pinyin
    ::Pinyin.from_pinyin(self)
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pinyinator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Boekhoff
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A pinyin conversion utility
|
14
|
+
email: boekhoffa@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/pinyinator.rb
|
20
|
+
homepage: http://rubygems.org/gems/pinyinator
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.4.6
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Pinyin converter
|
44
|
+
test_files: []
|