cjk_converter 0.0.17
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README.md +34 -0
- data/Rakefile +1 -0
- data/cjk_converter.gemspec +20 -0
- data/lib/cjk_converter.rb +10 -0
- data/lib/cjk_converter/fullwidth.rb +83 -0
- data/lib/cjk_converter/kana.rb +321 -0
- data/lib/cjk_converter/pinyin.rb +170 -0
- data/lib/cjk_converter/string.rb +59 -0
- data/lib/cjk_converter/version.rb +3 -0
- data/lib/cjk_converter/zh_romanization.rb +474 -0
- metadata +58 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
CJK Converter: a romanization converter for CJK languages.
|
2
|
+
====================================
|
3
|
+
|
4
|
+
## DESCRIPTION
|
5
|
+
|
6
|
+
A romanization converter for CJK (Chinese, Japanese, and Korean) languages.
|
7
|
+
|
8
|
+
CJK Converter extends the String class with various convenience
|
9
|
+
functions.
|
10
|
+
|
11
|
+
## INSTALLATION
|
12
|
+
|
13
|
+
Install as a gem
|
14
|
+
|
15
|
+
$ [sudo] gem install cjk_converter
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
Converting pinyin written with tone numbers to pinyin written with tone marks (diacritics)
|
20
|
+
(e.g. "ni3 hao3" => "nǐ hǎo")
|
21
|
+
|
22
|
+
pinyin_with_numbers = "ni3 hao3"
|
23
|
+
pinyin = CJKConverter::Pinyin.convert pinyin_with_numbers
|
24
|
+
puts pinyin #=> "nǐ hǎo"
|
25
|
+
|
26
|
+
Converting between different Chinese romanization systems:
|
27
|
+
(e.g. Pinyin -> Zhuyin Fuhao (注音符号))
|
28
|
+
|
29
|
+
from_romanization = "pinyin"
|
30
|
+
to_romanization = "zhuyinfuhao"
|
31
|
+
pinyin = "ni3 hao3"
|
32
|
+
zhuyinfuhao = CJKConverter::ZhRomanization.convert(pinyin, from_romanization, to_romanization)
|
33
|
+
|
34
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for cjk_converter.
# The gem's lib directory is put on the load path first so that the version
# constant can be required before the specification is built.
$LOAD_PATH << File.expand_path("../lib", __FILE__)
require "cjk_converter/version"

Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "cjk_converter"
  spec.version     = CjkConverter::VERSION
  spec.authors     = ["Steven Daniels"]
  spec.email       = ["stevendaniels88@gmail.com"]
  spec.homepage    = ""
  spec.summary     = "A CJK romanization converter for CJK languages."
  spec.description = "A CJK (Chinese, Japanese, and Korean) romanization converter for CJK languages."

  spec.rubyforge_project = "cjk_converter"

  # File lists are taken from git, one path per output line.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name only.
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = %w[lib]
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# encoding: utf-8
# Library entry point: loads each component file from the sibling
# cjk_converter/ directory, in the order listed below.
%w[version pinyin zh_romanization kana fullwidth string].each do |component|
  require File.expand_path("../cjk_converter/#{component}", __FILE__)
end

# Top-level namespace of the library; nothing is added to it in this file.
module CJKConverter
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# encoding: utf-8
module CJKConverter
  # Empty placeholder method; takes no arguments and returns nil.
  def self.hi
  end

  # Lookup table from fullwidth characters (Unicode "Halfwidth and Fullwidth
  # Forms" block) to their halfwidth ASCII counterparts.
  #
  # Covers the digits 0-9, the letters A-Z and a-z, and the punctuation
  # % . : # $ & + - / \ = ; < >.
  #
  # The keys are written as fullwidth code points. With ASCII keys the table
  # would map every character to itself, and an ASCII backslash key written
  # as "\" would leave the string literal unterminated.
  # NOTE(review): fullwidth keys were reconstructed from the constant's name
  # and the file name (fullwidth.rb) -- confirm against the upstream gem.
  FW_HW = {
    "０" => "0",
    "１" => "1",
    "２" => "2",
    "３" => "3",
    "４" => "4",
    "５" => "5",
    "６" => "6",
    "７" => "7",
    "８" => "8",
    "９" => "9",
    "Ａ" => "A",
    "Ｂ" => "B",
    "Ｃ" => "C",
    "Ｄ" => "D",
    "Ｅ" => "E",
    "Ｆ" => "F",
    "Ｇ" => "G",
    "Ｈ" => "H",
    "Ｉ" => "I",
    "Ｊ" => "J",
    "Ｋ" => "K",
    "Ｌ" => "L",
    "Ｍ" => "M",
    "Ｎ" => "N",
    "Ｏ" => "O",
    "Ｐ" => "P",
    "Ｑ" => "Q",
    "Ｒ" => "R",
    "Ｓ" => "S",
    "Ｔ" => "T",
    "Ｕ" => "U",
    "Ｖ" => "V",
    "Ｗ" => "W",
    "Ｘ" => "X",
    "Ｙ" => "Y",
    "Ｚ" => "Z",
    "ａ" => "a",
    "ｂ" => "b",
    "ｃ" => "c",
    "ｄ" => "d",
    "ｅ" => "e",
    "ｆ" => "f",
    "ｇ" => "g",
    "ｈ" => "h",
    "ｉ" => "i",
    "ｊ" => "j",
    "ｋ" => "k",
    "ｌ" => "l",
    "ｍ" => "m",
    "ｎ" => "n",
    "ｏ" => "o",
    "ｐ" => "p",
    "ｑ" => "q",
    "ｒ" => "r",
    "ｓ" => "s",
    "ｔ" => "t",
    "ｕ" => "u",
    "ｖ" => "v",
    "ｗ" => "w",
    "ｘ" => "x",
    "ｙ" => "y",
    "ｚ" => "z",
    "％" => "%",
    "．" => ".",
    "：" => ":",
    "＃" => "#",
    "＄" => "$",
    "＆" => "&",
    "＋" => "+",
    "－" => "-",
    "／" => "/",
    "＼" => "\\",
    "＝" => "=",
    "；" => ";",
    "＜" => "<",
    "＞" => ">"
  }
end
|
@@ -0,0 +1,321 @@
|
|
1
|
+
# encoding: utf-8
module CJKConverter
  # Lookup table from kana (hiragana and katakana, including multi-character
  # digraphs and extended katakana combinations) to romaji. Digits and the
  # "・" / "〜" marks map to themselves.
  #
  # Every key is distinct. The "la/li/lu/le/lo" entries are keyed by the
  # katakana followed by a combining handakuten (U+309A); keying them by the
  # bare katakana would duplicate the "ra/ri/ru/re/ro" keys and, because the
  # last duplicate wins in a Hash literal, silently replace those readings.
  # NOTE(review): the handakuten-marked keys are a reconstruction based on the
  # extended-katakana table of Hepburn romanization -- confirm against the
  # upstream gem.
  KanaToRomaji = {
    # romaji uses the Hepburn system
    "あ" => "a",
    "い" => "i",
    "う" => "u",
    "え" => "e",
    "お" => "o",
    "が" => "ga",
    "か" => "ka",
    "ぎ" => "gi",
    "き" => "ki",
    "ぎゃ" => "gya",
    "きゃ" => "kya",
    "ぎゅ" => "gyu",
    "きゅ" => "kyu",
    "ぎょ" => "gyo",
    "きょ" => "kyo",
    "ぐ" => "gu",
    "く" => "ku",
    "げ" => "ge",
    "け" => "ke",
    "ご" => "go",
    "こ" => "ko",
    "さ" => "sa",
    "ざ" => "za",
    "じ" => "ji",
    "し" => "shi",
    "じゃ" => "ja",
    "しゃ" => "sha",
    "じゅ" => "ju",
    "しゅ" => "shu",
    "じょ" => "jo",
    "しょ" => "sho",
    "す" => "su",
    "ず" => "zu",
    "せ" => "se",
    "ぜ" => "ze",
    "そ" => "so",
    "ぞ" => "zo",
    "だ" => "da",
    "た" => "ta",
    "ち" => "chi",
    "ぢ" => "ji",
    "ちゃ" => "cha",
    "ぢゃ" => "ja",
    "ちゅ" => "chu",
    "ぢゅ" => "ju",
    "ちょ" => "cho",
    "ぢょ" => "jo",
    "つ" => "tsu",
    "づ" => "zu",
    "で" => "de",
    "て" => "te",
    "ど" => "do",
    "と" => "to",
    "な" => "na",
    "に" => "ni",
    "にゃ" => "nya",
    "にゅ" => "nyu",
    "にょ" => "nyo",
    "ぬ" => "nu",
    "ね" => "ne",
    "の" => "no",
    "ば" => "ba",
    "は" => "ha",
    "ぱ" => "pa",
    "び" => "bi",
    "ひ" => "hi",
    "ぴ" => "pi",
    "びゃ" => "bya",
    "ひゃ" => "hya",
    "ぴゃ" => "pya",
    "びゅ" => "byu",
    "ひゅ" => "hyu",
    "ぴゅ" => "pyu",
    "びょ" => "byo",
    "ひょ" => "hyo",
    "ぴょ" => "pyo",
    "ぶ" => "bu",
    "ふ" => "fu",
    "ぷ" => "pu",
    "べ" => "be",
    "へ" => "he",
    "ぺ" => "pe",
    "ぼ" => "bo",
    "ほ" => "ho",
    "ぽ" => "po",
    "ま" => "ma",
    "み" => "mi",
    "みゃ" => "mya",
    "みゅ" => "myu",
    "みょ" => "myo",
    "む" => "mu",
    "め" => "me",
    "も" => "mo",
    "や" => "ya",
    "ゆ" => "yu",
    "よ" => "yo",
    "ら" => "ra",
    "り" => "ri",
    "りゃ" => "rya",
    "りゅ" => "ryu",
    "りょ" => "ryo",
    "る" => "ru",
    "れ" => "re",
    "ろ" => "ro",
    "わ" => "wa",
    "ゐ" => "wi",
    "ゑ" => "we",
    "を" => "wo",
    "ん" => "n",
    "ア" => "a",
    "イ" => "i",
    "ウ" => "u",
    "エ" => "e",
    "オ" => "o",
    "ガ" => "ga",
    "カ" => "ka",
    "ギ" => "gi",
    "キ" => "ki",
    "ギャ" => "gya",
    "キャ" => "kya",
    "ギュ" => "gyu",
    "キュ" => "kyu",
    "ギョ" => "gyo",
    "キョ" => "kyo",
    "グ" => "gu",
    "ク" => "ku",
    "ゲ" => "ge",
    "ケ" => "ke",
    "ゴ" => "go",
    "コ" => "ko",
    "サ" => "sa",
    "ザ" => "za",
    "ジ" => "ji",
    "シ" => "shi",
    "ジャ" => "ja",
    "シャ" => "sha",
    "ジュ" => "ju",
    "シュ" => "shu",
    "ジョ" => "jo",
    "ショ" => "sho",
    "ス" => "su",
    "ズ" => "zu",
    "セ" => "se",
    "ゼ" => "ze",
    "ソ" => "so",
    "ゾ" => "zo",
    "ダ" => "da",
    "タ" => "ta",
    "チ" => "chi",
    "ヂ" => "ji",
    "チャ" => "cha",
    "ヂャ" => "ja",
    "チュ" => "chu",
    "ヂュ" => "ju",
    "チョ" => "cho",
    "ヂョ" => "jo",
    "ツ" => "tsu",
    "ヅ" => "zu",
    "デ" => "de",
    "テ" => "te",
    "ド" => "do",
    "ト" => "to",
    "ナ" => "na",
    "ニ" => "ni",
    "ニャ" => "nya",
    "ニュ" => "nyu",
    "ニョ" => "nyo",
    "ヌ" => "nu",
    "ネ" => "ne",
    "ノ" => "no",
    "バ" => "ba",
    "ハ" => "ha",
    "パ" => "pa",
    "ビ" => "bi",
    "ヒ" => "hi",
    "ピ" => "pi",
    "ビャ" => "bya",
    "ヒャ" => "hya",
    "ピャ" => "pya",
    "ビュ" => "byu",
    "ヒュ" => "hyu",
    "ピュ" => "pyu",
    "ビョ" => "byo",
    "ヒョ" => "hyo",
    "ピョ" => "pyo",
    "ブ" => "bu",
    "フ" => "fu",
    "プ" => "pu",
    "ベ" => "be",
    "ヘ" => "he",
    "ペ" => "pe",
    "ボ" => "bo",
    "ホ" => "ho",
    "ポ" => "po",
    "マ" => "ma",
    "ミ" => "mi",
    "ミャ" => "mya",
    "ミュ" => "myu",
    "ミョ" => "myo",
    "ム" => "mu",
    "メ" => "me",
    "モ" => "mo",
    "ヤ" => "ya",
    "ユ" => "yu",
    "ヨ" => "yo",
    "ラ" => "ra",
    "リ" => "ri",
    "リャ" => "rya",
    "リュ" => "ryu",
    "リョ" => "ryo",
    "ル" => "ru",
    "レ" => "re",
    "ロ" => "ro",
    "ワ" => "wa",
    "ヰ" => "wi",
    "ヱ" => "we",
    "ヲ" => "wo",
    "ン" => "n",
    "イィ" => "yi",
    "イェ" => "ye",
    "ヴ" => "vu",
    "ヴァ" => "va",
    "ウァ" => "wa",
    "ヴィ" => "vi",
    "ウィ" => "wi",
    "ヴィェ" => "vye",
    "ウゥ" => "wu",
    "ヴェ" => "ve",
    "ウェ" => "we",
    "ヴォ" => "vo",
    "ウォ" => "wo",
    "ヴャ" => "vya",
    "ヴュ" => "vyu",
    "ウュ" => "wyu",
    "ヴョ" => "vyo",
    "ギェ" => "gye",
    "キェ" => "kye",
    "グァ" => "gwa",
    "クァ" => "kwa",
    "グィ" => "gwi",
    "クィ" => "kwi",
    "グェ" => "gwe",
    "クェ" => "kwe",
    "グォ" => "gwo",
    "クォ" => "kwo",
    "グヮ" => "gwa",
    "クヮ" => "kwa",
    "ジェ" => "je",
    "シェ" => "she",
    "スィ" => "si",
    "ズィ" => "zi",
    "チェ" => "che",
    "ツァ" => "tsa",
    "ツィ" => "tsi",
    "ツェ" => "tse",
    "ツォ" => "tso",
    "ツュ" => "tsyu",
    "ディ" => "di",
    "ティ" => "ti",
    "デュ" => "dyu",
    "テュ" => "tyu",
    "ドゥ" => "du",
    "トゥ" => "tu",
    "ニェ" => "nye",
    "ビェ" => "bye",
    "ヒェ" => "hye",
    "ピェ" => "pye",
    "ファ" => "fa",
    "フィ" => "fi",
    "フィェ" => "fye",
    "フェ" => "fe",
    "フォ" => "fo",
    "フャ" => "fya",
    "フュ" => "fyu",
    "フョ" => "fyo",
    "ホゥ" => "hu",
    "ミェ" => "mye",
    "ミィ" => "myi",
    # Katakana + combining handakuten (U+309A), written with an escape so the
    # otherwise easy-to-miss combining mark is visible in the source.
    "ラ\u309A" => "la",
    "リ\u309A" => "li",
    "リェ" => "rye",
    "ル\u309A" => "lu",
    "レ\u309A" => "le",
    "ロ\u309A" => "lo",
    "ヷ" => "va",
    "ヸ" => "vi",
    "ヹ" => "ve",
    "ヺ" => "vo",
    '1' => "1",
    '2' => "2",
    '3' => "3",
    '4' => "4",
    '5' => "5",
    '6' => "6",
    '7' => "7",
    '8' => "8",
    '9' => "9",
    '0' => "0",
    '・' => "・",
    "ぁ" => "a",
    "ぃ" => "i",
    # "" => "",
    "ぇ" => "e",
    # "" => "",
    "ァ" => "a",
    "ィ" => "i",
    "ゥ" => "u",
    "ェ" => "e",
    "ォ" => "o",
    "〜" => "〜",
    "ワァ" => "waa",
    "トィ" => "ti",
    "ドュ" => "dyu",
    # NOTE(review): by analogy with "ビュ" => "byu" this might be expected to
    # read "byu"; the existing value is kept unchanged -- confirm intent.
    "ブュ" => "vyu",
  }
end
|
321
|
+
|