cjk_converter 0.0.17

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
# Fetch gems over HTTPS so the index and downloads cannot be tampered with
# in transit (the plain-HTTP source is flagged as insecure by Bundler).
source "https://rubygems.org"

# Specify your gem's dependencies in cjk_converter.gemspec
gemspec
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ CJK Converter: a romanization converter for CJK languages.
2
+ ====================================
3
+
4
+ ## DESCRIPTION
5
+
6
+ A romanization converter for CJK (Chinese, Japanese, and Korean) languages.
7
+
8
+ CJK Converter extends the String class with various convenience
9
+ functions.
10
+
11
+ ## INSTALLATION
12
+
13
+ Install as a gem
14
+
15
+ $ [sudo] gem install cjk_converter
16
+
17
+ ## Usage
18
+
19
+ Converting Pinyin with numbered tone marks to properly formatted pinyin
20
+ (e.g. "ni3 hao3" => "nǐ hǎo")
21
+
22
+ pinyin_with_numbers = "ni3 hao3"
23
+ pinyin = CJKConverter::Pinyin.convert pinyin_with_numbers
24
+ puts pinyin #=> "nǐ hǎo"
25
+
26
+ Converting between different Chinese romanization systems:
27
+ (e.g. Pinyin -> Zhuyin Fuhao (注音符号))
28
+
29
+ from_romanization = "pinyin"
30
+ to_romanization = "zhuyinfuhao"
31
+ pinyin = "ni3 hao3"
32
+ zhuyinfuhao = CJKConverter::ZhRomanization.convert(pinyin, from_romanization, to_romanization)
33
+
34
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,20 @@
# -*- encoding: utf-8 -*-
# Gem specification for cjk_converter.

# Put this gem's lib/ directory on the load path so the version file can be
# required before the gem is installed.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)
require "cjk_converter/version"

Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "cjk_converter"
  spec.version     = CjkConverter::VERSION
  spec.authors     = ["Steven Daniels"]
  spec.email       = ["stevendaniels88@gmail.com"]
  spec.homepage    = ""
  spec.summary     = "A CJK romanization converter for CJK languages."
  spec.description = "A CJK (Chinese, Japanese, and Korean) romanization converter for CJK languages."

  spec.rubyforge_project = "cjk_converter"

  # File lists are taken from what git tracks, one path per output line.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are registered by bare file name, without the bin/ prefix.
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,10 @@
# encoding: utf-8
# Entry point for the gem: loads every component file from the sibling
# cjk_converter/ directory, in the order listed below.
%w[version pinyin zh_romanization kana fullwidth string].each do |component|
  require File.expand_path("../cjk_converter/#{component}", __FILE__)
end

# Top-level namespace; its contents are defined in the files required above.
module CJKConverter
end
@@ -0,0 +1,83 @@
# encoding: utf-8
module CJKConverter
  # Empty placeholder method; it has no body and therefore returns nil.
  def self.hi
  end

  # Halfwidth (ASCII) characters covered by the table, in table order:
  # digits, uppercase letters, lowercase letters, then punctuation.
  halfwidth_chars =
    ("0".."9").to_a + ("A".."Z").to_a + ("a".."z").to_a +
    ['%', '.', ':', '#', '$', '&', '+', '-', '/', '\\', '=', ';', '<', '>']

  # FW_HW maps a fullwidth character to its halfwidth (ASCII) counterpart,
  # e.g. U+FF21 (fullwidth "A") => "A".
  #
  # The keys are generated rather than typed literally: every fullwidth form
  # in U+FF01..U+FF5E sits exactly 0xFEE0 code points above its ASCII
  # counterpart. Building the keys this way keeps them fullwidth even if the
  # file passes through a tool that normalizes Unicode, which would otherwise
  # collapse each key onto its value and turn the backslash key into an
  # unterminated string literal.
  # NOTE(review): this assumes the original keys were the fullwidth forms of
  # the listed values -- confirm against the published gem.
  FW_HW = halfwidth_chars.each_with_object({}) do |half, table|
    table[[half.ord + 0xFEE0].pack("U")] = half
  end.freeze
end
@@ -0,0 +1,321 @@
# encoding: utf-8
module CJKConverter
  # Lookup table from kana to romaji. Keys are single kana or multi-character
  # kana combinations (e.g. "きゃ"); values are their romanized spelling.
  # Digits and a few symbols are also present and map to themselves.
  #
  # Entry order is kept as originally written. Each key appears exactly once:
  # a repeated key in a Ruby hash literal silently replaces the earlier value,
  # which previously turned "ラ", "リ", "ル", "レ", "ロ" into "la".."lo".
  KanaToRomaji = {
    # romaji uses the hepburn system

    # --- Hiragana ---
    "あ" => "a", "い" => "i", "う" => "u", "え" => "e", "お" => "o",
    "が" => "ga", "か" => "ka", "ぎ" => "gi", "き" => "ki",
    "ぎゃ" => "gya", "きゃ" => "kya", "ぎゅ" => "gyu", "きゅ" => "kyu",
    "ぎょ" => "gyo", "きょ" => "kyo",
    "ぐ" => "gu", "く" => "ku", "げ" => "ge", "け" => "ke", "ご" => "go", "こ" => "ko",
    "さ" => "sa", "ざ" => "za", "じ" => "ji", "し" => "shi",
    "じゃ" => "ja", "しゃ" => "sha", "じゅ" => "ju", "しゅ" => "shu",
    "じょ" => "jo", "しょ" => "sho",
    "す" => "su", "ず" => "zu", "せ" => "se", "ぜ" => "ze", "そ" => "so", "ぞ" => "zo",
    "だ" => "da", "た" => "ta", "ち" => "chi", "ぢ" => "ji",
    "ちゃ" => "cha", "ぢゃ" => "ja", "ちゅ" => "chu", "ぢゅ" => "ju",
    "ちょ" => "cho", "ぢょ" => "jo",
    "つ" => "tsu", "づ" => "zu", "で" => "de", "て" => "te", "ど" => "do", "と" => "to",
    "な" => "na", "に" => "ni", "にゃ" => "nya", "にゅ" => "nyu", "にょ" => "nyo",
    "ぬ" => "nu", "ね" => "ne", "の" => "no",
    "ば" => "ba", "は" => "ha", "ぱ" => "pa", "び" => "bi", "ひ" => "hi", "ぴ" => "pi",
    "びゃ" => "bya", "ひゃ" => "hya", "ぴゃ" => "pya",
    "びゅ" => "byu", "ひゅ" => "hyu", "ぴゅ" => "pyu",
    "びょ" => "byo", "ひょ" => "hyo", "ぴょ" => "pyo",
    "ぶ" => "bu", "ふ" => "fu", "ぷ" => "pu", "べ" => "be", "へ" => "he", "ぺ" => "pe",
    "ぼ" => "bo", "ほ" => "ho", "ぽ" => "po",
    "ま" => "ma", "み" => "mi", "みゃ" => "mya", "みゅ" => "myu", "みょ" => "myo",
    "む" => "mu", "め" => "me", "も" => "mo",
    "や" => "ya", "ゆ" => "yu", "よ" => "yo",
    "ら" => "ra", "り" => "ri", "りゃ" => "rya", "りゅ" => "ryu", "りょ" => "ryo",
    "る" => "ru", "れ" => "re", "ろ" => "ro",
    "わ" => "wa", "ゐ" => "wi", "ゑ" => "we", "を" => "wo", "ん" => "n",

    # --- Katakana ---
    "ア" => "a", "イ" => "i", "ウ" => "u", "エ" => "e", "オ" => "o",
    "ガ" => "ga", "カ" => "ka", "ギ" => "gi", "キ" => "ki",
    "ギャ" => "gya", "キャ" => "kya", "ギュ" => "gyu", "キュ" => "kyu",
    "ギョ" => "gyo", "キョ" => "kyo",
    "グ" => "gu", "ク" => "ku", "ゲ" => "ge", "ケ" => "ke", "ゴ" => "go", "コ" => "ko",
    "サ" => "sa", "ザ" => "za", "ジ" => "ji", "シ" => "shi",
    "ジャ" => "ja", "シャ" => "sha", "ジュ" => "ju", "シュ" => "shu",
    "ジョ" => "jo", "ショ" => "sho",
    "ス" => "su", "ズ" => "zu", "セ" => "se", "ゼ" => "ze", "ソ" => "so", "ゾ" => "zo",
    "ダ" => "da", "タ" => "ta", "チ" => "chi", "ヂ" => "ji",
    "チャ" => "cha", "ヂャ" => "ja", "チュ" => "chu", "ヂュ" => "ju",
    "チョ" => "cho", "ヂョ" => "jo",
    "ツ" => "tsu", "ヅ" => "zu", "デ" => "de", "テ" => "te", "ド" => "do", "ト" => "to",
    "ナ" => "na", "ニ" => "ni", "ニャ" => "nya", "ニュ" => "nyu", "ニョ" => "nyo",
    "ヌ" => "nu", "ネ" => "ne", "ノ" => "no",
    "バ" => "ba", "ハ" => "ha", "パ" => "pa", "ビ" => "bi", "ヒ" => "hi", "ピ" => "pi",
    "ビャ" => "bya", "ヒャ" => "hya", "ピャ" => "pya",
    "ビュ" => "byu", "ヒュ" => "hyu", "ピュ" => "pyu",
    "ビョ" => "byo", "ヒョ" => "hyo", "ピョ" => "pyo",
    "ブ" => "bu", "フ" => "fu", "プ" => "pu", "ベ" => "be", "ヘ" => "he", "ペ" => "pe",
    "ボ" => "bo", "ホ" => "ho", "ポ" => "po",
    "マ" => "ma", "ミ" => "mi", "ミャ" => "mya", "ミュ" => "myu", "ミョ" => "myo",
    "ム" => "mu", "メ" => "me", "モ" => "mo",
    "ヤ" => "ya", "ユ" => "yu", "ヨ" => "yo",
    "ラ" => "ra", "リ" => "ri", "リャ" => "rya", "リュ" => "ryu", "リョ" => "ryo",
    "ル" => "ru", "レ" => "re", "ロ" => "ro",
    "ワ" => "wa", "ヰ" => "wi", "ヱ" => "we", "ヲ" => "wo", "ン" => "n",

    # --- Extended katakana combinations ---
    "イィ" => "yi", "イェ" => "ye",
    "ヴ" => "vu", "ヴァ" => "va", "ウァ" => "wa", "ヴィ" => "vi", "ウィ" => "wi",
    "ヴィェ" => "vye", "ウゥ" => "wu", "ヴェ" => "ve", "ウェ" => "we",
    "ヴォ" => "vo", "ウォ" => "wo",
    "ヴャ" => "vya", "ヴュ" => "vyu", "ウュ" => "wyu", "ヴョ" => "vyo",
    "ギェ" => "gye", "キェ" => "kye",
    "グァ" => "gwa", "クァ" => "kwa", "グィ" => "gwi", "クィ" => "kwi",
    "グェ" => "gwe", "クェ" => "kwe", "グォ" => "gwo", "クォ" => "kwo",
    "グヮ" => "gwa", "クヮ" => "kwa",
    "ジェ" => "je", "シェ" => "she", "スィ" => "si", "ズィ" => "zi", "チェ" => "che",
    "ツァ" => "tsa", "ツィ" => "tsi", "ツェ" => "tse", "ツォ" => "tso", "ツュ" => "tsyu",
    "ディ" => "di", "ティ" => "ti", "デュ" => "dyu", "テュ" => "tyu",
    "ドゥ" => "du", "トゥ" => "tu",
    "ニェ" => "nye", "ビェ" => "bye", "ヒェ" => "hye", "ピェ" => "pye",
    "ファ" => "fa", "フィ" => "fi", "フィェ" => "fye", "フェ" => "fe", "フォ" => "fo",
    "フャ" => "fya", "フュ" => "fyu", "フョ" => "fyo",
    "ホゥ" => "hu", "ミェ" => "mye", "ミィ" => "myi",

    # L-row: the plain katakana keys here duplicated the r-row above and
    # overrode it. They are keyed with the combining semi-voiced mark
    # (U+309A) so that they no longer collide.
    # NOTE(review): assumes the original keys carried a handakuten that was
    # lost in transit -- confirm against the published gem.
    "ラ\u309A" => "la",
    "リ\u309A" => "li",
    "リェ" => "rye",
    "ル\u309A" => "lu",
    "レ\u309A" => "le",
    "ロ\u309A" => "lo",

    "ヷ" => "va", "ヸ" => "vi", "ヹ" => "ve", "ヺ" => "vo",

    # --- Digits and symbols (map to themselves) ---
    '1' => "1", '2' => "2", '3' => "3", '4' => "4", '5' => "5",
    '6' => "6", '7' => "7", '8' => "8", '9' => "9", '0' => "0",
    '・' => "・",

    # --- Small kana ---
    # "ぅ" and "ぉ" fill the two slots that were left as empty commented-out
    # placeholders, mirroring the small katakana "ゥ" and "ォ" entries below.
    "ぁ" => "a", "ぃ" => "i", "ぅ" => "u", "ぇ" => "e", "ぉ" => "o",
    "ァ" => "a", "ィ" => "i", "ゥ" => "u", "ェ" => "e", "ォ" => "o",
    "〜" => "〜",

    # --- Additional combinations ---
    "ワァ" => "waa",
    "トィ" => "ti",
    "ドュ" => "dyu",
    "ブュ" => "vyu",
  }.freeze
end