cjk_converter 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in cjk_converter.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ CJK Converter: a romanization converter for CJK languages.
2
+ ====================================
3
+
4
+ ## DESCRIPTION
5
+
6
+ A romanization converter for CJK (Chinese, Japanese, and Korean) languages.
7
+
8
+ CJK Converter extends the String class with various convenience
9
+ methods.
10
+
11
+ ## INSTALLATION
12
+
13
+ Install as a gem
14
+
15
+ $ [sudo] gem install cjk_converter
16
+
17
+ ## Usage
18
+
19
+ Converting Pinyin written with tone numbers to Pinyin written with tone marks
20
+ (e.g. "ni3 hao3" => "nǐ hǎo"):
21
+
22
+ pinyin_with_numbers = "ni3 hao3"
23
+ pinyin = CJKConverter::Pinyin.convert pinyin_with_numbers
24
+ puts pinyin #=> "nǐ hǎo"
25
+
26
+ Converting between different Chinese romanization systems:
27
+ (e.g. Pinyin -> Zhuyin Fuhao (注音符号))
28
+
29
+ from_romanization = "pinyin"
30
+ to_romanization = "zhuyinfuhao"
31
+ pinyin = "ni3 hao3"
32
+ zhuyinfuhao = CJKConverter::ZhRomanization.convert(pinyin, from_romanization, to_romanization)
33
+
34
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "cjk_converter/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "cjk_converter"
7
+ s.version = CjkConverter::VERSION
8
+ s.authors = ["Steven Daniels"]
9
+ s.email = ["stevendaniels88@gmail.com"]
10
+ s.homepage = ""
11
+ s.summary = %q{A CJK romanization converter for CJK languages.}
12
+ s.description = %q{A CJK (Chinese, Japanese, and Korean) romanization converter for CJK languages.}
13
+
14
+ s.rubyforge_project = "cjk_converter"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+ end
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../cjk_converter/version", __FILE__)
3
+ require File.expand_path("../cjk_converter/pinyin", __FILE__)
4
+ require File.expand_path("../cjk_converter/zh_romanization", __FILE__)
5
+ require File.expand_path("../cjk_converter/kana", __FILE__)
6
+ require File.expand_path("../cjk_converter/fullwidth", __FILE__)
7
+ require File.expand_path("../cjk_converter/string", __FILE__)
8
+
9
+ module CJKConverter
10
+ end
@@ -0,0 +1,83 @@
1
+ # encoding: utf-8
2
+ module CJKConverter
3
+ def self.hi
4
+ end
5
+ FW_HW ={
6
+ "0" => "0",
7
+ "1" => "1",
8
+ "2" => "2",
9
+ "3" => "3",
10
+ "4" => "4",
11
+ "5" => "5",
12
+ "6" => "6",
13
+ "7" => "7",
14
+ "8" => "8",
15
+ "9" => "9",
16
+ "A" => "A",
17
+ "B" => "B",
18
+ "C" => "C",
19
+ "D" => "D",
20
+ "E" => "E",
21
+ "F" => "F",
22
+ "G" => "G",
23
+ "H" => "H",
24
+ "I" => "I",
25
+ "J" => "J",
26
+ "K" => "K",
27
+ "L" => "L",
28
+ "M" => "M",
29
+ "N" => "N",
30
+ "O" => "O",
31
+ "P" => "P",
32
+ "Q" => "Q",
33
+ "R" => "R",
34
+ "S" => "S",
35
+ "T" => "T",
36
+ "U" => "U",
37
+ "V" => "V",
38
+ "W" => "W",
39
+ "X" => "X",
40
+ "Y" => "Y",
41
+ "Z" => "Z",
42
+ "a" => "a",
43
+ "b" => "b",
44
+ "c" => "c",
45
+ "d" => "d",
46
+ "e" => "e",
47
+ "f" => "f",
48
+ "g" => "g",
49
+ "h" => "h",
50
+ "i" => "i",
51
+ "j" => "j",
52
+ "k" => "k",
53
+ "l" => "l",
54
+ "m" => "m",
55
+ "n" => "n",
56
+ "o" => "o",
57
+ "p" => "p",
58
+ "q" => "q",
59
+ "r" => "r",
60
+ "s" => "s",
61
+ "t" => "t",
62
+ "u" => "u",
63
+ "v" => "v",
64
+ "w" => "w",
65
+ "x" => "x",
66
+ "y" => "y",
67
+ "z" => "z",
68
+ "%" => '%',
69
+ "." => '.',
70
+ ':' => ':',
71
+ "#" => '#',
72
+ "$" => "$",
73
+ "&" => "&",
74
+ "+" => "+",
75
+ "-" => "-",
76
+ "/" => "/",
77
+ "\" => '\\',
78
+ '=' => '=',
79
+ ";" => ";",
80
+ "<" => "<",
81
+ ">" => ">"
82
+ }
83
+ end
@@ -0,0 +1,321 @@
1
+ # encoding: utf-8
2
+ module CJKConverter
3
+ KanaToRomaji = {
4
+ #romaji uses the hepburn system
5
+ "あ" => "a",
6
+ "い" => "i",
7
+ "う" => "u",
8
+ "え" => "e",
9
+ "お" => "o",
10
+ "が" => "ga",
11
+ "か" => "ka",
12
+ "ぎ" => "gi",
13
+ "き" => "ki",
14
+ "ぎゃ" => "gya",
15
+ "きゃ" => "kya",
16
+ "ぎゅ" => "gyu",
17
+ "きゅ" => "kyu",
18
+ "ぎょ" => "gyo",
19
+ "きょ" => "kyo",
20
+ "ぐ" => "gu",
21
+ "く" => "ku",
22
+ "げ" => "ge",
23
+ "け" => "ke",
24
+ "ご" => "go",
25
+ "こ" => "ko",
26
+ "さ" => "sa",
27
+ "ざ" => "za",
28
+ "じ" => "ji",
29
+ "し" => "shi",
30
+ "じゃ" => "ja",
31
+ "しゃ" => "sha",
32
+ "じゅ" => "ju",
33
+ "しゅ" => "shu",
34
+ "じょ" => "jo",
35
+ "しょ" => "sho",
36
+ "す" => "su",
37
+ "ず" => "zu",
38
+ "せ" => "se",
39
+ "ぜ" => "ze",
40
+ "そ" => "so",
41
+ "ぞ" => "zo",
42
+ "だ" => "da",
43
+ "た" => "ta",
44
+ "ち" => "chi",
45
+ "ぢ" => "ji",
46
+ "ちゃ" => "cha",
47
+ "ぢゃ" => "ja",
48
+ "ちゅ" => "chu",
49
+ "ぢゅ" => "ju",
50
+ "ちょ" => "cho",
51
+ "ぢょ" => "jo",
52
+ "つ" => "tsu",
53
+ "づ" => "zu",
54
+ "で" => "de",
55
+ "て" => "te",
56
+ "ど" => "do",
57
+ "と" => "to",
58
+ "な" => "na",
59
+ "に" => "ni",
60
+ "にゃ" => "nya",
61
+ "にゅ" => "nyu",
62
+ "にょ" => "nyo",
63
+ "ぬ" => "nu",
64
+ "ね" => "ne",
65
+ "の" => "no",
66
+ "ば" => "ba",
67
+ "は" => "ha",
68
+ "ぱ" => "pa",
69
+ "び" => "bi",
70
+ "ひ" => "hi",
71
+ "ぴ" => "pi",
72
+ "びゃ" => "bya",
73
+ "ひゃ" => "hya",
74
+ "ぴゃ" => "pya",
75
+ "びゅ" => "byu",
76
+ "ひゅ" => "hyu",
77
+ "ぴゅ" => "pyu",
78
+ "びょ" => "byo",
79
+ "ひょ" => "hyo",
80
+ "ぴょ" => "pyo",
81
+ "ぶ" => "bu",
82
+ "ふ" => "fu",
83
+ "ぷ" => "pu",
84
+ "べ" => "be",
85
+ "へ" => "he",
86
+ "ぺ" => "pe",
87
+ "ぼ" => "bo",
88
+ "ほ" => "ho",
89
+ "ぽ" => "po",
90
+ "ま" => "ma",
91
+ "み" => "mi",
92
+ "みゃ" => "mya",
93
+ "みゅ" => "myu",
94
+ "みょ" => "myo",
95
+ "む" => "mu",
96
+ "め" => "me",
97
+ "も" => "mo",
98
+ "や" => "ya",
99
+ "ゆ" => "yu",
100
+ "よ" => "yo",
101
+ "ら" => "ra",
102
+ "り" => "ri",
103
+ "りゃ" => "rya",
104
+ "りゅ" => "ryu",
105
+ "りょ" => "ryo",
106
+ "る" => "ru",
107
+ "れ" => "re",
108
+ "ろ" => "ro",
109
+ "わ" => "wa",
110
+ "ゐ" => "wi",
111
+ "ゑ" => "we",
112
+ "を" => "wo",
113
+ "ん" => "n",
114
+ "ア" => "a",
115
+ "イ" => "i",
116
+ "ウ" => "u",
117
+ "エ" => "e",
118
+ "オ" => "o",
119
+ "ガ" => "ga",
120
+ "カ" => "ka",
121
+ "ギ" => "gi",
122
+ "キ" => "ki",
123
+ "ギャ" => "gya",
124
+ "キャ" => "kya",
125
+ "ギュ" => "gyu",
126
+ "キュ" => "kyu",
127
+ "ギョ" => "gyo",
128
+ "キョ" => "kyo",
129
+ "グ" => "gu",
130
+ "ク" => "ku",
131
+ "ゲ" => "ge",
132
+ "ケ" => "ke",
133
+ "ゴ" => "go",
134
+ "コ" => "ko",
135
+ "サ" => "sa",
136
+ "ザ" => "za",
137
+ "ジ" => "ji",
138
+ "シ" => "shi",
139
+ "ジャ" => "ja",
140
+ "シャ" => "sha",
141
+ "ジュ" => "ju",
142
+ "シュ" => "shu",
143
+ "ジョ" => "jo",
144
+ "ショ" => "sho",
145
+ "ス" => "su",
146
+ "ズ" => "zu",
147
+ "セ" => "se",
148
+ "ゼ" => "ze",
149
+ "ソ" => "so",
150
+ "ゾ" => "zo",
151
+ "ダ" => "da",
152
+ "タ" => "ta",
153
+ "チ" => "chi",
154
+ "ヂ" => "ji",
155
+ "チャ" => "cha",
156
+ "ヂャ" => "ja",
157
+ "チュ" => "chu",
158
+ "ヂュ" => "ju",
159
+ "チョ" => "cho",
160
+ "ヂョ" => "jo",
161
+ "ツ" => "tsu",
162
+ "ヅ" => "zu",
163
+ "デ" => "de",
164
+ "テ" => "te",
165
+ "ド" => "do",
166
+ "ト" => "to",
167
+ "ナ" => "na",
168
+ "ニ" => "ni",
169
+ "ニャ" => "nya",
170
+ "ニュ" => "nyu",
171
+ "ニョ" => "nyo",
172
+ "ヌ" => "nu",
173
+ "ネ" => "ne",
174
+ "ノ" => "no",
175
+ "バ" => "ba",
176
+ "ハ" => "ha",
177
+ "パ" => "pa",
178
+ "ビ" => "bi",
179
+ "ヒ" => "hi",
180
+ "ピ" => "pi",
181
+ "ビャ" => "bya",
182
+ "ヒャ" => "hya",
183
+ "ピャ" => "pya",
184
+ "ビュ" => "byu",
185
+ "ヒュ" => "hyu",
186
+ "ピュ" => "pyu",
187
+ "ビョ" => "byo",
188
+ "ヒョ" => "hyo",
189
+ "ピョ" => "pyo",
190
+ "ブ" => "bu",
191
+ "フ" => "fu",
192
+ "プ" => "pu",
193
+ "ベ" => "be",
194
+ "ヘ" => "he",
195
+ "ペ" => "pe",
196
+ "ボ" => "bo",
197
+ "ホ" => "ho",
198
+ "ポ" => "po",
199
+ "マ" => "ma",
200
+ "ミ" => "mi",
201
+ "ミャ" => "mya",
202
+ "ミュ" => "myu",
203
+ "ミョ" => "myo",
204
+ "ム" => "mu",
205
+ "メ" => "me",
206
+ "モ" => "mo",
207
+ "ヤ" => "ya",
208
+ "ユ" => "yu",
209
+ "ヨ" => "yo",
210
+ "ラ" => "ra",
211
+ "リ" => "ri",
212
+ "リャ" => "rya",
213
+ "リュ" => "ryu",
214
+ "リョ" => "ryo",
215
+ "ル" => "ru",
216
+ "レ" => "re",
217
+ "ロ" => "ro",
218
+ "ワ" => "wa",
219
+ "ヰ" => "wi",
220
+ "ヱ" => "we",
221
+ "ヲ" => "wo",
222
+ "ン" => "n",
223
+ "イィ" => "yi",
224
+ "イェ" => "ye",
225
+ "ヴ" => "vu",
226
+ "ヴァ" => "va",
227
+ "ウァ" => "wa",
228
+ "ヴィ" => "vi",
229
+ "ウィ" => "wi",
230
+ "ヴィェ" => "vye",
231
+ "ウゥ" => "wu",
232
+ "ヴェ" => "ve",
233
+ "ウェ" => "we",
234
+ "ヴォ" => "vo",
235
+ "ウォ" => "wo",
236
+ "ヴャ" => "vya",
237
+ "ヴュ" => "vyu",
238
+ "ウュ" => "wyu",
239
+ "ヴョ" => "vyo",
240
+ "ギェ" => "gye",
241
+ "キェ" => "kye",
242
+ "グァ" => "gwa",
243
+ "クァ" => "kwa",
244
+ "グィ" => "gwi",
245
+ "クィ" => "kwi",
246
+ "グェ" => "gwe",
247
+ "クェ" => "kwe",
248
+ "グォ" => "gwo",
249
+ "クォ" => "kwo",
250
+ "グヮ" => "gwa",
251
+ "クヮ" => "kwa",
252
+ "ジェ" => "je",
253
+ "シェ" => "she",
254
+ "スィ" => "si",
255
+ "ズィ" => "zi",
256
+ "チェ" => "che",
257
+ "ツァ" => "tsa",
258
+ "ツィ" => "tsi",
259
+ "ツェ" => "tse",
260
+ "ツォ" => "tso",
261
+ "ツュ" => "tsyu",
262
+ "ディ" => "di",
263
+ "ティ" => "ti",
264
+ "デュ" => "dyu",
265
+ "テュ" => "tyu",
266
+ "ドゥ" => "du",
267
+ "トゥ" => "tu",
268
+ "ニェ" => "nye",
269
+ "ビェ" => "bye",
270
+ "ヒェ" => "hye",
271
+ "ピェ" => "pye",
272
+ "ファ" => "fa",
273
+ "フィ" => "fi",
274
+ "フィェ" => "fye",
275
+ "フェ" => "fe",
276
+ "フォ" => "fo",
277
+ "フャ" => "fya",
278
+ "フュ" => "fyu",
279
+ "フョ" => "fyo",
280
+ "ホゥ" => "hu",
281
+ "ミェ" => "mye",
282
+ "ミィ" => "myi",
283
+ "ラ" => "la",
284
+ "リ" => "li",
285
+ "リェ" => "rye",
286
+ "ル" => "lu",
287
+ "レ" => "le",
288
+ "ロ" => "lo",
289
+ "ヷ" => "va",
290
+ "ヸ" => "vi",
291
+ "ヹ" => "ve",
292
+ "ヺ" => "vo",
293
+ '1' => "1",
294
+ '2' => "2",
295
+ '3' => "3",
296
+ '4' => "4",
297
+ '5' => "5",
298
+ '6' => "6",
299
+ '7' => "7",
300
+ '8' => "8",
301
+ '9' => "9",
302
+ '0' => "0",
303
+ '・' => "・",
304
+ "ぁ" => "a",
305
+ "ぃ" => "i",
306
+ # "" => "",
307
+ "ぇ" => "e",
308
+ # "" => "",
309
+ "ァ" => "a",
310
+ "ィ" => "i",
311
+ "ゥ" => "u",
312
+ "ェ" => "e",
313
+ "ォ" => "o",
314
+ "〜" => "〜",
315
+ "ワァ" => "waa",
316
+ "トィ" => "ti",
317
+ "ドュ" => "dyu",
318
+ "ブュ" => "vyu",
319
+ }
320
+ end
321
+