mojinizer 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +82 -0
- data/README.md +330 -0
- data/Rakefile +1 -0
- data/lib/mojinizer/conversion.rb +150 -0
- data/lib/mojinizer/detection.rb +68 -0
- data/lib/mojinizer/romaji_tables.rb +105 -0
- data/lib/mojinizer/version.rb +3 -0
- data/lib/mojinizer.rb +9 -0
- data/mojinizer.gemspec +26 -0
- data/spec/mojinizer_spec.rb +218 -0
- data/spec/spec_helper.rb +26 -0
- metadata +132 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 666b4a71f6827c81a227ff94392be444d10e765d
|
4
|
+
data.tar.gz: 59ac86b46149b7b7848be4639e90cf43aeadbc71
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fa21209d9b45a8354310c5475089f86d7472a63506f165319959f5895fe23fdedd2f6c0ae484fd06c829974db5d2afc5685308677e418c2f13a66a7e086e283f
|
7
|
+
data.tar.gz: 7b7e555a40ea337e4f5f2e26afa0ac0c0ac4bbae140a9196c215544a0f18eb30053c536c82c62ed5ddf434e2945dbaa43374be4cfcb69c5a6619e404dd7351dd
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
Copyright (c) 2013 Christopher Kobayashi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
|
25
|
+
-------------------------------------------------------------------------------
|
26
|
+
Christopher Kobayashi (ckobayashi at ikayzo.com 2013-03-26)
|
27
|
+
https://github.com/ikayzo/mojinizer
|
28
|
+
|
29
|
+
A gem for converting between hiragana, katakana, and romaji.
|
30
|
+
|
31
|
+
This gem:
|
32
|
+
|
33
|
+
* does _NOT_ convert kanji characters
|
34
|
+
* adds Japanese kana detection and conversion methods to the String class
|
35
|
+
* uses [moji](https://github.com/gimite/moji) to detect and convert kana
|
36
|
+
strings (i.e., hiragana, katakana)
|
37
|
+
* uses code from [romajinizer](https://github.com/joeellis/romajinizer) to
|
38
|
+
convert kana to romaji and vice versa
|
39
|
+
|
40
|
+
Since this gem is not backwards compatible with either gem, following
|
41
|
+
community etiquette we have republished this gem under a different name and
|
42
|
+
started over with the version number.
|
43
|
+
|
44
|
+
Versioning is based on the [Semantic Versioning standard](http://semver.org/)
|
45
|
+
-------------------------------------------------------------------------------
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
LICENSE HISTORY of the Romajinizer gem (originally called kana2rom.rb)
|
50
|
+
-------------------------------------------------------------------------------
|
51
|
+
K.Kodama 2002.06
|
52
|
+
This script is distributed freely in the sense of GNU General Public License.
|
53
|
+
http://www.gnu.org/licenses/gpl.html
|
54
|
+
#
|
55
|
+
-------------------------------------------------------------------------------
|
56
|
+
Paul Chapman (paul [a../t] longweekendmobile 2010-04-01)
|
57
|
+
Repaired script to work with modern Ruby versions (1.86+), added comments,
|
58
|
+
made it support gaijin friendly transliterations!
|
59
|
+
kana2kana was added by Paul 2009-05-12 22:31
|
60
|
+
-------------------------------------------------------------------------------
|
61
|
+
Joe Ellis (joe at squarefour.net 2011-03-09)
|
62
|
+
Added a few more edge cases ('n romaji support),
|
63
|
+
Started gemifications so it can easily be used in any project
|
64
|
+
Added normalization for double nn so that こんばn will still be converted to
|
65
|
+
こんばん properly
|
66
|
+
MIT License
|
67
|
+
-------------------------------------------------------------------------------
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
LICENSE HISTORY of the Moji gem
|
72
|
+
-------------------------------------------------------------------------------
|
73
|
+
ライセンス:
|
74
|
+
|
75
|
+
Public Domainです。煮るなり焼くなりご自由に。
|
76
|
+
|
77
|
+
Literal translation -
|
78
|
+
|
79
|
+
LICENSE:
|
80
|
+
|
81
|
+
This is Public Domain. Boil it, bake it, use it freely.
|
82
|
+
-------------------------------------------------------------------------------
|
data/README.md
ADDED
@@ -0,0 +1,330 @@
|
|
1
|
+
# Mojinizer
|
2
|
+
|
3
|
+
A gem for converting between hiragana, katakana, and romaji.
|
4
|
+
|
5
|
+
This gem:
|
6
|
+
|
7
|
+
* does _NOT_ convert kanji characters
|
8
|
+
* adds Japanese kana detection and conversion methods to the String class
|
9
|
+
* uses [moji](https://github.com/gimite/moji) to detect and convert kana strings (i.e., hiragana, katakana)
|
10
|
+
* uses code from [romajinizer](https://github.com/joeellis/romajinizer) to convert kana to romaji and vice versa
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
Add this line to your application's `Gemfile`:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
gem 'mojinizer'
|
20
|
+
```
|
21
|
+
|
22
|
+
And then execute:
|
23
|
+
|
24
|
+
```term
|
25
|
+
$ bundle
|
26
|
+
```
|
27
|
+
|
28
|
+
Or install it yourself as:
|
29
|
+
|
30
|
+
```term
|
31
|
+
$ gem install mojinizer
|
32
|
+
```
|
33
|
+
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
Japanese string conversion and detection methods are added to the `String` class. Call these like you would call any `String` object's methods. And you can chain them too.
|
37
|
+
|
38
|
+
### Conversion Methods
|
39
|
+
|
40
|
+
* Hiragana/katakana --> romaji conversion (平仮名/片仮名 --> ローマ字 変換)
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
"つくえ".romaji #=> "tsukue"
|
44
|
+
"ツクエ".romaji #=> "tsukue"
|
45
|
+
```
|
46
|
+
|
47
|
+
* Katakana/romaji --> hiragana conversion (片仮名/ローマ字 --> 平仮名 変換)
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
"ツクエ".hiragana #=> "つくえ"
|
51
|
+
"tsukue".hiragana #=> "つくえ"
|
52
|
+
```
|
53
|
+
|
54
|
+
* Hiragana/romaji --> katakana conversion (平仮名/ローマ字 --> 片仮名 変換)
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
"つくえ".katakana #=> "ツクエ"
|
58
|
+
"tsukue".katakana #=> "ツクエ"
|
59
|
+
```
|
60
|
+
|
61
|
+
* Hiragana --> katakana conversion (平仮名 --> 片仮名 変換)
|
62
|
+
* Katakana --> hiragana conversion (片仮名 --> 平仮名 変換)
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
"つくえ".hira_to_kata #=> "ツクエ"
|
66
|
+
"ツクエ".kata_to_hira #=> "つくえ"
|
67
|
+
```
|
68
|
+
|
69
|
+
* Zenkaku --> hankaku conversion (全角 --> 半角 文字種変換)
|
70
|
+
* Hankaku --> zenkaku conversion (半角 --> 全角 文字種変換)
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
"アロハ".zen_to_han #=> "ｱﾛﾊ"
|
74
|
+
"Ａｌｏｈａ！".zen_to_han #=> "Aloha!"
|
75
|
+
"ｱﾛﾊ".han_to_zen #=> "アロハ"
|
76
|
+
"Aloha!".han_to_zen #=> "Ａｌｏｈａ！"
|
77
|
+
```
|
78
|
+
|
79
|
+
* Chaining conversion methods
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
"tsukue".katakana.zen_to_han #=> "ﾂｸｴ"
|
83
|
+
"ツクエ".han_to_zen.hiragana #=> "つくえ"
|
84
|
+
"ツクエ".han_to_zen.romaji #=> "tsukue"
|
85
|
+
"ツクエ".han_to_zen.romaji.upcase #=> "TSUKUE"
|
86
|
+
"ツクエ".han_to_zen.romaji.upcase.han_to_zen #=> "ＴＳＵＫＵＥ"
|
87
|
+
```
|
88
|
+
|
89
|
+
|
90
|
+
### Detection Methods
|
91
|
+
|
92
|
+
Used to detect Japanese character types (i.e., hiragana, katakana, kanji, full/half-width etc.). There are two groups of detection methods: methods that check the entire string, and methods that check whether the string contains character(s) of a specified character type.
|
93
|
+
|
94
|
+
If you need to detect other types of Japanese characters such as symbols or just need more flexibility, we expose the `moji` gem's `type?` method.
|
95
|
+
|
96
|
+
#### Check the entire string
|
97
|
+
|
98
|
+
* Is the entire string kana(hiragana/katakana)? (かな/カナ・文字種判定)
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
"アロハ".kana? #=> true
|
102
|
+
"すし".kana? #=> true
|
103
|
+
"Aloha".kana? #=> false
|
104
|
+
"Let's eat すし".kana? #=> false
|
105
|
+
```
|
106
|
+
|
107
|
+
* Is the entire string hiragana? (平仮名・文字種判定)
|
108
|
+
* Is the entire string katakana? (片仮名・文字種判定)
|
109
|
+
|
110
|
+
```ruby
|
111
|
+
"アロハ".katakana? #=> true
|
112
|
+
"すし".katakana? #=> false
|
113
|
+
"アロハ everybody".katakana? #=> false
|
114
|
+
"アロハ".hiragana? #=> false
|
115
|
+
"すし".hiragana? #=> true
|
116
|
+
"Let's eat すし".hiragana? #=> false
|
117
|
+
```
|
118
|
+
|
119
|
+
* Is the entire string kanji? (漢字・文字種判定)
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
"金曜日".kanji? #=> true
|
123
|
+
"金曜日だよ".kanji? #=> false
|
124
|
+
"It's Friday, 金曜日".kanji? #=> false
|
125
|
+
```
|
126
|
+
|
127
|
+
* Is the entire string hankaku? (半角・文字種判定)
|
128
|
+
* Is the entire string zenkaku? (全角・文字種判定)
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
"ｱﾛﾊ".hankaku? #=> true
|
132
|
+
"アロハ".hankaku? #=> false
|
133
|
+
"ｱﾛﾊ".zenkaku? #=> false
|
134
|
+
"アロハ".zenkaku? #=> true
|
135
|
+
```
|
136
|
+
|
137
|
+
* Is the entire string Japanese? (日本語・文字種判定)
|
138
|
+
|
139
|
+
```ruby
|
140
|
+
"アロハ".japanese? #=> true
|
141
|
+
"Let's eat すし".japanese? #=> false
|
142
|
+
```
|
143
|
+
|
144
|
+
#### Check if the string contains
|
145
|
+
|
146
|
+
* Does the string contain kana(hiragana/katakana)? (かな/カナ・文字種判定)
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
"Let's eat すし".contains_kana? #=> true
|
150
|
+
```
|
151
|
+
|
152
|
+
* Does the string contain hiragana? (平仮名・文字種判定)
|
153
|
+
* Does the string contain katakana? (片仮名・文字種判定)
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
"アロハ everybody".contains_katakana? #=> true
|
157
|
+
"Let's eat すし".contains_katakana? #=> false
|
158
|
+
"アロハ everybody".contains_hiragana? #=> false
|
159
|
+
"Let's eat すし".contains_hiragana? #=> true
|
160
|
+
```
|
161
|
+
|
162
|
+
* Does the string contain kanji? (漢字・文字種判定)
|
163
|
+
|
164
|
+
```ruby
|
165
|
+
"金曜日だよ".contains_kanji? #=> true
|
166
|
+
"It's Friday, Friday".contains_kanji? #=> false
|
167
|
+
```
|
168
|
+
|
169
|
+
* Does the string contain hankaku? (半角・文字種判定)
|
170
|
+
* Does the string contain zenkaku? (全角・文字種判定)
|
171
|
+
|
172
|
+
```ruby
|
173
|
+
"ｱﾛﾊ everybody".contains_hankaku? #=> true
|
174
|
+
"Let's eat すし".contains_hankaku? #=> false
|
175
|
+
"ｱﾛﾊ everybody".contains_zenkaku? #=> false
|
176
|
+
"Let's eat すし".contains_zenkaku? #=> true
|
177
|
+
```
|
178
|
+
|
179
|
+
* Does the string contain Japanese? (日本語・文字種判定)
|
180
|
+
|
181
|
+
```ruby
|
182
|
+
"Let's eat すし".contains_japanese? #=> true
|
183
|
+
"It's Friday, Friday".contains_japanese? #=> false
|
184
|
+
```
|
185
|
+
|
186
|
+
### MOAR detection with `moji_type?` and `contains_moji_type?`
|
187
|
+
|
188
|
+
The `moji` gem provides a customizable detection method. Mojinizer exposes this method as `moji_type?` and `contains_moji_type?`; the former checks the entire string and the latter checks whether the string contains a character of the requested type. Combine types using the pipe character.
|
189
|
+
|
190
|
+
```ruby
|
191
|
+
"アロハー！".moji_type?(Moji::KATA) #=> false
|
192
|
+
"アロハー！".moji_type?(Moji::KATA | Moji::SYMBOL) #=> true
|
193
|
+
"アロハー！".contains_moji_type?(Moji::ZEN_SYMBOL) #=> true
|
194
|
+
"アロハー！".contains_moji_type?(Moji::HIRA) #=> false
|
195
|
+
```
|
196
|
+
|
197
|
+
Here are the types that can be used or combined (copied from the `moji` gem's source code).
|
198
|
+
|
199
|
+
```
|
200
|
+
==定数:
|
201
|
+
|
202
|
+
以下の定数は、文字種の一番細かい分類です。
|
203
|
+
(({Moji.type})) が返すのは、以下の定数のうちの1つです。
|
204
|
+
|
205
|
+
--- HAN_CONTROL
|
206
|
+
制御文字。
|
207
|
+
--- HAN_ASYMBOL
|
208
|
+
ASCIIに含まれる半角記号。
|
209
|
+
--- HAN_JSYMBOL
|
210
|
+
JISに含まれるがASCIIには含まれない半角記号。
|
211
|
+
--- HAN_NUMBER
|
212
|
+
半角数字。
|
213
|
+
--- HAN_UPPER
|
214
|
+
半角アルファベット大文字。
|
215
|
+
--- HAN_LOWER
|
216
|
+
半角アルファベット小文字。
|
217
|
+
--- HAN_KATA
|
218
|
+
半角カタカナ。
|
219
|
+
--- ZEN_ASYMBOL
|
220
|
+
JISの全角記号のうち、ASCIIに対応する半角記号があるもの。
|
221
|
+
--- ZEN_JSYMBOL
|
222
|
+
JISの全角記号のうち、ASCIIに対応する半角記号がないもの。
|
223
|
+
--- ZEN_NUMBER
|
224
|
+
全角数字。
|
225
|
+
--- ZEN_UPPER
|
226
|
+
全角アルファベット大文字。
|
227
|
+
--- ZEN_LOWER
|
228
|
+
全角アルファベット小文字。
|
229
|
+
--- ZEN_HIRA
|
230
|
+
ひらがな。
|
231
|
+
--- ZEN_KATA
|
232
|
+
全角カタカナ。
|
233
|
+
--- ZEN_GREEK
|
234
|
+
ギリシャ文字。
|
235
|
+
--- ZEN_CYRILLIC
|
236
|
+
キリル文字。
|
237
|
+
--- ZEN_LINE
|
238
|
+
罫線のかけら。
|
239
|
+
--- ZEN_KANJI
|
240
|
+
漢字。
|
241
|
+
|
242
|
+
以下の定数は、上の文字種の組み合わせと別名です。
|
243
|
+
|
244
|
+
--- HAN_SYMBOL
|
245
|
+
JISに含まれる半角記号。(({HAN_ASYMBOL | HAN_JSYMBOL}))
|
246
|
+
--- HAN_ALPHA
|
247
|
+
半角アルファベット。(({HAN_UPPER | HAN_LOWER}))
|
248
|
+
--- HAN_ALNUM
|
249
|
+
半角英数字。(({HAN_ALPHA | HAN_NUMBER}))
|
250
|
+
--- HAN
|
251
|
+
全ての半角文字。(({HAN_CONTROL | HAN_SYMBOL | HAN_ALNUM | HAN_KATA}))
|
252
|
+
--- ZEN_SYMBOL
|
253
|
+
JISに含まれる全角記号。(({ZEN_ASYMBOL | ZEN_JSYMBOL}))
|
254
|
+
--- ZEN_ALPHA
|
255
|
+
全角アルファベット。(({ZEN_UPPER | ZEN_LOWER}))
|
256
|
+
--- ZEN_ALNUM
|
257
|
+
全角英数字。(({ZEN_ALPHA | ZEN_NUMBER}))
|
258
|
+
--- ZEN_KANA
|
259
|
+
全角かな/カナ。(({ZEN_KATA | ZEN_HIRA}))
|
260
|
+
--- ZEN
|
261
|
+
JISに含まれる全ての全角文字。(({ZEN_SYMBOL | ZEN_ALNUM | ZEN_KANA | ZEN_GREEK | ZEN_CYRILLIC | ZEN_LINE | ZEN_KANJI}))
|
262
|
+
--- ASYMBOL
|
263
|
+
ASCIIに含まれる半角記号とその全角版。(({HAN_ASYMBOL | ZEN_ASYMBOL}))
|
264
|
+
--- JSYMBOL
|
265
|
+
JISに含まれるが (({ASYMBOL})) には含まれない全角/半角記号。(({HAN_JSYMBOL | ZEN_JSYMBOL}))
|
266
|
+
--- SYMBOL
|
267
|
+
JISに含まれる全ての全角/半角記号。(({HAN_SYMBOL | ZEN_SYMBOL}))
|
268
|
+
--- NUMBER
|
269
|
+
全角/半角数字。(({HAN_NUMBER | ZEN_NUMBER}))
|
270
|
+
--- UPPER
|
271
|
+
全角/半角アルファベット大文字。(({HAN_UPPER | ZEN_UPPER}))
|
272
|
+
--- LOWER
|
273
|
+
全角/半角アルファベット小文字。(({HAN_LOWER | ZEN_LOWER}))
|
274
|
+
--- ALPHA
|
275
|
+
全角/半角アルファベット。(({HAN_ALPHA | ZEN_ALPHA}))
|
276
|
+
--- ALNUM
|
277
|
+
全角/半角英数字。(({HAN_ALNUM | ZEN_ALNUM}))
|
278
|
+
--- HIRA
|
279
|
+
(({ZEN_HIRA})) の別名。
|
280
|
+
--- KATA
|
281
|
+
全角/半角カタカナ。(({HAN_KATA | ZEN_KATA}))
|
282
|
+
--- KANA
|
283
|
+
全角/半角 かな/カナ。(({KATA | ZEN_HIRA}))
|
284
|
+
--- GREEK
|
285
|
+
(({ZEN_GREEK})) の別名。
|
286
|
+
--- CYRILLIC
|
287
|
+
(({ZEN_CYRILLIC})) の別名。
|
288
|
+
--- LINE
|
289
|
+
(({ZEN_LINE})) の別名。
|
290
|
+
--- KANJI
|
291
|
+
(({ZEN_KANJI})) の別名。
|
292
|
+
--- ALL
|
293
|
+
上記全ての文字。
|
294
|
+
```
|
295
|
+
|
296
|
+
|
297
|
+
## License
|
298
|
+
|
299
|
+
MIT License
|
300
|
+
|
301
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
302
|
+
a copy of this software and associated documentation files (the
|
303
|
+
"Software"), to deal in the Software without restriction, including
|
304
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
305
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
306
|
+
permit persons to whom the Software is furnished to do so, subject to
|
307
|
+
the following conditions:
|
308
|
+
|
309
|
+
The above copyright notice and this permission notice shall be
|
310
|
+
included in all copies or substantial portions of the Software.
|
311
|
+
|
312
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
313
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
314
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
315
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
316
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
317
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
318
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
319
|
+
|
320
|
+
Refer to [LICENSE.txt](LICENSE.txt) file for additional information.
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
## Contributing
|
325
|
+
|
326
|
+
1. Fork it
|
327
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
328
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
329
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
330
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Mojinizer

  # Romanizes the receiver.
  #
  # Every character found in KANA_TO_ROM is replaced by its romaji spelling;
  # all other characters are copied through unchanged. The intermediate
  # string is then rewritten by a fixed sequence of regex passes that merge
  # small kana, sokuon, long-vowel marks and (han)dakuten into conventional
  # romaji. The order of the passes matters: later passes rely on the output
  # of earlier ones.
  #
  # Returns a new String; the receiver is not modified.
  def romaji
    s = ""
    each_char { |c| s += KANA_TO_ROM.fetch(c, c) }

    # Fold stand-alone (han)dakuten marks, romanized as " and ', into the
    # preceding syllable.
    s = s.gsub(/(k)([aiueo])(")/, 'g\2').gsub(/(s)([aiueo])(")/, 'z\2').gsub(/(t)([aiueo])(")/, 'd\2')
    s = s.gsub(/(h)([aiueo])(")/, 'b\2').gsub(/([fh])([aiueo])(')/, 'p\2').gsub(/u"/, 'vu')
    #---------------------------------------------------------
    # Remove the spaces around a hanging 'っ'. The "xtsu" is mandatory here:
    # when it was optional, any two consecutive whitespace characters were
    # rewritten to "xtsu" and then doubled the following consonant
    # ("a  b" became "abb").
    s = s.gsub(/\s(xtsu)\s/, 'xtsu')
    #---------------------------------------------------------
    # Repeat until no sokuon precedes a consonant: ッカ-->xtsuka-->kka
    loop { break if s.gsub!(/(xtsu)([ckgszjtdhfbpmyrwnv])/, '\2\2').nil? }
    #---------------------------------------------------------
    # Compound Phoneme Pattern Rollbacks
    # NB: Uses regex backrefs like "\1y\3" where \1 = 1st capture grp, y='y' and \3 = 3rd capture grp
    #---------------------------------------------------------
    s = s.gsub(/( +x)(.*)/, 'x\2')                          # Avoid hanging small kana due to leading spaces
    s = s.gsub(/(ch)(ixy)([aueo])/, '\1\3')                 # チョ-->chixyo-->cho
    s = s.gsub(/([kgszjtdnhfbpmr])(ixy)([auo])/, '\1y\3')   # キャ-->kixya-->kya
    s = s.gsub(/([kgszjtdnhfbpmr])(ix)([ie])/, '\1y\3')     # キィ-->kixi-->kyi
    #---------------------------------------------------------
    s = s.gsub(/(sh)(y)([aueo])/, '\1\3')                   # シュ-->shyu-->shu
    s = s.gsub(/(j)(y)([aueo])/, '\1\3')                    # ジュ-->jyu-->ju
    #---------------------------------------------------------
    s = s.gsub(/([td])(exy)([aueo])/, '\1h\3')              # テャ-->texya-->tha
    s = s.gsub(/([td])(ex)([ie])/, '\1\3')                  # ティ-->texi-->ti
    s = s.gsub(/([td])(oxu)/, '\1oo')                       # ドゥ-->doxu-->doo
    s = s.gsub(/(tsu)(x)([aiueo])/, 'ts\3')                 # ツァ-->tsuxa-->tsa
    s = s.gsub(/([d])(oxy)/, '\1o\'y')                      # ドュ-->doxyu-->do'yu (an apostrophe is inserted)
    #---------------------------------------------------------
    s = s.gsub(/(vux)([aieo])/, 'v\2')                      # ヴァヴィヴェヴォ, ヴァ-->vuxa-->va
    s = s.gsub(/(vuxy)([aueo])/, 'vy\2')                    # ヴュ-->vuxyu-->vyu
    s = s.gsub(/(ixe)/, 'iye')                              # イェ-->ixe-->iye
    s = s.gsub(/(hoxe)/, 'howe')                            # ホェ-->hoxe-->howe
    s = s.gsub(/(fux)([aieo])/, 'f\2')                      # ファフィフェフォ, ファ-->fuxa-->fa
    s = s.gsub(/(fuxy)([aueo])/, 'fy\2')                    # フュ-->fuxyu-->fyu
    s = s.gsub(/(ux)([ieo])/, 'w\2')                        # ウィウェウォ, ウィ-->uxi-->wi ("uxa" is not rewritten)
    #---------------------------------------------------------
    s = s.strip.gsub(/(xtsu)$/, 'h!')                       # Recombine hanging 'っ' followed by EOL
    s = s.gsub(/([aiueo]?)(\-)/, '\1\1')                    # Replace long-vowel marks by doubling the preceding vowel
    #---------------------------------------------------------
    # Cleanup specifically for source strings that contain spaces!
    # The character classes are plain [^a-zA-Z]: the previous "A-z" range and
    # the literal "|" made [ \ ] ^ _ ` and | count as letters.
    s = s.gsub(/( +)([^a-zA-Z])/, '\2')                     # Remove spaces before any non-alphabetical char
    s = s.gsub(/(n')/, 'n')                                 # ン-->n'-->n
    s = s.gsub(/(nn)/, 'n')                                 # ン-->nn-->n
    s = s.gsub(/( n)[^a-zA-Z]?$/, 'n')                      # Fix "n" appearing as separate word
    s = s.gsub(/\s{2,}/, ' ')                               # Remove duplicate spaces!
    #---------------------------------------------------------
    s
  end

  # Converts romaji and katakana in the receiver to hiragana: romaji is first
  # turned into katakana, then all katakana into hiragana.
  def hiragana
    roma_to_kata.kata_to_hira
  end

  # Converts romaji and hiragana in the receiver to katakana: hiragana is
  # converted first, then the remaining romaji.
  def katakana
    hira_to_kata.roma_to_kata
  end

  # Hiragana --> katakana; delegates to Moji.hira_to_kata.
  def hira_to_kata
    Moji.hira_to_kata(self)
  end

  # Katakana --> hiragana; delegates to Moji.kata_to_hira.
  def kata_to_hira
    Moji.kata_to_hira(self)
  end

  # Hankaku --> zenkaku; delegates to Moji.han_to_zen.
  def han_to_zen
    Moji.han_to_zen(self)
  end

  # Zenkaku --> hankaku; delegates to Moji.zen_to_han.
  def zen_to_han
    Moji.zen_to_han(self)
  end

  # Converts romaji in the receiver to katakana.
  #
  # The input is consumed one character at a time into a small buffer. The
  # buffer is looked up in ROM_TO_KATA1 / ROM_TO_KATA2 / ROM_TO_KATA3 once it
  # holds 1, 2 or 3 characters (4 only for "xtsu"). When no table matches,
  # the first buffered character is emitted (a leading "n" as "ン", a doubled
  # consonant as "ッ", anything else verbatim) and matching resumes with the
  # remainder.
  #
  # Returns a new String; characters that are not part of a romaji pattern
  # are copied through unchanged.
  def roma_to_kata
    result = ""
    word_buffer = []
    chars = each_char.to_a
    loop do
      case word_buffer.size
      ##### When 0 characters in the buffer
      when 0
        return result if chars.empty?
        word_buffer.push(chars.shift)
      ##### Patterns with 1 roman character
      when 1
        if word_buffer[0] =~ /[aiueo-]/
          result += ROM_TO_KATA1[word_buffer[0]]
          word_buffer = []                          # a-->ア
        elsif word_buffer[0] =~ /[xkcgszjtdnhbpvfmyrlw']/
          if chars.size > 0
            word_buffer.push(chars.shift)
          else
            # Input ends on a lone consonant: only "n" has a kana of its own.
            return result + word_buffer[0].gsub(/n/, "ン")
          end
        else
          result += word_buffer.shift               # not romaji: pass through
        end
      ##### Patterns with 2 roman characters
      when 2
        pair = word_buffer.join
        if ROM_TO_KATA2.key?(pair)
          result += ROM_TO_KATA2[pair]
          word_buffer = []
        elsif pair =~ /([kgszjtcdnhbpmrl]y)|([stcd]h)|ts|(x[wytk])/ # goto 3
          if chars.size > 0
            # Consume next letter from source array
            word_buffer.push(chars.shift)
          else
            return result + pair.gsub(/n/, "ン")
          end
        elsif pair == "n'"
          result += "ン"
          word_buffer.shift(2)                      # n'--> ン
        elsif word_buffer[0] == "n"
          result += "ン"
          word_buffer.shift                         # nk-->ンk
        elsif word_buffer[0] == word_buffer[1]
          result += "ッ"
          word_buffer.shift                         # kk-->ッk
        else
          result += word_buffer.shift
        end
      ##### Patterns with 3 roman characters
      when 3
        triple = word_buffer.join
        if ROM_TO_KATA3.key?(triple)
          result += ROM_TO_KATA3[triple]
          word_buffer = []
        elsif triple == "xts" && chars.size > 0
          # "xtsu" is the only 4-character key in ROM_TO_KATA3; read one more
          # character so it can be matched instead of falling apart into
          # "x" + "ツ".
          word_buffer.push(chars.shift)
        elsif word_buffer[0] == "n"
          result += "ン"
          word_buffer.shift
        else
          result += word_buffer.shift
        end
      ##### Patterns with 4 roman characters (only reached via "xts" above)
      when 4
        quad = word_buffer.join
        if ROM_TO_KATA3.key?(quad)
          result += ROM_TO_KATA3[quad]              # xtsu-->ッ
          word_buffer = []
        else
          # Not "xtsu": emit the "x" and re-match the remaining three chars.
          result += word_buffer.shift
        end
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Mojinizer

  # ---- Predicates over the whole string -----------------------------------
  # Each of these is true only when every character of the receiver belongs
  # to the given Moji type. They are all built on #moji_type?, which is
  # also true for the empty string.

  # True when every character is hiragana.
  def hiragana?
    moji_type?(Moji::HIRA)
  end

  # True when every character is katakana (Moji::KATA).
  def katakana?
    moji_type?(Moji::KATA)
  end

  # True when every character is kana, hiragana and katakana may be mixed.
  # Uses Moji::KANA, the same type #contains_kana? tests for; the previous
  # `hiragana? || katakana?` returned false for a string that mixes both
  # scripts.
  def kana?
    moji_type?(Moji::KANA)
  end

  # True when every character is kanji.
  def kanji?
    moji_type?(Moji::KANJI)
  end

  # True when every character is half-width katakana or a half-width JIS
  # symbol. Other half-width characters are not part of the tested type.
  def hankaku?
    moji_type?(Moji::HAN_KATA | Moji::HAN_JSYMBOL)
  end

  # True when every character is full-width (Moji::ZEN).
  def zenkaku?
    moji_type?(Moji::ZEN)
  end

  # True when every character is full-width, a JIS-only symbol or half-width
  # katakana.
  def japanese?
    moji_type?(Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA)
  end

  # True when Moji.type?(char, type) holds for every character of the
  # receiver. `type` is a Moji type constant; several can be combined with
  # `|`. Stops at the first character that fails.
  def moji_type?(type)
    each_char { |c| return false unless Moji.type?(c, type) }
    true
  end

  # ---- Predicates over any character --------------------------------------
  # Each of these is true as soon as one character of the receiver belongs
  # to the given Moji type. They are all built on #contains_moji_type?.

  # True when at least one character is hiragana.
  def contains_hiragana?
    contains_moji_type?(Moji::HIRA)
  end

  # True when at least one character is kana (Moji::KANA).
  def contains_kana?
    contains_moji_type?(Moji::KANA)
  end

  # True when at least one character is katakana (Moji::KATA).
  def contains_katakana?
    contains_moji_type?(Moji::KATA)
  end

  # True when at least one character is kanji.
  def contains_kanji?
    contains_moji_type?(Moji::KANJI)
  end

  # True when at least one character is half-width katakana or a half-width
  # JIS symbol.
  def contains_hankaku?
    contains_moji_type?(Moji::HAN_KATA | Moji::HAN_JSYMBOL)
  end

  # True when at least one character is full-width (Moji::ZEN).
  def contains_zenkaku?
    contains_moji_type?(Moji::ZEN)
  end

  # True when at least one character is full-width, a JIS-only symbol or
  # half-width katakana.
  def contains_japanese?
    contains_moji_type?(Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA)
  end

  # True when Moji.type?(char, type) holds for at least one character of the
  # receiver; false for the empty string.
  def contains_moji_type?(type)
    each_char { |c| return true if Moji.type?(c, type) }
    false
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Mojinizer

  # Single kana (and a few punctuation marks) --> romaji, used by #romaji.
  # Small kana are spelled with a leading "x" and the sokuon as "xtsu";
  # #romaji merges those spellings into the neighbouring syllables afterwards.
  KANA_TO_ROM={
    "ア"=>"a", "イ"=>"i", "ウ"=>"u", "エ"=>"e","オ"=>"o",
    "あ"=>"a", "い"=>"i", "う"=>"u", "え"=>"e","お"=>"o",
    "カ"=>"ka", "キ"=>"ki", "ク"=>"ku", "ケ"=>"ke", "コ"=>"ko",
    "か"=>"ka", "き"=>"ki", "く"=>"ku", "け"=>"ke", "こ"=>"ko",
    "ガ"=>"ga", "ギ"=>"gi", "グ"=>"gu", "ゲ"=>"ge", "ゴ"=>"go",
    "が"=>"ga", "ぎ"=>"gi", "ぐ"=>"gu", "げ"=>"ge", "ご"=>"go",
    # "シ" is "shi" like its hiragana twin "し"; with "si" the katakana forms
    # romanized differently (シ-->si, シュ-->syu) from the hiragana ones.
    "サ"=>"sa", "シ"=>"shi","ス"=>"su", "セ"=>"se", "ソ"=>"so",
    "さ"=>"sa", "し"=>"shi","す"=>"su", "せ"=>"se", "そ"=>"so",
    "ザ"=>"za", "ジ"=>"ji", "ズ"=>"zu", "ゼ"=>"ze", "ゾ"=>"zo",
    "ざ"=>"za", "じ"=>"ji", "ず"=>"zu", "ぜ"=>"ze", "ぞ"=>"zo",
    "タ"=>"ta", "チ"=>"chi","ツ"=>"tsu","テ"=>"te", "ト"=>"to",
    "た"=>"ta", "ち"=>"chi","つ"=>"tsu","て"=>"te", "と"=>"to",
    "ダ"=>"da", "ヂ"=>"dji","ヅ"=>"dzu","デ"=>"de", "ド"=>"do",
    "だ"=>"da", "ぢ"=>"dji","づ"=>"dzu","で"=>"de", "ど"=>"do",
    "ナ"=>"na", "ニ"=>"ni", "ヌ"=>"nu", "ネ"=>"ne", "ノ"=>"no",
    "な"=>"na", "に"=>"ni", "ぬ"=>"nu", "ね"=>"ne", "の"=>"no",
    "ハ"=>"ha", "ヒ"=>"hi", "フ"=>"fu", "ヘ"=>"he", "ホ"=>"ho",
    "は"=>"ha", "ひ"=>"hi", "ふ"=>"fu", "へ"=>"he", "ほ"=>"ho",
    "バ"=>"ba", "ビ"=>"bi", "ブ"=>"bu", "ベ"=>"be", "ボ"=>"bo",
    "ば"=>"ba", "び"=>"bi", "ぶ"=>"bu", "べ"=>"be", "ぼ"=>"bo",
    "パ"=>"pa", "ピ"=>"pi", "プ"=>"pu", "ペ"=>"pe", "ポ"=>"po",
    "ぱ"=>"pa", "ぴ"=>"pi", "ぷ"=>"pu", "ぺ"=>"pe", "ぽ"=>"po",
    "マ"=>"ma", "ミ"=>"mi", "ム"=>"mu", "メ"=>"me", "モ"=>"mo",
    "ま"=>"ma", "み"=>"mi", "む"=>"mu", "め"=>"me", "も"=>"mo",
    "ヤ"=>"ya", "ユ"=>"yu", "ヨ"=>"yo",
    "や"=>"ya", "ゆ"=>"yu", "よ"=>"yo",
    "ラ"=>"ra", "リ"=>"ri", "ル"=>"ru","レ"=>"re","ロ"=>"ro",
    "ら"=>"ra", "り"=>"ri", "る"=>"ru","れ"=>"re","ろ"=>"ro",
    "ワ"=>"wa", "ヰ"=>"wi", "ヱ"=>"we", "ヲ"=>"wo", "ン"=>"nn",
    "わ"=>"wa", "ゐ"=>"wi", "ゑ"=>"we", "を"=>"wo", "ん"=>"nn",
    "ァ"=>"xa", "ィ"=>"xi", "ゥ"=>"xu", "ェ"=>"xe", "ォ"=>"xo",
    "ぁ"=>"xa", "ぃ"=>"xi", "ぅ"=>"xu", "ぇ"=>"xe", "ぉ"=>"xo",
    "ッ"=>"xtsu","ャ"=>"xya", "ュ"=>"xyu", "ョ"=>"xyo",
    "っ"=>"xtsu","ゃ"=>"xya", "ゅ"=>"xyu", "ょ"=>"xyo",
    "ヴ"=>"vu", "ヵ"=>"xka","ヶ"=>"ga","ヮ"=>"xwa",
    "ゎ"=>"xwa",
    "ー"=>"-", "−"=>"-", "゛"=>'"', "゜"=>"'", "、"=>",", "。"=>".",
    # NOTE(review): as written, the entries below map ":", " ", "@", "(" and
    # ")" to themselves and repeat the " " key. Presumably the keys were
    # full-width characters originally -- confirm against the upstream gem.
    ":"=>":", " " => " ", "@" => "@", "(" => "(", ")" => ")",
    " " => " "
  }

  # Two-kana sequences --> romaji.
  KANA_TO_ROM2={
    "てぃ" => "ti", "でぃ" => "di"
  }

  # 1 character romaji patterns
  ROM_TO_KATA1={
    "a"=>"ア", "i"=>"イ", "u"=>"ウ", "e"=>"エ", "o"=>"オ", "-"=>"ー"
  }

  # 2 character romaji patterns
  # NOTE(review): "tsu" has three characters; a lookup with a 2-character
  # key can never hit it (the same key is also present in ROM_TO_KATA3).
  ROM_TO_KATA2={
    "xa"=>"ァ", "xi"=>"ィ", "xu"=>"ゥ", "xe"=>"ェ", "xo"=>"ォ",
    "ka"=>"カ", "ki"=>"キ", "ku"=>"ク", "ke"=>"ケ", "ko"=>"コ",
    "ca"=>"カ", "cu"=>"ク", "co"=>"コ",
    "ga"=>"ガ", "gi"=>"ギ", "gu"=>"グ", "ge"=>"ゲ", "go"=>"ゴ",
    "sa"=>"サ", "si"=>"シ", "su"=>"ス", "se"=>"セ", "so"=>"ソ",
    "za"=>"ザ", "zi"=>"ジ", "zu"=>"ズ", "ze"=>"ゼ", "zo"=>"ゾ",
    "ja"=>"ジャ","ji"=>"ジ", "ju"=>"ジュ","je"=>"ジェ","jo"=>"ジョ",
    "ta"=>"タ", "ti"=>"チ", "tsu"=>"ツ", "te"=>"テ", "to"=>"ト",
    "da"=>"ダ", "di"=>"ヂ", "du"=>"ヅ", "de"=>"デ", "do"=>"ド",
    "na"=>"ナ", "ni"=>"ニ", "nu"=>"ヌ", "ne"=>"ネ", "no"=>"ノ",
    "ha"=>"ハ", "hi"=>"ヒ", "hu"=>"フ", "he"=>"ヘ", "ho"=>"ホ",
    "ba"=>"バ", "bi"=>"ビ", "bu"=>"ブ", "be"=>"ベ", "bo"=>"ボ",
    "pa"=>"パ", "pi"=>"ピ", "pu"=>"プ", "pe"=>"ペ", "po"=>"ポ",
    "va"=>"ヴァ","vi"=>"ヴィ","vu"=>"ヴ", "ve"=>"ヴェ","vo"=>"ヴォ",
    "fa"=>"ファ","fi"=>"フィ","fu"=>"フ", "fe"=>"フェ","fo"=>"フォ",
    "ma"=>"マ", "mi"=>"ミ", "mu"=>"ム", "me"=>"メ", "mo"=>"モ",
    "ya"=>"ヤ", "yi"=>"イ", "yu"=>"ユ", "ye"=>"イェ", "yo"=>"ヨ",
    "ra"=>"ラ", "ri"=>"リ", "ru"=>"ル", "re"=>"レ", "ro"=>"ロ",
    "la"=>"ラ", "li"=>"リ", "lu"=>"ル", "le"=>"レ", "lo"=>"ロ",
    "wa"=>"ワ", "wi"=>"ヰ", "wu"=>"ウ", "we"=>"ヱ", "wo"=>"ヲ",
    "nn"=>"ン"
  }

  # 3 character romaji patterns
  # NOTE(review): "xtsu" has four characters, so a lookup with a 3-character
  # key never reaches it.
  ROM_TO_KATA3={
    "tsu"=>"ツ",
    "xka"=>"ヵ", "xke"=>"ヶ",
    "xwa"=>"ヮ", "xtsu"=>"ッ", "xya"=>"ャ", "xyu"=>"ュ", "xyo"=>"ョ",
    "kya"=>"キャ", "kyi"=>"キィ", "kyu"=>"キュ", "kye"=>"キェ", "kyo"=>"キョ",
    "gya"=>"ギャ", "gyi"=>"ギィ", "gyu"=>"ギュ", "gye"=>"ギェ", "gyo"=>"ギョ",
    "sya"=>"シャ", "syi"=>"シィ", "syu"=>"シュ", "sye"=>"シェ", "syo"=>"ショ",
    "sha"=>"シャ", "shi"=>"シ", "shu"=>"シュ", "she"=>"シェ", "sho"=>"ショ",
    "zya"=>"ジャ", "zyi"=>"ジィ", "zyu"=>"ジュ", "zye"=>"ジェ", "zyo"=>"ジョ",
    "jya"=>"ジャ", "jyi"=>"ジィ", "jyu"=>"ジュ", "jye"=>"ジェ", "jyo"=>"ジョ",
    "tya"=>"チャ", "tyi"=>"チィ", "tyu"=>"チュ", "tye"=>"チェ", "tyo"=>"チョ",
    "cya"=>"チャ", "cyi"=>"チィ", "cyu"=>"チュ", "cye"=>"チェ", "cyo"=>"チョ",
    "cha"=>"チャ", "chi"=>"チ", "chu"=>"チュ", "che"=>"チェ", "cho"=>"チョ",
    "tha"=>"テャ", "thi"=>"ティ", "thu"=>"テュ", "the"=>"テェ", "tho"=>"テョ",
    "dya"=>"ヂャ", "dyi"=>"ヂィ", "dyu"=>"ヂュ", "dye"=>"ヂェ", "dyo"=>"ヂョ",
    "dha"=>"デャ", "dhi"=>"ディ", "dhu"=>"デュ", "dhe"=>"デェ", "dho"=>"デョ",
    "nya"=>"ニャ", "nyi"=>"ニィ", "nyu"=>"ニュ", "nye"=>"ニェ", "nyo"=>"ニョ",
    "hya"=>"ヒャ", "hyi"=>"ヒィ", "hyu"=>"ヒュ", "hye"=>"ヒェ", "hyo"=>"ヒョ",
    "bya"=>"ビャ", "byi"=>"ビィ", "byu"=>"ビュ", "bye"=>"ビェ", "byo"=>"ビョ",
    "pya"=>"ピャ", "pyi"=>"ピィ", "pyu"=>"ピュ", "pye"=>"ピェ", "pyo"=>"ピョ",
    "mya"=>"ミャ", "myi"=>"ミィ", "myu"=>"ミュ", "mye"=>"ミェ", "myo"=>"ミョ",
    "rya"=>"リャ", "ryi"=>"リィ", "ryu"=>"リュ", "rye"=>"リェ", "ryo"=>"リョ",
    "lya"=>"リャ", "lyi"=>"リィ", "lyu"=>"リュ", "lye"=>"リェ", "lyo"=>"リョ"
  }

end
|
data/lib/mojinizer.rb
ADDED
data/mojinizer.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Make the gem's own lib/ directory loadable so the version constant can be
# read while the specification is being built.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'mojinizer/version'

Gem::Specification.new do |gem|
  # --- Identity -------------------------------------------------------------
  gem.name          = "mojinizer"
  gem.version       = Mojinizer::VERSION
  gem.authors       = ["Ikayzo"]
  gem.email         = ["ckobayashi@ikayzo.com"]
  gem.description   = "Combines the functionality of the Moji and Romajinizer gems. And adds Japanese kana detection and conversion methods to the String class."
  gem.summary       = "A gem for converting between hiragana, katakana, and romaji. This gem does not convert kanji characters."
  gem.homepage      = "https://github.com/ikayzo/mojinizer"
  gem.license       = "MIT"

  # --- Packaged files ---------------------------------------------------------
  # Everything tracked by git is shipped; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # --- Dependencies -----------------------------------------------------------
  gem.add_runtime_dependency "moji", "~> 1.6"

  [
    ["bundler",   "~> 1.3"],
    ["rake",      "~> 10.0.4"],
    ["rspec",     "~> 2.13.0"],
    ["simplecov", "~> 0.7.1"]
  ].each do |dep_name, requirement|
    gem.add_development_dependency dep_name, requirement
  end
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
#coding: utf-8
require 'spec_helper'

# Specs for the String methods exercised below: script conversion
# (hiragana / katakana / romaji), width conversion (zen_to_han / han_to_zen),
# and the `contains_*?` / `*?` detection predicates.
#
# Several examples are width-sensitive: a half-width kana string such as
# "ｱﾛﾊ" and its full-width counterpart "アロハ" are different inputs with
# different expected results. Those literals are written in their explicit
# half-width / full-width forms and flagged with a trailing comment.
# NOTE(review): the explicit width forms were chosen so that no literal is
# asserted both true and false; confirm them against the upstream spec.
describe Mojinizer do
  context "should convert" do
    it "romaji or katakana to hiragana properly" do
      "tsukue".hiragana.should == "つくえ"
      "kinnyoubi".hiragana.should == "きんようび"
      "kin'youbi".hiragana.should == "きんようび"
      "konnya".hiragana.should == "こんや"
      "konnnichi".hiragana.should == "こんにち"
      "kaetta".hiragana.should == "かえった"
      "ツクエ".hiragana.should == "つくえ"
      "こんばn".hiragana.should == "こんばん"
      "konnbann".hiragana.should == "こんばん"
      "".hiragana.should == ""
    end

    it "romaji or hiragana to katakana properly" do
      "tsukue".katakana.should == "ツクエ"
      "kinnyoubi".katakana.should == "キンヨウビ"
      "kin'youbi".katakana.should == "キンヨウビ"
      "konnya".katakana.should == "コンヤ"
      "konnnichi".katakana.should == "コンニチ"
      "kaetta".katakana.should == "カエッタ"
      "つくえ".katakana.should == "ツクエ"
      "行きます".katakana.should == "行キマス"
      "こんばn".katakana.should == "コンバン"
      "konnbann".katakana.should == "コンバン"
      "aloha元気?".katakana.should == "アロハ元気?"
      "アロハ、げんき?".katakana.should == "アロハ、ゲンキ?"
      "aloha まはろ".katakana.should == "アロハ マハロ"
      "".katakana.should == ""
    end

    it "kana to romaji properly" do
      "つくえ".romaji.should == "tsukue"
      "きんようび".romaji.should == "kinyoubi"
      "こんや".romaji.should == "konya"
      "こんにち".romaji.should == "konnichi"
      "ツクエ".romaji.should == "tsukue"
      "キンヨウビ".romaji.should == "kinyoubi"
      "コンヤ".romaji.should == "konya"
      "コンニチ".romaji.should == "konnichi"
      "today is きんようび".romaji.should == "today is kinyoubi"
      "today is キンヨウビ".romaji.should == "today is kinyoubi"
      "".romaji.should == ""
    end

    it "romaji or kana to hankaku properly" do
      "あろは".zen_to_han.should == "あろは"
      "アロハ！".zen_to_han.should == "ｱﾛﾊ!"           # full-width in, half-width out
      "ａｌｏｈａ！".zen_to_han.should == "aloha!"    # full-width in, half-width out
      "ＡＬＯＨＡ！".zen_to_han.should == "ALOHA!"    # full-width in, half-width out
      "".zen_to_han.should == ""
    end

    it "romaji or kana to zenkaku properly" do
      "ｱﾛﾊ!".han_to_zen.should == "アロハ！"           # half-width in, full-width out
      "あろは!".han_to_zen.should == "あろは！"        # only the "!" is half-width
      "aloha!".han_to_zen.should == "ａｌｏｈａ！"    # half-width in, full-width out
      "ALOHA!".han_to_zen.should == "ＡＬＯＨＡ！"    # half-width in, full-width out
      "".han_to_zen.should == ""
    end
  end

  context "should be able to tell if a string contains" do
    it "kana" do
      "行きます".contains_kana?.should == true
      "abcdefg!".contains_kana?.should == false
      "アロハeverybody".contains_kana?.should == true
      "あろはeverybody".contains_kana?.should == true
      "ｱﾛﾊeverybody".contains_kana?.should == true   # half-width katakana
    end

    it "hiragana" do
      "行きます".contains_hiragana?.should == true
      "abcdefg!".contains_hiragana?.should == false
      "アロハeverybody".contains_hiragana?.should == false
      "あろはeverybody".contains_hiragana?.should == true
      "ｱﾛﾊeverybody".contains_hiragana?.should == false   # half-width katakana
    end

    it "katakana" do
      "行きます".contains_katakana?.should == false
      "abcdefg!".contains_katakana?.should == false
      "アロハeverybody".contains_katakana?.should == true
      "ｱﾛﾊeverybody".contains_katakana?.should == true   # half-width katakana
    end

    it "kanji" do
      "行きます".contains_kanji?.should == true
      "abcdefg!".contains_kanji?.should == false
      "アロハeverybody".contains_kanji?.should == false
      "ｱﾛﾊeverybody".contains_kanji?.should == false   # half-width katakana
    end

    it "hankaku" do
      "あ".contains_hankaku?.should == false
      "ア".contains_hankaku?.should == false
      "a".contains_hankaku?.should == false
      "Ａ".contains_hankaku?.should == false           # full-width Latin
      "ＡＬＯＨＡ".contains_hankaku?.should == false   # full-width Latin
      "アろは".contains_hankaku?.should == false
      "aloha".contains_hankaku?.should == false
      "aloは".contains_hankaku?.should == false
      "ｱ".contains_hankaku?.should == true             # half-width katakana
      "ｱﾛﾊ".contains_hankaku?.should == true           # half-width katakana
      "ｱﾛﾊeverybody".contains_hankaku?.should == true  # half-width katakana
    end

    it "zenkaku" do
      "あ".contains_zenkaku?.should == true
      "ア".contains_zenkaku?.should == true
      "a".contains_zenkaku?.should == false
      "Ａ".contains_zenkaku?.should == true             # full-width Latin
      "ＡＬＯＨＡ".contains_zenkaku?.should == true     # full-width Latin
      "アろは".contains_zenkaku?.should == true
      "aloha".contains_zenkaku?.should == false
      "aloは".contains_zenkaku?.should == true
      "ｱ".contains_zenkaku?.should == false             # half-width katakana
      "ｱﾛﾊ".contains_zenkaku?.should == false           # half-width katakana
      "ｱﾛﾊeverybody".contains_zenkaku?.should == false  # half-width katakana
    end

    it "Japanese characters" do
      "行きます".contains_japanese?.should == true
      "abcdefg!".contains_japanese?.should == false
      "アロハeverybody".contains_japanese?.should == true
      "everybody、行きます".contains_japanese?.should == true
      "aloha〜!".contains_japanese?.should == true
      "ｱﾛﾊeverybody".contains_japanese?.should == true   # half-width katakana
    end
  end

  context "should be able to tell if a character or the entire string is" do
    it "kana" do
      "す".kana?.should == true
      "すし".kana?.should == true
      "アロハ".kana?.should == true
      "行".kana?.should == false
      "sushi".kana?.should == false
      "アロハeverybody".kana?.should == false
    end

    it "hiragana" do
      "あ".hiragana?.should == true
      "ア".hiragana?.should == false
      "a".hiragana?.should == false
      "あろは".hiragana?.should == true
      "あロは".hiragana?.should == false
    end

    it "katakana" do
      "あ".katakana?.should == false
      "ア".katakana?.should == true
      "a".katakana?.should == false
      "アろは".katakana?.should == false
      "アロハ".katakana?.should == true
      "アロは".katakana?.should == false
    end

    it "kanji" do
      "行".kanji?.should == true
      "あ".kanji?.should == false
      "ア".kanji?.should == false
      "〜".kanji?.should == false
      "a".kanji?.should == false
      "アロハ".kanji?.should == false
      "ALOHA".kanji?.should == false
      "金曜日".kanji?.should == true
      "金曜日だ〜".kanji?.should == false
      "金曜日FRIDAY".kanji?.should == false
    end

    it "hankaku" do
      "あ".hankaku?.should == false
      "ア".hankaku?.should == false
      "a".hankaku?.should == false
      "Ａ".hankaku?.should == false           # full-width Latin
      "ＡＬＯＨＡ".hankaku?.should == false   # full-width Latin
      "アろは".hankaku?.should == false
      "aloha".hankaku?.should == false
      "aloは".hankaku?.should == false
      "ｱ".hankaku?.should == true             # half-width katakana
      "ｱﾛﾊ".hankaku?.should == true           # half-width katakana
      "｡".hankaku?.should == true             # half-width ideographic full stop
    end

    it "zenkaku" do
      "あ".zenkaku?.should == true
      "ア".zenkaku?.should == true
      "a".zenkaku?.should == false
      "Ａ".zenkaku?.should == true            # full-width Latin
      "ＡＬＯＨＡ".zenkaku?.should == true    # full-width Latin
      "アろは".zenkaku?.should == true
      "アロハ".zenkaku?.should == true
      "aloha".zenkaku?.should == false
      "aloは".zenkaku?.should == false
      "ｱ".zenkaku?.should == false            # half-width katakana
      "ｱﾛﾊ".zenkaku?.should == false          # half-width katakana
    end

    it "Japanese characters" do
      "あ".japanese?.should == true
      "ア".japanese?.should == true
      "a".japanese?.should == false
      "アろは".japanese?.should == true
      "アロハ".japanese?.should == true
      "aloha".japanese?.should == false
      "aloは".japanese?.should == false
      "ｱ".japanese?.should == true            # half-width katakana
      "ｱﾛﾊ".japanese?.should == true          # half-width katakana
      "アロハeverybody".japanese?.should == false
    end
  end

end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Shared RSpec bootstrap, originally scaffolded by `rspec --init`.
# By convention specs live under `spec`, which RSpec adds to the `$LOAD_PATH`;
# load this file with `require "spec_helper"` so that it is only loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration

# SimpleCov is required and started ahead of every other require below.
require 'simplecov'
SimpleCov.start

require 'rubygems'
require 'bundler/setup'

require 'mojinizer'

RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run :focus

  # Randomised ordering surfaces specs that depend on running in a particular
  # sequence. Each run prints its seed; to debug an order dependency, replay
  # the same order by passing that seed back:
  #     --seed 1234
  rspec.order = 'random'
end
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
# Serialized Gem::Specification for mojinizer 0.1.1.
# Nesting is significant: each tagged node (Gem::Version, Gem::Dependency,
# Gem::Requirement) owns the indented keys beneath it.
# NOTE(review): presumably emitted by `gem build` (rubygems_version 2.0.3);
# if so, regenerate from mojinizer.gemspec rather than editing by hand.
name: mojinizer
version: !ruby/object:Gem::Version
  version: 0.1.1
platform: ruby
authors:
- Ikayzo
autorequire:
bindir: bin
cert_chain: []
date: 2013-04-09 00:00:00.000000000 Z
# One runtime dependency (moji) followed by four development dependencies.
dependencies:
- !ruby/object:Gem::Dependency
  name: moji
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '1.6'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '1.6'
- !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '1.3'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '1.3'
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 10.0.4
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 10.0.4
- !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 2.13.0
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 2.13.0
- !ruby/object:Gem::Dependency
  name: simplecov
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.7.1
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.7.1
description: Combines the functionality of the Moji and Romajinizer gems. And adds
  Japanese kana detection and conversion methods to the String class.
email:
- ckobayashi@ikayzo.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- .gitignore
- .rspec
- Gemfile
- LICENSE.txt
- README.md
- Rakefile
- lib/mojinizer.rb
- lib/mojinizer/conversion.rb
- lib/mojinizer/detection.rb
- lib/mojinizer/romaji_tables.rb
- lib/mojinizer/version.rb
- mojinizer.gemspec
- spec/mojinizer_spec.rb
- spec/spec_helper.rb
homepage: https://github.com/ikayzo/mojinizer
licenses:
- MIT
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.0.3
signing_key:
specification_version: 4
summary: A gem for converting between hiragana, katakana, and romaji. This gem does
  not convert kanji characters.
test_files:
- spec/mojinizer_spec.rb
- spec/spec_helper.rb
|