mojinizer 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +82 -0
- data/README.md +330 -0
- data/Rakefile +1 -0
- data/lib/mojinizer/conversion.rb +150 -0
- data/lib/mojinizer/detection.rb +68 -0
- data/lib/mojinizer/romaji_tables.rb +105 -0
- data/lib/mojinizer/version.rb +3 -0
- data/lib/mojinizer.rb +9 -0
- data/mojinizer.gemspec +26 -0
- data/spec/mojinizer_spec.rb +218 -0
- data/spec/spec_helper.rb +26 -0
- metadata +132 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 666b4a71f6827c81a227ff94392be444d10e765d
|
4
|
+
data.tar.gz: 59ac86b46149b7b7848be4639e90cf43aeadbc71
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fa21209d9b45a8354310c5475089f86d7472a63506f165319959f5895fe23fdedd2f6c0ae484fd06c829974db5d2afc5685308677e418c2f13a66a7e086e283f
|
7
|
+
data.tar.gz: 7b7e555a40ea337e4f5f2e26afa0ac0c0ac4bbae140a9196c215544a0f18eb30053c536c82c62ed5ddf434e2945dbaa43374be4cfcb69c5a6619e404dd7351dd
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
Copyright (c) 2013 Christopher Kobayashi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
|
25
|
+
-------------------------------------------------------------------------------
|
26
|
+
Christopher Kobayashi (ckobayashi at ikayzo.com 2013-03-26)
|
27
|
+
https://github.com/ikayzo/mojinizer
|
28
|
+
|
29
|
+
A gem for converting between hiragana, katakana, and romaji.
|
30
|
+
|
31
|
+
This gem:
|
32
|
+
|
33
|
+
* does _NOT_ convert kanji characters
|
34
|
+
* adds Japanese kana detection and conversion methods to the String class
|
35
|
+
* uses [moji](https://github.com/gimite/moji) to detect and convert kana
|
36
|
+
strings (i.e., hiragana, katakana)
|
37
|
+
* uses code from [romajinizer](https://github.com/joeellis/romajinizer) to
|
38
|
+
convert kana to romaji and vice versa
|
39
|
+
|
40
|
+
Since this gem is not backwards compatible with either gem, following
|
41
|
+
community etiquette we have republished this gem under a different name and
|
42
|
+
started over with the version number.
|
43
|
+
|
44
|
+
Versioning is based on the [Semantic Versioning standard](http://semver.org/)
|
45
|
+
-------------------------------------------------------------------------------
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
LICENSE HISTORY of the Romajinizer gem (originally called kana2rom.rb)
|
50
|
+
-------------------------------------------------------------------------------
|
51
|
+
K.Kodama 2002.06
|
52
|
+
This script is distributed freely in the sense of GNU General Public License.
|
53
|
+
http://www.gnu.org/licenses/gpl.html
|
54
|
+
#
|
55
|
+
-------------------------------------------------------------------------------
|
56
|
+
Paul Chapman (paul [a../t] longweekendmobile 2010-04-01)
|
57
|
+
Repaired script to work with modern Ruby versions (1.86+), added comments,
|
58
|
+
made it support gaijin friendly transliterations!
|
59
|
+
kana2kana was added by Paul 2009-05-12 22:31
|
60
|
+
-------------------------------------------------------------------------------
|
61
|
+
Joe Ellis (joe at squarefour.net 2011-03-09)
|
62
|
+
Added a few more edge cases ('n romaji support),
|
63
|
+
Started gemifications so it can easily be used in any project
|
64
|
+
Added normalization for double nn so that こんばn will still be converted to
|
65
|
+
こんばん properly
|
66
|
+
MIT License
|
67
|
+
-------------------------------------------------------------------------------
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
LICENSE HISTORY of the Moji gem
|
72
|
+
-------------------------------------------------------------------------------
|
73
|
+
ライセンス:
|
74
|
+
|
75
|
+
Public Domainです。煮るなり焼くなりご自由に。
|
76
|
+
|
77
|
+
Literal translation -
|
78
|
+
|
79
|
+
LICENSE:
|
80
|
+
|
81
|
+
This is Public Domain. Boil it, bake it, use it freely.
|
82
|
+
-------------------------------------------------------------------------------
|
data/README.md
ADDED
@@ -0,0 +1,330 @@
|
|
1
|
+
# Mojinizer
|
2
|
+
|
3
|
+
A gem for converting between hiragana, katakana, and romaji.
|
4
|
+
|
5
|
+
This gem:
|
6
|
+
|
7
|
+
* does _NOT_ convert kanji characters
|
8
|
+
* adds Japanese kana detection and conversion methods to the String class
|
9
|
+
* uses [moji](https://github.com/gimite/moji) to detect and convert kana strings (i.e., hiragana, katakana)
|
10
|
+
* uses code from [romajinizer](https://github.com/joeellis/romajinizer) to convert kana to romaji and vice versa
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
Add this line to your application's `Gemfile`:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
gem 'mojinizer'
|
20
|
+
```
|
21
|
+
|
22
|
+
And then execute:
|
23
|
+
|
24
|
+
```term
|
25
|
+
$ bundle
|
26
|
+
```
|
27
|
+
|
28
|
+
Or install it yourself as:
|
29
|
+
|
30
|
+
```term
|
31
|
+
$ gem install mojinizer
|
32
|
+
```
|
33
|
+
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
Japanese string conversion and detection methods are added to the `String` class. Call these like you would call any `String` object's methods. And you can chain them too.
|
37
|
+
|
38
|
+
### Conversion Methods
|
39
|
+
|
40
|
+
* Hiragana/katakana --> romaji conversion (平仮名/片仮名 --> ローマ字 変換)
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
"つくえ".romaji #=> "tsukue"
|
44
|
+
"ツクエ".romaji #=> "tsukue"
|
45
|
+
```
|
46
|
+
|
47
|
+
* Katakana/romaji --> hiragana conversion (片仮名/ローマ字 --> 平仮名 変換)
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
"ツクエ".hiragana #=> "つくえ"
|
51
|
+
"tsukue".hiragana #=> "つくえ"
|
52
|
+
```
|
53
|
+
|
54
|
+
* Hiragana/romaji --> katakana conversion (平仮名/ローマ字 --> 片仮名 変換)
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
"つくえ".katakana #=> "ツクエ"
|
58
|
+
"tsukue".katakana #=> "ツクエ"
|
59
|
+
```
|
60
|
+
|
61
|
+
* Hiragana --> katakana conversion (平仮名 --> 片仮名 変換)
|
62
|
+
* Katakana --> hiragana conversion (片仮名 --> 平仮名 変換)
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
"つくえ".hira_to_kata #=> "ツクエ"
|
66
|
+
"ツクエ".kata_to_hira #=> "つくえ"
|
67
|
+
```
|
68
|
+
|
69
|
+
* Zenkaku --> hankaku conversion (全角 --> 半角 文字種変換)
|
70
|
+
* Hankaku --> zenkaku conversion (半角 --> 全角 文字種変換)
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
"アロハ".zen_to_han #=> "アロハ"
|
74
|
+
"Aloha!".zen_to_han #=> "Aloha!"
|
75
|
+
"アロハ".han_to_zen #=> "アロハ"
|
76
|
+
"Aloha!".han_to_zen #=> "Aloha!"
|
77
|
+
```
|
78
|
+
|
79
|
+
* Chaining conversion methods
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
"tsukue".katakana.zen_to_han #=> "ツクエ"
|
83
|
+
"ツクエ".han_to_zen.hiragana #=> "つくえ"
|
84
|
+
"ツクエ".han_to_zen.romaji #=> "tsukue"
|
85
|
+
"ツクエ".han_to_zen.romaji.upcase #=> "TSUKUE"
|
86
|
+
"ツクエ".han_to_zen.romaji.upcase.han_to_zen #=> "TSUKUE"
|
87
|
+
```
|
88
|
+
|
89
|
+
|
90
|
+
### Detection Methods
|
91
|
+
|
92
|
+
Used to detect Japanese character types (i.e., hiragana, katakana, kanji, full/half-width etc.). There are two groups of detection methods: methods that check the entire string, and methods that check whether the string contains character(s) of a specified character type.
|
93
|
+
|
94
|
+
If you need to detect other types of Japanese characters such as symbols or just need more flexibility, we expose the `moji` gem's `type?` method.
|
95
|
+
|
96
|
+
#### Check the entire string
|
97
|
+
|
98
|
+
* Is the entire string kana(hiragana/katakana)? (かな/カナ・文字種判定)
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
"アロハ".kana? #=> true
|
102
|
+
"すし".kana? #=> true
|
103
|
+
"Aloha".kana? #=> false
|
104
|
+
"Let's eat すし".kana? #=> false
|
105
|
+
```
|
106
|
+
|
107
|
+
* Is the entire string hiragana? (平仮名・文字種判定)
|
108
|
+
* Is the entire string katakana? (片仮名・文字種判定)
|
109
|
+
|
110
|
+
```ruby
|
111
|
+
"アロハ".katakana? #=> true
|
112
|
+
"すし".katakana? #=> false
|
113
|
+
"アロハ everybody".katakana? #=> false
|
114
|
+
"アロハ".hiragana? #=> false
|
115
|
+
"すし".hiragana? #=> true
|
116
|
+
"Let's eat すし".hiragana? #=> false
|
117
|
+
```
|
118
|
+
|
119
|
+
* Is the entire string kanji? (漢字・文字種判定)
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
"金曜日".kanji? #=> true
|
123
|
+
"金曜日だよ".kanji? #=> false
|
124
|
+
"It's Friday, 金曜日".kanji? #=> false
|
125
|
+
```
|
126
|
+
|
127
|
+
* Is the entire string hankaku? (半角・文字種判定)
|
128
|
+
* Is the entire string zenkaku? (全角・文字種判定)
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
"アロハ".hankaku? #=> true
|
132
|
+
"アロハ".hankaku? #=> false
|
133
|
+
"アロハ".zenkaku? #=> false
|
134
|
+
"アロハ".zenkaku? #=> true
|
135
|
+
```
|
136
|
+
|
137
|
+
* Is the entire string Japanese? (日本語・文字種判定)
|
138
|
+
|
139
|
+
```ruby
|
140
|
+
"アロハ".japanese? #=> true
|
141
|
+
"Let's eat すし".japanese? #=> false
|
142
|
+
```
|
143
|
+
|
144
|
+
#### Check if the string contains
|
145
|
+
|
146
|
+
* Does the string contain kana(hiragana/katakana)? (かな/カナ・文字種判定)
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
"Let's eat すし".contains_kana? #=> true
|
150
|
+
```
|
151
|
+
|
152
|
+
* Does the string contain hiragana? (平仮名・文字種判定)
|
153
|
+
* Does the string contain katakana? (片仮名・文字種判定)
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
"アロハ everybody".contains_katakana? #=> true
|
157
|
+
"Let's eat すし".contains_katakana? #=> false
|
158
|
+
"アロハ everybody".contains_hiragana? #=> false
|
159
|
+
"Let's eat すし".contains_hiragana? #=> true
|
160
|
+
```
|
161
|
+
|
162
|
+
* Does the string contain kanji? (漢字・文字種判定)
|
163
|
+
|
164
|
+
```ruby
|
165
|
+
"金曜日だよ".contains_kanji? #=> true
|
166
|
+
"It's Friday, Friday".contains_kanji? #=> false
|
167
|
+
```
|
168
|
+
|
169
|
+
* Does the string contain hankaku? (半角・文字種判定)
|
170
|
+
* Does the string contain zenkaku? (全角・文字種判定)
|
171
|
+
|
172
|
+
```ruby
|
173
|
+
"アロハ everybody".contains_hankaku? #=> true
|
174
|
+
"Let's eat すし".contains_hankaku? #=> false
|
175
|
+
"アロハ everybody".contains_zenkaku? #=> false
|
176
|
+
"Let's eat すし".contains_zenkaku? #=> true
|
177
|
+
```
|
178
|
+
|
179
|
+
* Does the string contain Japanese? (日本語・文字種判定)
|
180
|
+
|
181
|
+
```ruby
|
182
|
+
"Let's eat すし".contains_japanese? #=> true
|
183
|
+
"It's Friday, Friday".contains_japanese? #=> false
|
184
|
+
```
|
185
|
+
|
186
|
+
### MOAR detection with `moji_type?` and `contains_moji_type?`
|
187
|
+
|
188
|
+
The `moji` gem provides a customizable detection method. Mojinizer exposes this method as `moji_type?` and `contains_moji_type?`: the former checks the entire string, and the latter checks whether the string contains a character of the requested type. Combine types using the pipe character (`|`).
|
189
|
+
|
190
|
+
```ruby
|
191
|
+
"アロハー!".moji_type?(Moji::KATA) #=> false
|
192
|
+
"アロハー!".moji_type?(Moji::KATA | Moji::SYMBOL) #=> true
|
193
|
+
"アロハー!".contains_moji_type?(Moji::ZEN_SYMBOL) #=> true
|
194
|
+
"アロハー!".contains_moji_type?(Moji::HIRA) #=> false
|
195
|
+
```
|
196
|
+
|
197
|
+
Here are the types that can be used or combined (copied from the `moji` gem's source code).
|
198
|
+
|
199
|
+
```
|
200
|
+
==定数:
|
201
|
+
|
202
|
+
以下の定数は、文字種の一番細かい分類です。
|
203
|
+
(({Moji.type})) が返すのは、以下の定数のうちの1つです。
|
204
|
+
|
205
|
+
--- HAN_CONTROL
|
206
|
+
制御文字。
|
207
|
+
--- HAN_ASYMBOL
|
208
|
+
ASCIIに含まれる半角記号。
|
209
|
+
--- HAN_JSYMBOL
|
210
|
+
JISに含まれるがASCIIには含まれない半角記号。
|
211
|
+
--- HAN_NUMBER
|
212
|
+
半角数字。
|
213
|
+
--- HAN_UPPER
|
214
|
+
半角アルファベット大文字。
|
215
|
+
--- HAN_LOWER
|
216
|
+
半角アルファベット小文字。
|
217
|
+
--- HAN_KATA
|
218
|
+
半角カタカナ。
|
219
|
+
--- ZEN_ASYMBOL
|
220
|
+
JISの全角記号のうち、ASCIIに対応する半角記号があるもの。
|
221
|
+
--- ZEN_JSYMBOL
|
222
|
+
JISの全角記号のうち、ASCIIに対応する半角記号がないもの。
|
223
|
+
--- ZEN_NUMBER
|
224
|
+
全角数字。
|
225
|
+
--- ZEN_UPPER
|
226
|
+
全角アルファベット大文字。
|
227
|
+
--- ZEN_LOWER
|
228
|
+
全角アルファベット小文字。
|
229
|
+
--- ZEN_HIRA
|
230
|
+
ひらがな。
|
231
|
+
--- ZEN_KATA
|
232
|
+
全角カタカナ。
|
233
|
+
--- ZEN_GREEK
|
234
|
+
ギリシャ文字。
|
235
|
+
--- ZEN_CYRILLIC
|
236
|
+
キリル文字。
|
237
|
+
--- ZEN_LINE
|
238
|
+
罫線のかけら。
|
239
|
+
--- ZEN_KANJI
|
240
|
+
漢字。
|
241
|
+
|
242
|
+
以下の定数は、上の文字種の組み合わせと別名です。
|
243
|
+
|
244
|
+
--- HAN_SYMBOL
|
245
|
+
JISに含まれる半角記号。(({HAN_ASYMBOL | HAN_JSYMBOL}))
|
246
|
+
--- HAN_ALPHA
|
247
|
+
半角アルファベット。(({HAN_UPPER | HAN_LOWER}))
|
248
|
+
--- HAN_ALNUM
|
249
|
+
半角英数字。(({HAN_ALPHA | HAN_NUMBER}))
|
250
|
+
--- HAN
|
251
|
+
全ての半角文字。(({HAN_CONTROL | HAN_SYMBOL | HAN_ALNUM | HAN_KATA}))
|
252
|
+
--- ZEN_SYMBOL
|
253
|
+
JISに含まれる全角記号。(({ZEN_ASYMBOL | ZEN_JSYMBOL}))
|
254
|
+
--- ZEN_ALPHA
|
255
|
+
全角アルファベット。(({ZEN_UPPER | ZEN_LOWER}))
|
256
|
+
--- ZEN_ALNUM
|
257
|
+
全角英数字。(({ZEN_ALPHA | ZEN_NUMBER}))
|
258
|
+
--- ZEN_KANA
|
259
|
+
全角かな/カナ。(({ZEN_KATA | ZEN_HIRA}))
|
260
|
+
--- ZEN
|
261
|
+
JISに含まれる全ての全角文字。(({ZEN_SYMBOL | ZEN_ALNUM | ZEN_KANA | ZEN_GREEK | ZEN_CYRILLIC | ZEN_LINE | ZEN_KANJI}))
|
262
|
+
--- ASYMBOL
|
263
|
+
ASCIIに含まれる半角記号とその全角版。(({HAN_ASYMBOL | ZEN_ASYMBOL}))
|
264
|
+
--- JSYMBOL
|
265
|
+
JISに含まれるが (({ASYMBOL})) には含まれない全角/半角記号。(({HAN_JSYMBOL | ZEN_JSYMBOL}))
|
266
|
+
--- SYMBOL
|
267
|
+
JISに含まれる全ての全角/半角記号。(({HAN_SYMBOL | ZEN_SYMBOL}))
|
268
|
+
--- NUMBER
|
269
|
+
全角/半角数字。(({HAN_NUMBER | ZEN_NUMBER}))
|
270
|
+
--- UPPER
|
271
|
+
全角/半角アルファベット大文字。(({HAN_UPPER | ZEN_UPPER}))
|
272
|
+
--- LOWER
|
273
|
+
全角/半角アルファベット小文字。(({HAN_LOWER | ZEN_LOWER}))
|
274
|
+
--- ALPHA
|
275
|
+
全角/半角アルファベット。(({HAN_ALPHA | ZEN_ALPHA}))
|
276
|
+
--- ALNUM
|
277
|
+
全角/半角英数字。(({HAN_ALNUM | ZEN_ALNUM}))
|
278
|
+
--- HIRA
|
279
|
+
(({ZEN_HIRA})) の別名。
|
280
|
+
--- KATA
|
281
|
+
全角/半角カタカナ。(({HAN_KATA | ZEN_KATA}))
|
282
|
+
--- KANA
|
283
|
+
全角/半角 かな/カナ。(({KATA | ZEN_HIRA}))
|
284
|
+
--- GREEK
|
285
|
+
(({ZEN_GREEK})) の別名。
|
286
|
+
--- CYRILLIC
|
287
|
+
(({ZEN_CYRILLIC})) の別名。
|
288
|
+
--- LINE
|
289
|
+
(({ZEN_LINE})) の別名。
|
290
|
+
--- KANJI
|
291
|
+
(({ZEN_KANJI})) の別名。
|
292
|
+
--- ALL
|
293
|
+
上記全ての文字。
|
294
|
+
```
|
295
|
+
|
296
|
+
|
297
|
+
## License
|
298
|
+
|
299
|
+
MIT License
|
300
|
+
|
301
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
302
|
+
a copy of this software and associated documentation files (the
|
303
|
+
"Software"), to deal in the Software without restriction, including
|
304
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
305
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
306
|
+
permit persons to whom the Software is furnished to do so, subject to
|
307
|
+
the following conditions:
|
308
|
+
|
309
|
+
The above copyright notice and this permission notice shall be
|
310
|
+
included in all copies or substantial portions of the Software.
|
311
|
+
|
312
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
313
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
314
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
315
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
316
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
317
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
318
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
319
|
+
|
320
|
+
Refer to [LICENSE.txt](LICENSE.txt) file for additional information.
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
## Contributing
|
325
|
+
|
326
|
+
1. Fork it
|
327
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
328
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
329
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
330
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Mojinizer

  # Converts the kana in the receiver to romaji.
  #
  # Each character is looked up in KANA_TO_ROM (characters without an entry
  # pass through unchanged). The intermediate string is then normalised by an
  # ordered series of substitutions; the order matters because later rules
  # consume the markers ("x..." for small kana, "xtsu" for the small tsu,
  # "nn" for the syllabic n) that the table lookup leaves behind.
  #
  # Returns a new String; the receiver is not modified.
  def romaji
    s = "".dup
    each_char { |c| s << KANA_TO_ROM.fetch(c, c) }

    # Stand-alone (han)dakuten marks: KANA_TO_ROM maps ゛ to '"' and ゜ to "'",
    # so e.g. ka" becomes ga and ha' becomes pa.
    s = s.gsub(/(k)([aiueo])(")/, 'g\2').gsub(/(s)([aiueo])(")/, 'z\2').gsub(/(t)([aiueo])(")/, 'd\2')
    s = s.gsub(/(h)([aiueo])(")/, 'b\2').gsub(/([fh])([aiueo])(')/, 'p\2').gsub(/u"/, 'vu')
    #---------------------------------------------------------
    # Remove the spaces before/after a hanging small tsu. The group must NOT be
    # optional: with "(xtsu)?" any two consecutive whitespace characters were
    # rewritten to "xtsu" and then turned into a doubled consonant.
    s = s.gsub(/\s(xtsu)\s/, 'xtsu')
    #---------------------------------------------------------
    # Small tsu + consonant doubles the consonant: xtsuka --> kka.
    # Repeat until nothing changes so chained small tsu are all consumed.
    loop do
      break if s.gsub!(/(xtsu)([ckgszjtdhfbpmyrwnv])/, '\2\2').nil?
    end
    #---------------------------------------------------------
    # Compound phoneme pattern rollbacks.
    # NB: replacements use backrefs such as '\1y\3' (1st capture, 'y', 3rd capture).
    #---------------------------------------------------------
    s = s.gsub(/( +x)(.*)/, 'x\2')                          # Avoid hanging small kana caused by leading spaces
    s = s.gsub(/(ch)(ixy)([aueo])/, '\1\3')                 # chixyo --> cho
    s = s.gsub(/([kgszjtdnhfbpmr])(ixy)([auo])/, '\1y\3')   # kixya --> kya
    s = s.gsub(/([kgszjtdnhfbpmr])(ix)([ie])/, '\1y\3')     # kixi --> kyi
    #---------------------------------------------------------
    s = s.gsub(/(sh)(y)([aueo])/, '\1\3')                   # shyu --> shu
    s = s.gsub(/(j)(y)([aueo])/, '\1\3')                    # jyu --> ju
    #---------------------------------------------------------
    s = s.gsub(/([td])(exy)([aueo])/, '\1h\3')              # texya --> tha
    s = s.gsub(/([td])(ex)([ie])/, '\1\3')                  # texi --> ti
    s = s.gsub(/([td])(oxu)/, '\1oo')                       # toxu --> too
    s = s.gsub(/(tsu)(x)([aiueo])/, 'ts\3')                 # tsuxa --> tsa
    s = s.gsub(/([d])(oxy)/, '\1o\'y')                      # doxyu --> do'yu
    #---------------------------------------------------------
    s = s.gsub(/(vux)([aieo])/, 'v\2')                      # vuxa --> va
    s = s.gsub(/(vuxy)([aueo])/, 'vy\2')                    # vuxyu --> vyu
    s = s.gsub(/(ixe)/, 'iye')                              # ixe --> iye
    s = s.gsub(/(hoxe)/, 'howe')                            # hoxe --> howe
    s = s.gsub(/(fux)([aieo])/, 'f\2')                      # fuxa --> fa
    s = s.gsub(/(fuxy)([aueo])/, 'fy\2')                    # fuxyu --> fyu
    # NOTE(review): 'a' is not in the class below, so "uxa" is left untouched
    # even though the upstream comment mentioned uxa --> wa; confirm the intent.
    s = s.gsub(/(ux)([ieo])/, 'w\2')                        # uxi --> wi
    #---------------------------------------------------------
    s = s.strip.gsub(/(xtsu)$/, 'h!')                       # Hanging small tsu at end of string
    s = s.gsub(/([aiueo]?)(\-)/, '\1\1')                    # Long-vowel mark doubles the preceding vowel
    #---------------------------------------------------------
    # Cleanup specifically for source strings that contain spaces.
    # The range is A-Z (it used to be A-z, which also covered [ \ ] ^ _ `),
    # matching the sibling rule below.
    s = s.gsub(/( +)([^a-z|A-Z])/, '\2')                    # Remove spaces before any non-alphabetical char
    s = s.gsub(/(n')/, 'n')                                 # n' --> n
    s = s.gsub(/(nn)/, 'n')                                 # nn --> n
    s = s.gsub(/( n)[^a-z|A-Z]?$/, 'n')                     # Fix "n" appearing as a separate word
    s = s.gsub(/\s{2,}/, ' ')                               # Remove duplicate spaces
    #---------------------------------------------------------
    s
  end

  # Converts romaji and katakana in the receiver to hiragana by first turning
  # romaji into katakana and then katakana into hiragana.
  def hiragana
    roma_to_kata.kata_to_hira
  end

  # Converts hiragana and romaji in the receiver to katakana by first turning
  # hiragana into katakana and then romaji into katakana.
  def katakana
    hira_to_kata.roma_to_kata
  end

  # Delegates to Moji.hira_to_kata with the receiver as argument.
  def hira_to_kata
    Moji.hira_to_kata(self)
  end

  # Delegates to Moji.kata_to_hira with the receiver as argument.
  def kata_to_hira
    Moji.kata_to_hira(self)
  end

  # Delegates to Moji.han_to_zen with the receiver as argument.
  def han_to_zen
    Moji.han_to_zen(self)
  end

  # Delegates to Moji.zen_to_han with the receiver as argument.
  def zen_to_han
    Moji.zen_to_han(self)
  end

  # Converts romaji in the receiver to katakana.
  #
  # The input is consumed one character at a time into a buffer of at most
  # three characters. Whenever the buffer matches a key of ROM_TO_KATA1,
  # ROM_TO_KATA2 or ROM_TO_KATA3 the kana is emitted and the buffer cleared;
  # otherwise the first buffered character is emitted (as "ン" for a lone "n",
  # as "ッ" for a doubled consonant, verbatim for anything else) and matching
  # continues with the rest.
  #
  # Returns a new String; the receiver is not modified.
  def roma_to_kata
    result = ""
    word_buffer = []
    chars = each_char.to_a

    loop do
      case word_buffer.size
      ##### Empty buffer: fetch the next character or finish
      when 0
        return result if chars.empty?
        word_buffer.push(chars.shift)

      ##### Patterns with 1 roman character
      when 1
        if word_buffer[0] =~ /[aiueo-]/
          result += ROM_TO_KATA1[word_buffer[0]]
          word_buffer = []                            # a --> ア
        elsif word_buffer[0] =~ /[xkcgszjtdnhbpvfmyrlw']/
          if chars.size > 0
            word_buffer.push(chars.shift)             # possible start of a longer pattern
          else
            # Input ends on a single consonant; a trailing "n" becomes ン.
            return result + word_buffer[0].gsub(/n/, "ン")
          end
        else
          result += word_buffer.shift                 # not romaji: pass through
        end

      ##### Patterns with 2 roman characters
      when 2
        pair = word_buffer.join
        if ROM_TO_KATA2.key?(pair)
          result += ROM_TO_KATA2[pair]
          word_buffer = []
        elsif pair =~ /([kgszjtcdnhbpmrl]y)|([stcd]h)|ts|(x[wytk])/
          # Prefix of a 3-character pattern.
          if chars.size > 0
            word_buffer.push(chars.shift)
          else
            return result + pair.gsub(/n/, "ン")
          end
        elsif pair == "n'"
          result += "ン"
          word_buffer.shift(2)                        # n' --> ン
        elsif word_buffer[0] == "n"
          result += "ン"
          word_buffer.shift                           # nk --> ンk
        elsif word_buffer[0] == word_buffer[1]
          result += "ッ"
          word_buffer.shift                           # kk --> ッk
        else
          result += word_buffer.shift
        end

      ##### Patterns with 3 roman characters
      when 3
        triple = word_buffer.join
        if ROM_TO_KATA3.key?(triple)
          result += ROM_TO_KATA3[triple]
          word_buffer = []
        elsif word_buffer[0] == "n"
          result += "ン"
          word_buffer.shift
        else
          result += word_buffer.shift
        end
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Mojinizer
  # Character-type predicates.
  #
  # The plain predicates (hiragana?, kanji?, ...) are true only when EVERY
  # character of the receiver has the requested Moji type; the contains_*
  # predicates are true when AT LEAST ONE character does. Consequently the
  # plain predicates return true and the contains_* predicates return false
  # for an empty string.

  # True when every character is hiragana (Moji::HIRA).
  def hiragana?
    moji_type?(Moji::HIRA)
  end

  # True when every character is katakana (Moji::KATA).
  def katakana?
    moji_type?(Moji::KATA)
  end

  # True when every character is kana (Moji::KANA).
  #
  # This used to be `hiragana? || katakana?`, which returned false for a
  # string mixing hiragana and katakana although each character is kana.
  # Checking Moji::KANA per character keeps it in line with contains_kana?.
  def kana?
    moji_type?(Moji::KANA)
  end

  # True when every character is kanji (Moji::KANJI).
  def kanji?
    moji_type?(Moji::KANJI)
  end

  # True when every character is of type Moji::HAN_KATA or Moji::HAN_JSYMBOL.
  def hankaku?
    moji_type?(Moji::HAN_KATA | Moji::HAN_JSYMBOL)
  end

  # True when every character is of type Moji::ZEN.
  def zenkaku?
    moji_type?(Moji::ZEN)
  end

  # True when every character is of type Moji::ZEN, Moji::JSYMBOL or
  # Moji::HAN_KATA.
  def japanese?
    moji_type?(Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA)
  end

  # True when Moji.type?(char, type) holds for every character of the
  # receiver. `type` is a Moji type constant; types are combined with `|`.
  def moji_type?(type)
    each_char.all? { |c| Moji.type?(c, type) }
  end

  # True when at least one character is hiragana (Moji::HIRA).
  def contains_hiragana?
    contains_moji_type?(Moji::HIRA)
  end

  # True when at least one character is kana (Moji::KANA).
  def contains_kana?
    contains_moji_type?(Moji::KANA)
  end

  # True when at least one character is katakana (Moji::KATA).
  def contains_katakana?
    contains_moji_type?(Moji::KATA)
  end

  # True when at least one character is kanji (Moji::KANJI).
  def contains_kanji?
    contains_moji_type?(Moji::KANJI)
  end

  # True when at least one character is of type Moji::HAN_KATA or
  # Moji::HAN_JSYMBOL.
  def contains_hankaku?
    contains_moji_type?(Moji::HAN_KATA | Moji::HAN_JSYMBOL)
  end

  # True when at least one character is of type Moji::ZEN.
  def contains_zenkaku?
    contains_moji_type?(Moji::ZEN)
  end

  # True when at least one character is of type Moji::ZEN, Moji::JSYMBOL or
  # Moji::HAN_KATA.
  def contains_japanese?
    contains_moji_type?(Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA)
  end

  # True when Moji.type?(char, type) holds for at least one character of the
  # receiver. `type` is a Moji type constant; types are combined with `|`.
  def contains_moji_type?(type)
    each_char.any? { |c| Moji.type?(c, type) }
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Mojinizer

  # Single kana character --> intermediate romaji.
  #
  # Small kana map to an "x"-prefixed form (e.g. "xya", "xtsu") and the
  # syllabic n maps to "nn"; Mojinizer#romaji post-processes these markers.
  # Both katakana and hiragana rows must produce the same romaji: the katakana
  # シ previously mapped to "si" while し mapped to "shi", which made シャ come
  # out as "sya" but しゃ as "sha".
  KANA_TO_ROM={
    "ア"=>"a", "イ"=>"i", "ウ"=>"u", "エ"=>"e","オ"=>"o",
    "あ"=>"a", "い"=>"i", "う"=>"u", "え"=>"e","お"=>"o",
    "カ"=>"ka", "キ"=>"ki", "ク"=>"ku", "ケ"=>"ke", "コ"=>"ko",
    "か"=>"ka", "き"=>"ki", "く"=>"ku", "け"=>"ke", "こ"=>"ko",
    "ガ"=>"ga", "ギ"=>"gi", "グ"=>"gu", "ゲ"=>"ge", "ゴ"=>"go",
    "が"=>"ga", "ぎ"=>"gi", "ぐ"=>"gu", "げ"=>"ge", "ご"=>"go",
    "サ"=>"sa", "シ"=>"shi","ス"=>"su", "セ"=>"se", "ソ"=>"so",
    "さ"=>"sa", "し"=>"shi","す"=>"su", "せ"=>"se", "そ"=>"so",
    "ザ"=>"za", "ジ"=>"ji", "ズ"=>"zu", "ゼ"=>"ze", "ゾ"=>"zo",
    "ざ"=>"za", "じ"=>"ji", "ず"=>"zu", "ぜ"=>"ze", "ぞ"=>"zo",
    "タ"=>"ta", "チ"=>"chi","ツ"=>"tsu","テ"=>"te", "ト"=>"to",
    "た"=>"ta", "ち"=>"chi","つ"=>"tsu","て"=>"te", "と"=>"to",
    "ダ"=>"da", "ヂ"=>"dji","ヅ"=>"dzu","デ"=>"de", "ド"=>"do",
    "だ"=>"da", "ぢ"=>"dji","づ"=>"dzu","で"=>"de", "ど"=>"do",
    "ナ"=>"na", "ニ"=>"ni", "ヌ"=>"nu", "ネ"=>"ne", "ノ"=>"no",
    "な"=>"na", "に"=>"ni", "ぬ"=>"nu", "ね"=>"ne", "の"=>"no",
    "ハ"=>"ha", "ヒ"=>"hi", "フ"=>"fu", "ヘ"=>"he", "ホ"=>"ho",
    "は"=>"ha", "ひ"=>"hi", "ふ"=>"fu", "へ"=>"he", "ほ"=>"ho",
    "バ"=>"ba", "ビ"=>"bi", "ブ"=>"bu", "ベ"=>"be", "ボ"=>"bo",
    "ば"=>"ba", "び"=>"bi", "ぶ"=>"bu", "べ"=>"be", "ぼ"=>"bo",
    "パ"=>"pa", "ピ"=>"pi", "プ"=>"pu", "ペ"=>"pe", "ポ"=>"po",
    "ぱ"=>"pa", "ぴ"=>"pi", "ぷ"=>"pu", "ぺ"=>"pe", "ぽ"=>"po",
    "マ"=>"ma", "ミ"=>"mi", "ム"=>"mu", "メ"=>"me", "モ"=>"mo",
    "ま"=>"ma", "み"=>"mi", "む"=>"mu", "め"=>"me", "も"=>"mo",
    "ヤ"=>"ya", "ユ"=>"yu", "ヨ"=>"yo",
    "や"=>"ya", "ゆ"=>"yu", "よ"=>"yo",
    "ラ"=>"ra", "リ"=>"ri", "ル"=>"ru","レ"=>"re","ロ"=>"ro",
    "ら"=>"ra", "り"=>"ri", "る"=>"ru","れ"=>"re","ろ"=>"ro",
    "ワ"=>"wa", "ヰ"=>"wi", "ヱ"=>"we", "ヲ"=>"wo", "ン"=>"nn",
    "わ"=>"wa", "ゐ"=>"wi", "ゑ"=>"we", "を"=>"wo", "ん"=>"nn",
    "ァ"=>"xa", "ィ"=>"xi", "ゥ"=>"xu", "ェ"=>"xe", "ォ"=>"xo",
    "ぁ"=>"xa", "ぃ"=>"xi", "ぅ"=>"xu", "ぇ"=>"xe", "ぉ"=>"xo",
    "ッ"=>"xtsu","ャ"=>"xya", "ュ"=>"xyu", "ョ"=>"xyo",
    "っ"=>"xtsu","ゃ"=>"xya", "ゅ"=>"xyu", "ょ"=>"xyo",
    "ヴ"=>"vu", "ヵ"=>"xka","ヶ"=>"ga","ヮ"=>"xwa",
    "ゎ"=>"xwa",
    # Long-vowel mark, minus sign, stand-alone (han)dakuten and punctuation.
    "ー"=>"-", "−"=>"-", "゛"=>'"', "゜"=>"'", "、"=>",", "。"=>".",
    # NOTE(review): the entries below read as identity mappings (and " " is
    # listed twice); the keys are presumably full-width forms in the upstream
    # file - confirm against the published gem.
    ":"=>":", " " => " ", "@" => "@", "(" => "(", ")" => ")",
    " " => " "
  }

  # Two-kana sequences --> romaji. Not referenced by the conversion code
  # visible alongside this table.
  KANA_TO_ROM2={
    "てぃ" => "ti", "でぃ" => "di"
  }

  # 1 character romaji patterns
  ROM_TO_KATA1={
    "a"=>"ア", "i"=>"イ", "u"=>"ウ", "e"=>"エ", "o"=>"オ", "-"=>"ー"
  }

  # 2 character romaji patterns
  # ("tsu" is three characters long, so a two-character lookup never hits it;
  # the entry is kept so the constant's contents stay backward-compatible.)
  ROM_TO_KATA2={
    "xa"=>"ァ", "xi"=>"ィ", "xu"=>"ゥ", "xe"=>"ェ", "xo"=>"ォ",
    "ka"=>"カ", "ki"=>"キ", "ku"=>"ク", "ke"=>"ケ", "ko"=>"コ",
    "ca"=>"カ", "cu"=>"ク", "co"=>"コ",
    "ga"=>"ガ", "gi"=>"ギ", "gu"=>"グ", "ge"=>"ゲ", "go"=>"ゴ",
    "sa"=>"サ", "si"=>"シ", "su"=>"ス", "se"=>"セ", "so"=>"ソ",
    "za"=>"ザ", "zi"=>"ジ", "zu"=>"ズ", "ze"=>"ゼ", "zo"=>"ゾ",
    "ja"=>"ジャ","ji"=>"ジ", "ju"=>"ジュ","je"=>"ジェ","jo"=>"ジョ",
    "ta"=>"タ", "ti"=>"チ", "tsu"=>"ツ", "te"=>"テ", "to"=>"ト",
    "da"=>"ダ", "di"=>"ヂ", "du"=>"ヅ", "de"=>"デ", "do"=>"ド",
    "na"=>"ナ", "ni"=>"ニ", "nu"=>"ヌ", "ne"=>"ネ", "no"=>"ノ",
    "ha"=>"ハ", "hi"=>"ヒ", "hu"=>"フ", "he"=>"ヘ", "ho"=>"ホ",
    "ba"=>"バ", "bi"=>"ビ", "bu"=>"ブ", "be"=>"ベ", "bo"=>"ボ",
    "pa"=>"パ", "pi"=>"ピ", "pu"=>"プ", "pe"=>"ペ", "po"=>"ポ",
    "va"=>"ヴァ","vi"=>"ヴィ","vu"=>"ヴ", "ve"=>"ヴェ","vo"=>"ヴォ",
    "fa"=>"ファ","fi"=>"フィ","fu"=>"フ", "fe"=>"フェ","fo"=>"フォ",
    "ma"=>"マ", "mi"=>"ミ", "mu"=>"ム", "me"=>"メ", "mo"=>"モ",
    "ya"=>"ヤ", "yi"=>"イ", "yu"=>"ユ", "ye"=>"イェ", "yo"=>"ヨ",
    "ra"=>"ラ", "ri"=>"リ", "ru"=>"ル", "re"=>"レ", "ro"=>"ロ",
    "la"=>"ラ", "li"=>"リ", "lu"=>"ル", "le"=>"レ", "lo"=>"ロ",
    "wa"=>"ワ", "wi"=>"ヰ", "wu"=>"ウ", "we"=>"ヱ", "wo"=>"ヲ",
    "nn"=>"ン"
  }

  # 3 character romaji patterns
  # ("xtsu" is four characters long, so a three-character lookup never hits
  # it; the entry is kept so the constant's contents stay backward-compatible.)
  ROM_TO_KATA3={
    "tsu"=>"ツ",
    "xka"=>"ヵ", "xke"=>"ヶ",
    "xwa"=>"ヮ", "xtsu"=>"ッ", "xya"=>"ャ", "xyu"=>"ュ", "xyo"=>"ョ",
    "kya"=>"キャ", "kyi"=>"キィ", "kyu"=>"キュ", "kye"=>"キェ", "kyo"=>"キョ",
    "gya"=>"ギャ", "gyi"=>"ギィ", "gyu"=>"ギュ", "gye"=>"ギェ", "gyo"=>"ギョ",
    "sya"=>"シャ", "syi"=>"シィ", "syu"=>"シュ", "sye"=>"シェ", "syo"=>"ショ",
    "sha"=>"シャ", "shi"=>"シ", "shu"=>"シュ", "she"=>"シェ", "sho"=>"ショ",
    "zya"=>"ジャ", "zyi"=>"ジィ", "zyu"=>"ジュ", "zye"=>"ジェ", "zyo"=>"ジョ",
    "jya"=>"ジャ", "jyi"=>"ジィ", "jyu"=>"ジュ", "jye"=>"ジェ", "jyo"=>"ジョ",
    "tya"=>"チャ", "tyi"=>"チィ", "tyu"=>"チュ", "tye"=>"チェ", "tyo"=>"チョ",
    "cya"=>"チャ", "cyi"=>"チィ", "cyu"=>"チュ", "cye"=>"チェ", "cyo"=>"チョ",
    "cha"=>"チャ", "chi"=>"チ", "chu"=>"チュ", "che"=>"チェ", "cho"=>"チョ",
    "tha"=>"テャ", "thi"=>"ティ", "thu"=>"テュ", "the"=>"テェ", "tho"=>"テョ",
    "dya"=>"ヂャ", "dyi"=>"ヂィ", "dyu"=>"ヂュ", "dye"=>"ヂェ", "dyo"=>"ヂョ",
    "dha"=>"デャ", "dhi"=>"ディ", "dhu"=>"デュ", "dhe"=>"デェ", "dho"=>"デョ",
    "nya"=>"ニャ", "nyi"=>"ニィ", "nyu"=>"ニュ", "nye"=>"ニェ", "nyo"=>"ニョ",
    "hya"=>"ヒャ", "hyi"=>"ヒィ", "hyu"=>"ヒュ", "hye"=>"ヒェ", "hyo"=>"ヒョ",
    "bya"=>"ビャ", "byi"=>"ビィ", "byu"=>"ビュ", "bye"=>"ビェ", "byo"=>"ビョ",
    "pya"=>"ピャ", "pyi"=>"ピィ", "pyu"=>"ピュ", "pye"=>"ピェ", "pyo"=>"ピョ",
    "mya"=>"ミャ", "myi"=>"ミィ", "myu"=>"ミュ", "mye"=>"ミェ", "myo"=>"ミョ",
    "rya"=>"リャ", "ryi"=>"リィ", "ryu"=>"リュ", "rye"=>"リェ", "ryo"=>"リョ",
    "lya"=>"リャ", "lyi"=>"リィ", "lyu"=>"リュ", "lye"=>"リェ", "lyo"=>"リョ"
  }

end
|
data/lib/mojinizer.rb
ADDED
data/mojinizer.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'mojinizer/version'
|
5
|
+
|
6
|
+
# Gem specification for mojinizer: metadata, packaged files (everything tracked
# by git), the runtime dependency on moji and the development tool chain.
Gem::Specification.new do |gem|
  # --- Identity and metadata ---
  gem.name        = "mojinizer"
  gem.version     = Mojinizer::VERSION
  gem.authors     = ["Ikayzo"]
  gem.email       = ["ckobayashi@ikayzo.com"]
  gem.description = "Combines the functionality of the Moji and Romajinizer gems. And adds Japanese kana detection and conversion methods to the String class."
  gem.summary     = "A gem for converting between hiragana, katakana, and romaji. This gem does not convert kanji characters."
  gem.homepage    = "https://github.com/ikayzo/mojinizer"
  gem.license     = "MIT"

  # --- Packaged files: whatever git tracks, split on the record separator ---
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w[lib]

  # --- Dependencies ---
  gem.add_runtime_dependency("moji", "~> 1.6")
  {
    "bundler"   => "~> 1.3",
    "rake"      => "~> 10.0.4",
    "rspec"     => "~> 2.13.0",
    "simplecov" => "~> 0.7.1"
  }.each do |dependency, requirement|
    gem.add_development_dependency(dependency, requirement)
  end
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Mojinizer do
|
5
|
+
context "should convert" do
|
6
|
+
it "romaji or katakana to hiragana properly" do
  # Each pair is [input, expected result of String#hiragana].
  [
    ["tsukue",    "つくえ"],
    ["kinnyoubi", "きんようび"],
    ["kin'youbi", "きんようび"],
    ["konnya",    "こんや"],
    ["konnnichi", "こんにち"],
    ["kaetta",    "かえった"],
    ["ツクエ",     "つくえ"],
    ["こんばn",    "こんばん"],
    ["konnbann",  "こんばん"],
    ["",          ""]
  ].each do |input, expected|
    input.hiragana.should == expected
  end
end
|
18
|
+
|
19
|
+
it "romaji or hiragana to katakana properly" do
  # Each pair is [input, expected result of String#katakana]; kanji and
  # punctuation in the input are expected to come back unchanged.
  [
    ["tsukue",          "ツクエ"],
    ["kinnyoubi",       "キンヨウビ"],
    ["kin'youbi",       "キンヨウビ"],
    ["konnya",          "コンヤ"],
    ["konnnichi",       "コンニチ"],
    ["kaetta",          "カエッタ"],
    ["つくえ",           "ツクエ"],
    ["行きます",         "行キマス"],
    ["こんばn",          "コンバン"],
    ["konnbann",        "コンバン"],
    ["aloha元気?",       "アロハ元気?"],
    ["アロハ、げんき?",   "アロハ、ゲンキ?"],
    ["aloha まはろ",     "アロハ マハロ"],
    ["",                ""]
  ].each do |input, expected|
    input.katakana.should == expected
  end
end
|
35
|
+
|
36
|
+
it "kana to romaji properly" do
  # Each pair is [input, expected result of String#romaji]; text that is
  # already romaji is expected to pass through untouched.
  [
    ["つくえ",               "tsukue"],
    ["きんようび",            "kinyoubi"],
    ["こんや",               "konya"],
    ["こんにち",             "konnichi"],
    ["ツクエ",               "tsukue"],
    ["キンヨウビ",            "kinyoubi"],
    ["コンヤ",               "konya"],
    ["コンニチ",             "konnichi"],
    ["today is きんようび",   "today is kinyoubi"],
    ["today is キンヨウビ",   "today is kinyoubi"],
    ["",                    ""]
  ].each do |input, expected|
    input.romaji.should == expected
  end
end
|
49
|
+
|
50
|
+
it "romaji or kana to hankaku properly" do
  # Width-sensitive literals: inputs are full-width (zenkaku), expectations
  # are half-width (hankaku). Do not Unicode-normalize this file; NFKC folds
  # both sides to the same text and makes these assertions meaningless.
  # Hiragana has no half-width form, so it is expected to come back unchanged.
  "あろは".zen_to_han.should == "あろは"
  "アロハ！".zen_to_han.should == "ｱﾛﾊ!"
  "ａｌｏｈａ！".zen_to_han.should == "aloha!"
  "ＡＬＯＨＡ！".zen_to_han.should == "ALOHA!"
  "".zen_to_han.should == ""
end
|
57
|
+
|
58
|
+
it "romaji or kana to zenkaku properly" do
  # Width-sensitive literals: inputs are half-width (hankaku), expectations
  # are full-width (zenkaku). Do not Unicode-normalize this file; NFKC folds
  # both sides to the same text and makes these assertions meaningless.
  "ｱﾛﾊ!".han_to_zen.should == "アロハ！"
  "あろは!".han_to_zen.should == "あろは！"
  "aloha!".han_to_zen.should == "ａｌｏｈａ！"
  "ALOHA!".han_to_zen.should == "ＡＬＯＨＡ！"
  "".han_to_zen.should == ""
end
|
65
|
+
end
|
66
|
+
|
67
|
+
context "should be able to tell if a string contains" do
|
68
|
+
it "kana" do
  # Covers hiragana, full-width katakana and half-width katakana (ｱﾛﾊ).
  # The last literal is width-sensitive: keep it half-width, otherwise it
  # merely repeats the full-width katakana case.
  "行きます".contains_kana?.should == true
  "abcdefg!".contains_kana?.should == false
  "アロハeverybody".contains_kana?.should == true
  "あろはeverybody".contains_kana?.should == true
  "ｱﾛﾊeverybody".contains_kana?.should == true
end
|
75
|
+
|
76
|
+
it "hiragana" do
  # Katakana of either width must not count as hiragana. The last literal is
  # half-width katakana (ｱﾛﾊ); keep it half-width, otherwise it merely repeats
  # the full-width case.
  "行きます".contains_hiragana?.should == true
  "abcdefg!".contains_hiragana?.should == false
  "アロハeverybody".contains_hiragana?.should == false
  "あろはeverybody".contains_hiragana?.should == true
  "ｱﾛﾊeverybody".contains_hiragana?.should == false
end
|
83
|
+
|
84
|
+
it "katakana" do
  # Both full-width and half-width katakana are expected to be detected. The
  # last literal is half-width (ｱﾛﾊ); keep it half-width, otherwise it merely
  # repeats the full-width case.
  "行きます".contains_katakana?.should == false
  "abcdefg!".contains_katakana?.should == false
  "アロハeverybody".contains_katakana?.should == true
  "ｱﾛﾊeverybody".contains_katakana?.should == true
end
|
90
|
+
|
91
|
+
it "kanji" do
  # Katakana of either width must not count as kanji. The last literal is
  # half-width katakana (ｱﾛﾊ); keep it half-width, otherwise it merely repeats
  # the full-width case.
  "行きます".contains_kanji?.should == true
  "abcdefg!".contains_kanji?.should == false
  "アロハeverybody".contains_kanji?.should == false
  "ｱﾛﾊeverybody".contains_kanji?.should == false
end
|
97
|
+
|
98
|
+
it "hankaku" do
  # Width-sensitive literals; do not Unicode-normalize this file (NFKC folds
  # ｱ into ア and Ａ into A, which makes the cases below contradict each other).
  # Full-width kana/letters and plain ASCII are expected NOT to be hankaku.
  "あ".contains_hankaku?.should == false
  "ア".contains_hankaku?.should == false
  "a".contains_hankaku?.should == false
  "Ａ".contains_hankaku?.should == false
  "ＡＬＯＨＡ".contains_hankaku?.should == false
  "アろは".contains_hankaku?.should == false
  "aloha".contains_hankaku?.should == false
  "aloは".contains_hankaku?.should == false
  # Half-width katakana is expected to be detected as hankaku.
  "ｱ".contains_hankaku?.should == true
  "ｱﾛﾊ".contains_hankaku?.should == true
  "ｱﾛﾊeverybody".contains_hankaku?.should == true
end
|
111
|
+
|
112
|
+
it "zenkaku" do
  # Width-sensitive literals; do not Unicode-normalize this file (NFKC folds
  # ｱ into ア and Ａ into A, which makes the cases below contradict each other).
  # Full-width kana and full-width Latin letters are zenkaku; ASCII is not.
  "あ".contains_zenkaku?.should == true
  "ア".contains_zenkaku?.should == true
  "a".contains_zenkaku?.should == false
  "Ａ".contains_zenkaku?.should == true
  "ＡＬＯＨＡ".contains_zenkaku?.should == true
  "アろは".contains_zenkaku?.should == true
  "aloha".contains_zenkaku?.should == false
  "aloは".contains_zenkaku?.should == true
  # Half-width katakana, alone or mixed with ASCII, contains no zenkaku.
  "ｱ".contains_zenkaku?.should == false
  "ｱﾛﾊ".contains_zenkaku?.should == false
  "ｱﾛﾊeverybody".contains_zenkaku?.should == false
end
|
125
|
+
|
126
|
+
it "Japanese characters" do
  # Kanji, kana of either width and Japanese punctuation all count. The last
  # literal is half-width katakana (ｱﾛﾊ); keep it half-width, otherwise it
  # merely repeats the full-width case.
  "行きます".contains_japanese?.should == true
  "abcdefg!".contains_japanese?.should == false
  "アロハeverybody".contains_japanese?.should == true
  "everybody、行きます".contains_japanese?.should == true
  "aloha〜!".contains_japanese?.should == true
  "ｱﾛﾊeverybody".contains_japanese?.should == true
end
|
134
|
+
end
|
135
|
+
|
136
|
+
context "should be able to tell if a character or the entire string is" do
|
137
|
+
it "kana" do
  # Each pair is [input, expected result of String#kana?]; a string with any
  # non-kana character is expected to be rejected as a whole.
  [
    ["す",               true],
    ["すし",             true],
    ["アロハ",           true],
    ["行",               false],
    ["sushi",           false],
    ["アロハeverybody",  false]
  ].each do |text, expected|
    text.kana?.should == expected
  end
end
|
145
|
+
|
146
|
+
it "hiragana" do
  # Each pair is [input, expected result of String#hiragana?].
  [
    ["あ",     true],
    ["ア",     false],
    ["a",      false],
    ["あろは", true],
    ["あロは", false]
  ].each do |text, expected|
    text.hiragana?.should == expected
  end
end
|
153
|
+
|
154
|
+
it "katakana" do
  # Each pair is [input, expected result of String#katakana?].
  [
    ["あ",     false],
    ["ア",     true],
    ["a",      false],
    ["アろは", false],
    ["アロハ", true],
    ["アロは", false]
  ].each do |text, expected|
    text.katakana?.should == expected
  end
end
|
162
|
+
|
163
|
+
it "kanji" do
  # Each pair is [input, expected result of String#kanji?]; only strings made
  # up entirely of kanji are expected to pass.
  [
    ["行",            true],
    ["あ",            false],
    ["ア",            false],
    ["〜",            false],
    ["a",             false],
    ["アロハ",         false],
    ["ALOHA",         false],
    ["金曜日",         true],
    ["金曜日だ〜",      false],
    ["金曜日FRIDAY",   false]
  ].each do |text, expected|
    text.kanji?.should == expected
  end
end
|
175
|
+
|
176
|
+
it "hankaku" do
  # Width-sensitive literals; do not Unicode-normalize this file (NFKC folds
  # ｱ into ア, Ａ into A and ｡ into 。, which makes the cases below contradict
  # each other).
  # Full-width kana/letters and plain ASCII are expected NOT to be hankaku.
  "あ".hankaku?.should == false
  "ア".hankaku?.should == false
  "a".hankaku?.should == false
  "Ａ".hankaku?.should == false
  "ＡＬＯＨＡ".hankaku?.should == false
  "アろは".hankaku?.should == false
  "aloha".hankaku?.should == false
  "aloは".hankaku?.should == false
  # Half-width katakana and half-width Japanese punctuation are hankaku.
  "ｱ".hankaku?.should == true
  "ｱﾛﾊ".hankaku?.should == true
  "｡".hankaku?.should == true
end
|
189
|
+
|
190
|
+
it "zenkaku" do
  # Width-sensitive literals; do not Unicode-normalize this file (NFKC folds
  # ｱ into ア and Ａ into A, which makes the cases below contradict each other).
  # Full-width kana and full-width Latin letters are zenkaku.
  "あ".zenkaku?.should == true
  "ア".zenkaku?.should == true
  "a".zenkaku?.should == false
  "Ａ".zenkaku?.should == true
  "ＡＬＯＨＡ".zenkaku?.should == true
  "アろは".zenkaku?.should == true
  "アロハ".zenkaku?.should == true
  # ASCII (alone or mixed in) and half-width katakana are not zenkaku.
  "aloha".zenkaku?.should == false
  "aloは".zenkaku?.should == false
  "ｱ".zenkaku?.should == false
  "ｱﾛﾊ".zenkaku?.should == false
end
|
203
|
+
|
204
|
+
it "Japanese characters" do
  # The three cases after "aloは" are half-width katakana (ｱ, ｱﾛﾊ); keep them
  # half-width, otherwise they merely repeat the full-width cases above.
  "あ".japanese?.should == true
  "ア".japanese?.should == true
  "a".japanese?.should == false
  "アろは".japanese?.should == true
  "アロハ".japanese?.should == true
  "aloha".japanese?.should == false
  "aloは".japanese?.should == false
  "ｱ".japanese?.should == true
  "ｱﾛﾊ".japanese?.should == true
  "ｱﾛﾊeverybody".japanese?.should == false
end
|
216
|
+
end
|
217
|
+
|
218
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
|
8
|
+
require 'simplecov'
|
9
|
+
# Start SimpleCov here, ahead of the `require 'mojinizer'` further down.
# NOTE(review): presumably code loaded before this call is not tracked for
# coverage; confirm against the SimpleCov docs before reordering.
SimpleCov.start
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'bundler/setup'
|
13
|
+
|
14
|
+
require 'mojinizer'
|
15
|
+
|
16
|
+
# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run :focus

  # Randomize the run order so hidden dependencies between specs show up.
  # To reproduce a failing order, pass the seed printed after the run:
  #     --seed 1234
  rspec.order = 'random'
end
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# Serialized Gem::Specification for the gem named below (mojinizer 0.1.1).
# The `!ruby/object:` tags instantiate Ruby classes when loaded, so read this
# document only with a loader that explicitly permits those classes.
# NOTE(review): this looks machine-generated (see `rubygems_version` below);
# presumably it should be regenerated from the gemspec rather than hand-edited.
--- !ruby/object:Gem::Specification
|
2
|
+
name: mojinizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ikayzo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: moji
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 10.0.4
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 10.0.4
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.13.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.13.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.7.1
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.7.1
|
83
|
+
description: Combines the functionality of the Moji and Romajinizer gems. And adds
|
84
|
+
Japanese kana detection and conversion methods to the String class.
|
85
|
+
email:
|
86
|
+
- ckobayashi@ikayzo.com
|
87
|
+
executables: []
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- .gitignore
|
92
|
+
- .rspec
|
93
|
+
- Gemfile
|
94
|
+
- LICENSE.txt
|
95
|
+
- README.md
|
96
|
+
- Rakefile
|
97
|
+
- lib/mojinizer.rb
|
98
|
+
- lib/mojinizer/conversion.rb
|
99
|
+
- lib/mojinizer/detection.rb
|
100
|
+
- lib/mojinizer/romaji_tables.rb
|
101
|
+
- lib/mojinizer/version.rb
|
102
|
+
- mojinizer.gemspec
|
103
|
+
- spec/mojinizer_spec.rb
|
104
|
+
- spec/spec_helper.rb
|
105
|
+
homepage: https://github.com/ikayzo/mojinizer
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - '>='
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.0.3
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: A gem for converting between hiragana, katakana, and romaji. This gem does
|
129
|
+
not convert kanji characters.
|
130
|
+
test_files:
|
131
|
+
- spec/mojinizer_spec.rb
|
132
|
+
- spec/spec_helper.rb
|