nihonjin 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/README.md +94 -0
- data/Rakefile +6 -0
- data/bin/console +7 -0
- data/bin/setup +6 -0
- data/lib/nihonjin.rb +4 -0
- data/lib/nihonjin/moji.rb +371 -0
- data/lib/nihonjin/options.rb +10 -0
- data/lib/nihonjin/suji.rb +253 -0
- data/lib/nihonjin/version.rb +3 -0
- data/nihonjin.gemspec +32 -0
- metadata +102 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4d3c0b5e0e4fdf2e779166d6d9b7b2832f34278c
|
4
|
+
data.tar.gz: 753f1727a235b355b18671c3a02be23e3fb96385
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4b9b4e2da01af0af7ea473a09a10618edbe6c2519c9f084beb4f5ff6ab3efd909668263ce4a4751213b87f92db053237e12b92b0a69dbfab4701e9ceeb5b5c0a
|
7
|
+
data.tar.gz: feef1600e228ac646106d80db82b0e890245f56cfd5a293f83a54fdd5ee2fe5e628acc413c765fd2b0ccd3b7c44a15b4d81c053efc1134a32def0dc6a43dc1ff
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.2.2
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# nihonjin 日本人 :sushi:
|
2
|
+
|
3
|
+
日本語をもっと気楽に扱うためのgemです:octocat:
|
4
|
+
|
5
|
+
※まだ公開していません
|
6
|
+
|
7
|
+
## Moji 文字
|
8
|
+
|
9
|
+
### いちいちnkfのオプションを調べるのは面倒くさいから、`Moji`で簡単に定義することができます
|
10
|
+
```ruby
|
11
|
+
moji = Nihonjin::Moji.new
|
12
|
+
moji.hiragana("hiragana ni naru.")
|
13
|
+
#=> "ひらがな に なる。"
|
14
|
+
moji.hiragana("ヒラガナ ニ ナル。")
|
15
|
+
#=> "ひらがな に なる。"
|
16
|
+
moji.katakana("かたかな に なる")
|
17
|
+
#=> "カタカナ ニ ナル"
|
18
|
+
moji.hankaku_katakana("hankaku katakana ni naru")
|
19
|
+
#=> "ハンカク カタカナ ニ ナル"
|
20
|
+
|
21
|
+
# Rubyではstripはありますが、日本語の文字に対応していないです
|
22
|
+
# 次のメソッドを使うと便利です
|
23
|
+
moji.hashigiri(" 端 に ある 空白 を 切り落とす ")
|
24
|
+
#=> "端 に ある 空白 を 切り落とす"
|
25
|
+
|
26
|
+
# すべての空白を切り落とすこともできる
|
27
|
+
moji.kiru("余計な 空白 を 切り落とす")
|
28
|
+
#=> "余計な空白を切り落とす"
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
# 出力コードもシンボルとして定義できます
|
33
|
+
moji.hiragana("hiragana ni naru.", :shift_jis)
|
34
|
+
#=> "\x{82D0}\x{82E7}\x{82AA}\x{82C8}\x{8140}\x{82C9}\x{8140}\x{82C8}\x{82E9}\x{8142}"
|
35
|
+
Nihonjin::EncodingTypes
|
36
|
+
#=> {:utf_8=>"-w", :shift_jis=>"-s", :iso_2022_jp=>"-j", :euc=>"-e"}
|
37
|
+
|
38
|
+
# リテラルも渡すことができます
|
39
|
+
moji.hiragana("hiragana ni naru.", "-w", "--mac")
|
40
|
+
#=> "ひらがな に なる。"
|
41
|
+
moji.hiragana("hiragana ni naru.", "-w --mac")
|
42
|
+
#=> "ひらがな に なる。"
|
43
|
+
```
|
44
|
+
|
45
|
+
|
46
|
+
## Suji 数字
|
47
|
+
今はSujiが少しだけできてる<br/>
|
48
|
+
次のメソッドでは、どんな値でも入れていい<br/><br/>
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
suji = Nihonjin::Suji.new
|
52
|
+
# 今のところは、kanji_henkanは20桁まで変換できます
|
53
|
+
p suji.kanji_henkan(150)
|
54
|
+
#=> "百五十"
|
55
|
+
p suji.kanji_henkan(3521)
|
56
|
+
#=> "三千五百二十一"
|
57
|
+
p suji.kanji_henkan(27825672)
|
58
|
+
#=> "二千七百八十二万五千六百七十二"
|
59
|
+
p suji.kanji_henkan(623_367_289_348)
|
60
|
+
#=> "六千二百三十三億六千七百二十八万九千三百四十八"
|
61
|
+
p suji.kanji_henkan(56_004_223_746_273_373_565)
|
62
|
+
#=> "五千六百京四千二百二十三兆七千四百六十二億七千三百三十七万三千五百六十五"
|
63
|
+
|
64
|
+
|
65
|
+
p suji.zenkaku(300)
|
66
|
+
#=> "３００"
|
67
|
+
p suji.hankaku("三〇〇")
|
68
|
+
#=> "300"
|
69
|
+
|
70
|
+
# kanji_henkanと違って、普通に数字をそのまま漢字に変換する
|
71
|
+
p suji.kanji(800)
|
72
|
+
#=> "八〇〇"
|
73
|
+
p suji.daiji("三")
|
74
|
+
#=> "参"
|
75
|
+
|
76
|
+
p Suji.type?(10)
|
77
|
+
#=> "半角"
|
78
|
+
p Suji.type?("１０")
|
79
|
+
#=> "全角"
|
80
|
+
p Suji.type?("十")
|
81
|
+
#=> "漢字"
|
82
|
+
p Suji.type?("壱")
|
83
|
+
#=> "大字"
|
84
|
+
|
85
|
+
|
86
|
+
# to_iも使えます
|
87
|
+
# 尚、上にあるkanji_henkan()が返すような値ではto_iは使えません
|
88
|
+
p suji.to_i("三〇一")
|
89
|
+
#=> 301
|
90
|
+
p suji.to_i("３０１")
|
91
|
+
#=> 301
|
92
|
+
p suji.to_i("参零壱")
|
93
|
+
#=> 301
|
94
|
+
```
|
data/Rakefile
ADDED
data/bin/console
ADDED
data/bin/setup
ADDED
data/lib/nihonjin.rb
ADDED
@@ -0,0 +1,371 @@
|
|
1
|
+
require 'nkf'
|
2
|
+
require 'nihonjin/options'
|
3
|
+
|
4
|
+
module Nihonjin
|
5
|
+
|
6
|
+
class Moji

  # Romaji => hiragana table.
  #
  # Insertion order matters for #romaji (kana => romaji): when several keys
  # share one kana (tsu/tu, shi/si, n/n_ ...) the first key listed is the
  # spelling that #romaji emits. #hiragana does not rely on this order; it
  # uses Romaji_order below.
  Hiragana = {
    kya: "きゃ", kyu: "きゅ", kyo: "きょ",
    sha: "しゃ", shu: "しゅ", she: "しぇ", sho: "しょ",
    cha: "ちゃ", chu: "ちゅ", cho: "ちょ",
    dya: "ぢゃ", dyu: "ぢゅ", dyo: "ぢょ",
    nya: "にゃ", nyu: "にゅ", nyo: "にょ",
    hya: "ひゃ", hyu: "ひゅ", hyo: "ひょ",
    mya: "みゃ", myu: "みゅ", myo: "みょ",
    rya: "りゃ", ryu: "りゅ", ryo: "りょ",

    gya: "ぎゃ", gyu: "ぎゅ", gyo: "ぎょ",
    ja: "じゃ", ju: "じゅ", je: "じぇ", jo: "じょ",
    jya: "じゃ", jyu: "じゅ", jyo: "じょ",
    bya: "びゃ", byu: "びゅ", byo: "びょ",

    pya: "ぴゃ", pyu: "ぴゅ", pyo: "ぴょ",

    tsu: "つ",
    tu: "つ",

    ka: "か", ki: "き", ku: "く", ke: "け", ko: "こ",
    sa: "さ", shi: "し", si: "し", su: "す", se: "せ", so: "そ",
    ta: "た", chi: "ち", te: "て", to: "と",
    na: "な", ni: "に", nu: "ぬ", ne: "ね", no: "の",
    ha: "は", hi: "ひ", fu: "ふ", he: "へ", ho: "ほ",
    ma: "ま", mi: "み", mu: "む", me: "め", mo: "も",
    ya: "や", yu: "ゆ", yo: "よ",
    ra: "ら", ri: "り", ru: "る", re: "れ", ro: "ろ",
    wa: "わ", wi: "ゐ", we: "ゑ", wo: "を",
    n: "ん", n_: "ん", # "n_" lets you write words such as "han_i" (はんい). See Issue #16

    va: "ゔぁ", vi: "ゔぃ", vu: "ゔ", ve: "ゔぇ", vo: "ゔぉ",
    ga: "が", gi: "ぎ", gu: "ぐ", ge: "げ", go: "ご",
    za: "ざ", ji: "じ", zu: "ず", ze: "ぜ", zo: "ぞ",
    da: "だ", di: "ぢ", du: "づ", de: "で", do: "ど",
    dzu: "づ",
    ba: "ば", bi: "び", bu: "ぶ", be: "べ", bo: "ぼ",
    pa: "ぱ", pi: "ぴ", pu: "ぷ", pe: "ぺ", po: "ぽ",

    fa: "ふぁ", fi: "ふぃ", fe: "ふぇ", fo: "ふぉ",
    di_: "でぃ",
    a: "あ", i: "い", u: "う", e: "え", o: "お",
    wu: "う"
  }

  # Keys of Hiragana ordered longest first (ties keep table order), so that
  # "n_", "di_", "dzu" and "wu" are matched before their shorter prefixes
  # or suffixes ("n", "di", "zu", "u") can consume part of them.
  Romaji_order = Hiragana.keys.each_with_index.sort_by { |key, index| [-key.length, index] }.map(&:first)

  # Small kana, looked up by the romaji key of the corresponding full-size kana.
  Small_hiragana = {
    ya: "ゃ", yu: "ゅ", yo: "ょ",
    a: "ぁ", i: "ぃ", u: "ぅ", e: "ぇ", o: "ぉ",
    tsu: "っ"
  }

  # Doubled consonants that become a small "っ" followed by the consonant.
  Consonants = ["bb", "cc", "dd", "ff", "gg", "hh", "jj", "kk", "ll", "pp", "qq", "rr", "ss", "tt", "vv", "ww", "yy", "zz"]

  # Punctuation pairs: [0] is the ASCII form, [1] is the Japanese form.
  # The full-width "!" and "?" are written as escapes so they cannot be
  # confused with their ASCII look-alikes.
  Symbols = [[".", "。"], ["!", "\uFF01"], ["?", "\uFF1F"], [",", "、"], ["~", "〜"]]

  # Full-width (ideographic) space, U+3000.
  Zenkaku_kuhaku = "\u3000"

  # Converts romaji and katakana in +str+ to hiragana.
  #
  # @param str [String] text to convert (any encoding convertible to UTF-8)
  # @param options [Array<Symbol, String>] output encoding as a key of
  #   EncodingTypes (e.g. :shift_jis) and/or literal nkf flags ("-w", "--mac")
  # @return [String] the converted text; when no output encoding is given it
  #   is returned in the encoding of +str+
  def hiragana(str, *options)
    need_to_change_encoding = check_encoding(options)
    flags = setup(options)
    str_data = utf_8_pass(str)
    text = str_data[:string].downcase

    # "matte" => "maっte": a doubled consonant becomes a small tsu
    Consonants.each do |pair|
      text = text.gsub(pair, "っ" + pair[0])
    end

    # Romaji syllables => hiragana, longest spellings first
    Romaji_order.each do |key|
      text = text.gsub(key.to_s, Hiragana[key])
    end

    # An "x" in front of a kana requests its small form ("xtsu" => "っ").
    # A kana without a small form is kept as is; a stray "x" is dropped.
    text = text.gsub(/x+([^x])/m) do
      following = Regexp.last_match(1)
      Small_hiragana[Hiragana.key(following)] || following
    end
    text = text.delete("x")

    # A consonant still standing right after "っ" (e.g. from "ttt") is itself
    # a small tsu
    text = text.gsub(/っ[a-z]/, "っっ")

    # ASCII punctuation => Japanese punctuation
    Symbols.each do |ascii, japanese|
      text = text.gsub(ascii, japanese)
    end

    # All whitespace becomes full-width spaces
    text = kuhaku(text, :zenkaku)

    # -h1: katakana => hiragana. -W: the input is known to be UTF-8 here, so
    # nkf does not have to guess.
    text = NKF.nkf('-h1 -W ' + flags, text)
    text = text.encode(str_data[:encoding].name) if need_to_change_encoding
    text
  end

  # Destructive version of #hiragana; replaces the contents of +str+.
  # @return [String] +str+
  def hiragana!(str, *options)
    # String#replace is used instead of sub! so that backslash sequences in
    # the result are not interpreted as back-references.
    str.replace(hiragana(str, *options))
  end

  # Converts +str+ (romaji, hiragana, mixed) to full-width katakana.
  # Takes the same options as #hiragana.
  def katakana(str, *options)
    nkf_pass(hiragana(str, *options), '-h2', *options)
  end

  # Destructive version of #katakana.
  def katakana!(str, *options)
    str.replace(katakana(str, *options))
  end

  # Converts +str+ to half-width katakana. Takes the same options as #hiragana.
  def hankaku_katakana(str, *options)
    nkf_pass(katakana(str, *options), '-Z4', *options)
  end

  # Destructive version of #hankaku_katakana.
  def hankaku_katakana!(str, *options)
    str.replace(hankaku_katakana(str, *options))
  end

  # Swaps hiragana and katakana in +str+ (nkf -h3).
  # Takes the same options as #hiragana.
  def kana_invert(str, *options)
    # The raw options go straight to nkf_pass so that, like the other
    # converters, the original encoding is restored when none was requested.
    nkf_pass(str, '-h3', *options)
  end

  # Destructive version of #kana_invert.
  def kana_invert!(str, *options)
    str.replace(kana_invert(str, *options))
  end

  # Converts kana in +str+ to romaji.
  #
  # @param str [String] text to convert
  # @param encoding [Symbol, String] passed to #hiragana as its option
  # @return [String]
  def romaji(str, encoding=:utf_8)
    need_to_change_encoding = check_encoding(encoding)

    # Unify everything to hiragana first so katakana input is handled too
    str_data = utf_8_pass(hiragana(str, encoding).downcase)
    text = str_data[:string]

    Hiragana.each do |roman, kana|
      text = text.gsub(kana, roman.to_s)
    end

    # Must run before the small kana are converted, otherwise "っ" would
    # already have been turned into "tsu"
    text = expand_small_tsu(text)

    Small_hiragana.each do |roman, kana|
      text = text.gsub(kana, roman.to_s)
    end

    Symbols.each do |ascii, japanese|
      text = text.gsub(japanese, ascii)
    end

    text = kuhaku(text)
    text = text.encode(str_data[:encoding].name) if need_to_change_encoding
    text
  end

  # Destructive version of #romaji.
  def romaji!(str, encoding=:utf_8)
    str.replace(romaji(str, encoding))
  end


  ##########################################
  #                                        #
  #  Whitespace handling methods follow    #
  #                                        #
  ##########################################

  # Removes every whitespace character, half-width or full-width, from +str+.
  # @return [String] a new string in the encoding of +str+
  def kiru(str)
    str_data = utf_8_pass(str)
    str_data[:string].gsub(/[\s\u3000]/, "").encode(str_data[:encoding].name)
  end

  # Destructive version of #kiru.
  def kiru!(str)
    str.replace(kiru(str))
  end

  # Strips leading and trailing whitespace from +str+. Unlike String#strip
  # this also removes full-width spaces, however many there are.
  # @return [String] a new string in the encoding of +str+
  def hashigiri(str)
    str_data = utf_8_pass(str)
    trimmed = str_data[:string].gsub(/\A[\s\u3000]+|[\s\u3000]+\z/, "")
    trimmed.encode(str_data[:encoding].name)
  end

  # Destructive version of #hashigiri.
  def hashigiri!(str)
    str.replace(hashigiri(str))
  end

  # Unifies the spaces in +str+.
  #
  # @param option [Symbol, nil] with :zenkaku every whitespace character
  #   becomes a full-width space; otherwise every full-width space becomes a
  #   half-width space
  # @return [String] a new string in the encoding of +str+
  def kuhaku(str, option=nil)
    str_data = utf_8_pass(str)
    text = str_data[:string]
    # TODO: a :double option (nkf -Z2) would be nice to have
    if option == :zenkaku
      text = text.gsub(/\s/, Zenkaku_kuhaku) # to full-width
    else
      text = text.gsub(Zenkaku_kuhaku, " ") # to ordinary spaces
    end
    text.encode(str_data[:encoding].name)
  end

  # Destructive version of #kuhaku.
  def kuhaku!(str, option=nil)
    str.replace(kuhaku(str, option))
  end

  # Swaps half-width whitespace and full-width spaces in +str+.
  # @return [String] a new string in the encoding of +str+
  def kuhaku_invert(str)
    str_data = utf_8_pass(str)
    swapped = str_data[:string].gsub(/[\s\u3000]/) do |space|
      space == Zenkaku_kuhaku ? " " : Zenkaku_kuhaku
    end
    swapped.encode(str_data[:encoding].name)
  end

  # Destructive version of #kuhaku_invert.
  def kuhaku_invert!(str)
    str.replace(kuhaku_invert(str))
  end


  private

  # Tells whether the result must be converted back to the input's encoding.
  # When the options name an output encoding (an EncodingTypes key, or a
  # literal containing one of its nkf flags) nkf produces that encoding and
  # this returns false; otherwise it returns true.
  def check_encoding(*options)
    options.flatten.none? do |option|
      EncodingTypes.any? do |key, flag|
        option == key || option.to_s.include?(flag)
      end
    end
  end

  # Builds the nkf option string: symbols are looked up in EncodingTypes,
  # literals are passed through, and UTF-8 output is the default.
  def setup(options)
    flags = options.flatten.map do |option|
      option.is_a?(Symbol) ? EncodingTypes[option] : option
    end
    joined = flags.join(' ')
    joined.empty? ? EncodingTypes[:utf_8] : joined
  end

  # Returns a UTF-8 copy of +str+ together with its original encoding, so
  # callers can work in UTF-8 and convert back afterwards if needed.
  # @return [Hash] { string: String, encoding: Encoding }
  def utf_8_pass(str)
    { string: str.encode("UTF-8"), encoding: str.encoding }
  end

  # Runs nkf on +str+ with +specific_option+ ('-h2', '-Z4', ...) plus the
  # caller's options, restoring the original encoding when none was requested.
  def nkf_pass(str, specific_option, *options)
    need_to_change_encoding = check_encoding(options)
    flags = setup(options)
    str_data = utf_8_pass(str)
    # -W: the input handed to nkf is always UTF-8 at this point
    converted = NKF.nkf(specific_option + ' -W ' + flags, str_data[:string])
    converted = converted.encode(str_data[:encoding].name) if need_to_change_encoding
    converted
  end

  # Replaces each run of "っ" with repetitions of the romaji consonant that
  # follows it ("どっっっかん" => "dokkkkan"). A "っ" that is not followed by
  # a letter (end of string, punctuation, ...) is dropped.
  def expand_small_tsu(text)
    doubled = text.gsub(/(っ+)([a-zA-Z])/) do
      Regexp.last_match(2) * (Regexp.last_match(1).length + 1)
    end
    doubled.delete("っ")
  end

end
|
370
|
+
|
371
|
+
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
module Nihonjin
|
2
|
+
|
3
|
+
class Suji

  # Digit tables; index n holds the digit n in each style.
  Hankaku = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  # Full-width digits U+FF10..U+FF19, built from code points so they cannot
  # be confused with (or normalised into) ASCII digits.
  Zenkaku = (0xFF10..0xFF19).map { |code| [code].pack("U") }
  Kanji = ["〇", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
  Daiji = ["零", "壱", "弐", "参", "肆", "伍", "陸", "漆", "捌", "玖"]
  # Names of the powers of 10,000: 万 = 10^4, 億 = 10^8, 兆 = 10^12, ...
  Daisu = ["万", "億", "兆", "京", "垓", "𥝱", "穣", "溝", "澗", "正", "載", "極"]
  # "恒河沙", "阿僧祇", "那由他", "不可思議", "無量大数" would need different
  # handling because they are longer than one character.

  # Place names inside a group of four digits (ones, tens, hundreds, thousands).
  Kurai = ["", "十", "百", "千"]

  # Characters that identify each style, in detection order; see #type?.
  Types = [
    [Hankaku, "半角"],
    [Zenkaku, "全角"],
    [Kanji + Kurai.drop(1) + Daisu, "漢字"],
    [Daiji, "大字"]
  ]

  # Style name => digit table, used by #converter.
  Digits = { "全角" => Zenkaku, "漢字" => Kanji, "大字" => Daiji }

  # Class-level shortcut for #type?, so Suji.type?(10) works.
  def self.type?(num)
    new.type?(num)
  end

  # Tells which style of digits +num+ is written in.
  #
  # @param num [Object] anything; it is examined through #to_s
  # @return [String] "半角", "全角", "漢字", "大字", or "" when +num+
  #   contains no digit of any style. For mixed input the last style found,
  #   in the order listed above, is reported.
  def type?(num)
    text = num.to_s
    found = Types.select do |characters, _name|
      characters.any? { |character| text.include?(character.to_s) }
    end
    found.empty? ? "" : found.last[1]
  end

  # Converts +num+, written in any digit style, to an Integer.
  # Strings such as the ones #kanji_henkan returns are not supported.
  def to_i(num)
    return num if num.is_a?(Integer)
    hankaku(num).to_i
  end

  # Converts the digits of +num+ to half-width (ASCII) digits.
  # @return [Integer] when +num+ is already half-width
  # @return [String] otherwise
  def hankaku(num)
    type = type?(num)
    return num.to_i if type == "半角"
    converter(num, type, Hankaku)
  end

  # Converts the digits of +num+ to full-width digits.
  # @return [String] (+num+ itself when it is already full-width)
  def zenkaku(num)
    type = type?(num)
    return num if type == "全角"
    converter(num, type, Zenkaku)
  end

  # Converts the digits of +num+ to kanji digits one by one (800 => "八〇〇").
  # See #kanji_henkan for positional notation.
  def kanji(num)
    type = type?(num)
    return num if type == "漢字"
    converter(num, type, Kanji)
  end

  # Converts the digits of +num+ to daiji (formal) digits one by one.
  def daiji(num)
    type = type?(num)
    return num if type == "大字"
    converter(num, type, Daiji)
  end

  # Converts a non-negative integer to positional kanji notation
  # (3521 => "三千五百二十一").
  #
  # @param num [Integer, String] the number; strings may use any digit style
  #   and may contain "," or "_" separators. The argument is not modified.
  # @return [String]
  # @raise [ArgumentError] when +num+ is not a non-negative integer or needs
  #   a unit larger than the last entry of Daisu
  def kanji_henkan(num)
    text = num.to_s.delete(",_")
    digits = converter(text, type?(text), Hankaku)
    unless digits =~ /\A\d+\z/
      raise ArgumentError, "not a non-negative integer: #{num.inspect}"
    end

    digits = digits.sub(/\A0+(?=\d)/, "")
    return Kanji[0] if digits == "0"

    # Groups of four digits, least significant group first
    groups = digits.reverse.scan(/\d{1,4}/).map(&:reverse)
    units = [""] + Daisu
    if groups.length > units.length
      raise ArgumentError, "number too large: #{num.inspect}"
    end

    parts = groups.each_with_index.map do |group, index|
      converted = group_to_kanji(group)
      # A group of zeros contributes nothing, not even its unit
      # (so 1兆 is "一兆", not "一兆億万")
      converted.empty? ? "" : converted + units[index]
    end
    parts.reverse.join
  end


  private

  # Replaces each digit of style +type+ in +num+ with the digit of the same
  # value from +result_type+. An unknown +type+ is treated as half-width.
  def converter(num, type, result_type)
    source = Digits.fetch(type, Hankaku)
    table = {}
    source.each_with_index do |digit, value|
      table[digit.to_s] = result_type[value].to_s
    end
    num.to_s.gsub(Regexp.union(table.keys), table)
  end

  # Converts one group of up to four ASCII digits to kanji:
  # "3521" => "三千五百二十一", "0150" => "百五十", "0000" => "".
  # A 1 in the tens, hundreds or thousands place is written as the bare
  # place name ("十", "百", "千").
  def group_to_kanji(group)
    pieces = group.reverse.chars.each_with_index.map do |character, place|
      digit = character.to_i
      if digit == 0
        ""
      elsif digit == 1 && place > 0
        Kurai[place]
      else
        Kanji[digit] + Kurai[place]
      end
    end
    pieces.reverse.join
  end

end
|
252
|
+
|
253
|
+
end
|
data/nihonjin.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'nihonjin/version'
|
5
|
+
|
6
|
+
# Packaging metadata for the nihonjin gem.
Gem::Specification.new do |gem|
  gem.name     = "nihonjin"
  gem.version  = Nihonjin::VERSION
  gem.authors  = ["gazayas"]
  gem.email    = ["g-zayas@hotmail.com"]
  gem.homepage = "https://github.com/gazayas/nihonjin"

  gem.summary     = "handle Japanese<=>English letter strings with ease"
  gem.description = "Ruby has the NKF.nkf() method, but the options are unclear by just looking at them, so this gem has methods like hiragana() so you know what you're changing your strings to"

  # No 'allowed_push_host' metadata is set, so this gem may be pushed to any
  # host. To restrict pushes, set gem.metadata['allowed_push_host'] here.

  # Ship every file tracked by git except tests, specs and features.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path =~ %r{^(test|spec|features)/} }
  gem.bindir        = "exe"
  # Executables are the base names of the shipped files under exe/.
  gem.executables   = gem.files.select { |path| path =~ %r{^exe/} }.map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.11"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nihonjin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- gazayas
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-12-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: Ruby has the NKF.nkf() method, but the options are unclear by just looking
|
56
|
+
at them, so this gem has methods like hiragana() so you know what you're changing
|
57
|
+
your strings to
|
58
|
+
email:
|
59
|
+
- g-zayas@hotmail.com
|
60
|
+
executables: []
|
61
|
+
extensions: []
|
62
|
+
extra_rdoc_files: []
|
63
|
+
files:
|
64
|
+
- ".gitignore"
|
65
|
+
- ".rspec"
|
66
|
+
- ".ruby-version"
|
67
|
+
- ".travis.yml"
|
68
|
+
- Gemfile
|
69
|
+
- README.md
|
70
|
+
- Rakefile
|
71
|
+
- bin/console
|
72
|
+
- bin/setup
|
73
|
+
- lib/nihonjin.rb
|
74
|
+
- lib/nihonjin/moji.rb
|
75
|
+
- lib/nihonjin/options.rb
|
76
|
+
- lib/nihonjin/suji.rb
|
77
|
+
- lib/nihonjin/version.rb
|
78
|
+
- nihonjin.gemspec
|
79
|
+
homepage: https://github.com/gazayas/nihonjin
|
80
|
+
licenses: []
|
81
|
+
metadata: {}
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.4.5
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: handle Japanese<=>English letter strings with ease
|
102
|
+
test_files: []
|