gimchi 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/config/default.yml +8 -0
- data/lib/gimchi/korean.rb +82 -63
- data/test/test_gimchi.rb +18 -5
- metadata +13 -10
data/config/default.yml
CHANGED
@@ -84,14 +84,22 @@ pronouncer:
|
|
84
84
|
rule_16: [rule_30]
|
85
85
|
|
86
86
|
number:
|
87
|
+
positive: 플러스
|
87
88
|
negative: 마이너스
|
88
89
|
decimal point: 점
|
89
90
|
units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수]
|
90
91
|
digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구]
|
92
|
+
post substitution:
|
93
|
+
? !ruby/regexp /^일만/
|
94
|
+
: 만
|
91
95
|
|
92
96
|
# 정수형일 때 또다른 표현법 (나이, 시간)
|
93
97
|
alt notation:
|
94
98
|
when suffix:
|
99
|
+
개:
|
100
|
+
max:
|
101
|
+
명:
|
102
|
+
max:
|
95
103
|
살:
|
96
104
|
max:
|
97
105
|
시:
|
data/lib/gimchi/korean.rb
CHANGED
@@ -72,8 +72,8 @@ class Korean
|
|
72
72
|
def read_number str
|
73
73
|
nconfig = config['number']
|
74
74
|
|
75
|
-
str.to_s.gsub(/([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+)?(\s*.)?/) {
|
76
|
-
read_number_sub($&, $
|
75
|
+
str.to_s.gsub(/([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?(\s*.)?/) {
|
76
|
+
read_number_sub($&, $4)
|
77
77
|
}
|
78
78
|
end
|
79
79
|
|
@@ -171,101 +171,120 @@ private
|
|
171
171
|
def read_number_sub num, next_char = nil
|
172
172
|
nconfig = config['number']
|
173
173
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
raise ArgumentError.new("Invalid number format") unless num =~ /[-+]?[0-9,]*\.?[0-9]*/
|
178
|
-
num = num.to_f == num.to_i ? num.to_i : num.to_f
|
179
|
-
end
|
174
|
+
num = num.gsub(',', '')
|
175
|
+
num = num.sub(/#{next_char}$/, '') if next_char
|
176
|
+
is_float = num.match(/[\.e]/) != nil
|
180
177
|
|
181
178
|
# Alternative notation for integers with proper suffix
|
182
179
|
alt = false
|
183
|
-
if
|
180
|
+
if is_float == false &&
|
181
|
+
nconfig['alt notation']['when suffix'].keys.include?(next_char.to_s.strip)
|
184
182
|
max = nconfig['alt notation']['when suffix'][next_char.strip]['max']
|
185
183
|
|
186
|
-
if max.nil? || num <= max
|
184
|
+
if max.nil? || num.to_i <= max
|
187
185
|
alt = true
|
188
186
|
end
|
189
187
|
end
|
190
188
|
|
191
189
|
# Sign
|
192
|
-
|
193
|
-
|
190
|
+
sign = []
|
191
|
+
negative = false
|
192
|
+
if num =~ /^-/
|
193
|
+
num = num.sub(/^-\s*/, '')
|
194
|
+
sign << nconfig['negative']
|
194
195
|
negative = true
|
195
|
-
|
196
|
-
|
196
|
+
elsif num =~ /^\+/
|
197
|
+
num = num.sub(/^\+\s*/, '')
|
198
|
+
sign << nconfig['positive']
|
197
199
|
end
|
198
200
|
|
199
|
-
if
|
201
|
+
if is_float
|
200
202
|
below = nconfig['decimal point']
|
201
|
-
below = nconfig['digits'][0] + below if num < 1
|
203
|
+
below = nconfig['digits'][0] + below if num.to_f < 1
|
202
204
|
|
203
|
-
|
204
|
-
|
205
|
-
|
205
|
+
if md = num.match(/(.*)e(.*)/)
|
206
|
+
dp = md[1].index('.')
|
207
|
+
num = md[1].tr '.', ''
|
206
208
|
exp = md[2].to_i
|
207
|
-
|
208
|
-
|
209
|
+
|
210
|
+
dp += exp
|
211
|
+
if dp > num.length
|
212
|
+
num = num.ljust(dp, '0')
|
213
|
+
num = num.sub(/^0+([1-9])/, "\\1")
|
214
|
+
|
215
|
+
below = ""
|
216
|
+
elsif dp < 0
|
217
|
+
num = '0.' + '0' * (-dp) + num
|
209
218
|
else
|
210
|
-
|
219
|
+
num[dp] = '.' + num[dp]
|
211
220
|
end
|
212
221
|
end
|
213
|
-
|
222
|
+
num.sub(/.*\./, '').each_char do | char |
|
214
223
|
below += nconfig['digits'][char.to_i]
|
215
|
-
end
|
216
|
-
num = num.
|
224
|
+
end if num.include? '.'
|
225
|
+
num = num.sub(/\..*/, '')
|
217
226
|
else
|
218
227
|
below = ""
|
219
228
|
end
|
220
229
|
|
221
230
|
tokens = []
|
222
231
|
unit_idx = -1
|
232
|
+
num = num.to_i
|
223
233
|
while num > 0
|
224
234
|
v = num % 10000
|
225
235
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
236
|
+
unit_idx += 1
|
237
|
+
if v > 0
|
238
|
+
if alt == false || unit_idx >= 1
|
239
|
+
str = ""
|
240
|
+
# Cannot use hash as they're unordered in 1.8
|
241
|
+
[[1000, '천'],
|
242
|
+
[100, '백'],
|
243
|
+
[10, '십']].each do | arr |
|
244
|
+
u, sub_unit = arr
|
245
|
+
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
246
|
+
v %= u
|
247
|
+
end
|
248
|
+
str += nconfig['digits'][v] if v > 0
|
249
|
+
|
250
|
+
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx]
|
251
|
+
else
|
252
|
+
str = ""
|
253
|
+
tenfolds = nconfig['alt notation']['tenfolds']
|
254
|
+
digits = nconfig['alt notation']['digits']
|
255
|
+
alt_post_subs = nconfig['alt notation']['post substitution']
|
256
|
+
|
257
|
+
# Likewise.
|
258
|
+
[[1000, '천'],
|
259
|
+
[100, '백']].each do | u, sub_unit |
|
260
|
+
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
261
|
+
v %= u
|
262
|
+
end
|
263
|
+
|
264
|
+
str += tenfolds[(v / 10) - 1] if v / 10 > 0
|
265
|
+
v %= 10
|
266
|
+
str += digits[v] if v > 0
|
267
|
+
|
268
|
+
if alt
|
269
|
+
suffix = next_char.strip
|
270
|
+
str = str + suffix
|
271
|
+
alt_post_subs.each do | k, v |
|
272
|
+
str.gsub!(k, v)
|
273
|
+
end
|
274
|
+
str.sub!(/#{suffix}$/, '')
|
275
|
+
end
|
276
|
+
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx]
|
260
277
|
end
|
261
|
-
str.sub!(/#{suffix}$/, '')
|
262
|
-
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx += 1]
|
263
278
|
end
|
264
279
|
num /= 10000
|
265
280
|
end
|
266
281
|
|
267
|
-
tokens
|
268
|
-
tokens.reverse.join(' ') + next_char.to_s
|
282
|
+
tokens += sign unless sign.empty?
|
283
|
+
ret = tokens.reverse.join(' ') + below + next_char.to_s
|
284
|
+
nconfig['post substitution'].each do | k, v |
|
285
|
+
ret.gsub!(k, v)
|
286
|
+
end
|
287
|
+
ret
|
269
288
|
end
|
270
289
|
end#Korean
|
271
290
|
end#Gimchi
|
data/test/test_gimchi.rb
CHANGED
@@ -77,12 +77,25 @@ class TestGimchi < Test::Unit::TestCase
|
|
77
77
|
ko = Gimchi::Korean.new
|
78
78
|
assert_equal "천 구백 구십 구", ko.read_number(1999)
|
79
79
|
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
80
|
-
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십
|
81
|
-
ko.read_number("53,191,100,678.
|
82
|
-
|
83
|
-
|
80
|
+
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사오육칠",
|
81
|
+
ko.read_number("53,191,100,678.3214567")
|
82
|
+
assert_equal "영점영영영영영일이삼사오", ko.read_number("1.2345e-06")
|
83
|
+
assert_equal "일해 이천 삼백 사십 오경", ko.read_number("1.2345e+20")
|
84
|
+
assert_equal "플러스 일해 이천 삼백 사십 오경", ko.read_number("+ 1.2345e+20")
|
85
|
+
assert_equal "마이너스 일해 이천 삼백 사십 오경", ko.read_number("- 1.2345e+20")
|
86
|
+
assert_equal "만 십 이점삼", ko.read_number("100.123e+2")
|
87
|
+
assert_equal "십만 십 이점삼", ko.read_number("1000.123e+2")
|
88
|
+
assert_equal "백 일만 십 이점삼", ko.read_number("10100.123e+2")
|
89
|
+
assert_equal "천 십 이점삼", ko.read_number("10.123e+2")
|
90
|
+
assert_equal "십점영", ko.read_number("10.0")
|
91
|
+
assert_equal "플러스 십점영", ko.read_number("+ 10.0")
|
92
|
+
|
93
|
+
# 나이, 시간, 개수, 명 ( -살, -시, -개, -명 )
|
84
94
|
assert_equal "나는 스무살", ko.read_number("나는 20살")
|
95
|
+
assert_equal "이십 칠점일살", ko.read_number("27.1살")
|
85
96
|
assert_equal "너는 열세 살", ko.read_number("너는 13 살")
|
97
|
+
assert_equal "백 서른두명", ko.read_number("132명")
|
98
|
+
assert_equal "이천 오백 아흔아홉개", ko.read_number("2,599개")
|
86
99
|
assert_equal "지금은 일곱시 삼십분", ko.read_number("지금은 7시 30분")
|
87
100
|
end
|
88
101
|
|
@@ -118,7 +131,7 @@ class TestGimchi < Test::Unit::TestCase
|
|
118
131
|
assert s >= 411
|
119
132
|
end
|
120
133
|
|
121
|
-
def
|
134
|
+
def test_romanize_preserve_non_korean
|
122
135
|
ko = Gimchi::Korean.new
|
123
136
|
assert_equal 'ttok-kkateun kkk', ko.romanize('똑같은 kkk')
|
124
137
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimchi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-04-
|
12
|
+
date: 2011-04-12 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &2152493140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152493140
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: jeweler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2152491800 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152491800
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rcov
|
38
|
-
requirement: &
|
38
|
+
requirement: &2152490620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152490620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ansi
|
49
|
-
requirement: &
|
49
|
+
requirement: &2152489680 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2152489680
|
58
58
|
description: Gimchi knows how to pronounce Korean strings and how to write them in
|
59
59
|
roman alphabet.
|
60
60
|
email: junegunn.c@gmail.com
|
@@ -91,6 +91,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
91
|
- - ! '>='
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
hash: -4096346844211308775
|
94
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
98
|
none: false
|
96
99
|
requirements:
|