gimchi 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/config/default.yml +8 -0
- data/lib/gimchi/korean.rb +82 -63
- data/test/test_gimchi.rb +18 -5
- metadata +13 -10
data/config/default.yml
CHANGED
@@ -84,14 +84,22 @@ pronouncer:
|
|
84
84
|
rule_16: [rule_30]
|
85
85
|
|
86
86
|
number:
|
87
|
+
positive: 플러스
|
87
88
|
negative: 마이너스
|
88
89
|
decimal point: 점
|
89
90
|
units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수]
|
90
91
|
digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구]
|
92
|
+
post substitution:
|
93
|
+
? !ruby/regexp /^일만/
|
94
|
+
: 만
|
91
95
|
|
92
96
|
# 정수형일 때 또다른 표현법 (나이, 시간)
|
93
97
|
alt notation:
|
94
98
|
when suffix:
|
99
|
+
개:
|
100
|
+
max:
|
101
|
+
명:
|
102
|
+
max:
|
95
103
|
살:
|
96
104
|
max:
|
97
105
|
시:
|
data/lib/gimchi/korean.rb
CHANGED
@@ -72,8 +72,8 @@ class Korean
|
|
72
72
|
def read_number str
|
73
73
|
nconfig = config['number']
|
74
74
|
|
75
|
-
str.to_s.gsub(/([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+)?(\s*.)?/) {
|
76
|
-
read_number_sub($&, $3)
|
75
|
+
str.to_s.gsub(/([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?(\s*.)?/) {
|
76
|
+
read_number_sub($&, $4)
|
77
77
|
}
|
78
78
|
end
|
79
79
|
|
@@ -171,101 +171,120 @@ private
|
|
171
171
|
def read_number_sub num, next_char = nil
|
172
172
|
nconfig = config['number']
|
173
173
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
raise ArgumentError.new("Invalid number format") unless num =~ /[-+]?[0-9,]*\.?[0-9]*/
|
178
|
-
num = num.to_f == num.to_i ? num.to_i : num.to_f
|
179
|
-
end
|
174
|
+
num = num.gsub(',', '')
|
175
|
+
num = num.sub(/#{next_char}$/, '') if next_char
|
176
|
+
is_float = num.match(/[\.e]/) != nil
|
180
177
|
|
181
178
|
# Alternative notation for integers with proper suffix
|
182
179
|
alt = false
|
183
|
-
if
|
180
|
+
if is_float == false &&
|
181
|
+
nconfig['alt notation']['when suffix'].keys.include?(next_char.to_s.strip)
|
184
182
|
max = nconfig['alt notation']['when suffix'][next_char.strip]['max']
|
185
183
|
|
186
|
-
if max.nil? || num <= max
|
184
|
+
if max.nil? || num.to_i <= max
|
187
185
|
alt = true
|
188
186
|
end
|
189
187
|
end
|
190
188
|
|
191
189
|
# Sign
|
192
|
-
|
193
|
-
|
190
|
+
sign = []
|
191
|
+
negative = false
|
192
|
+
if num =~ /^-/
|
193
|
+
num = num.sub(/^-\s*/, '')
|
194
|
+
sign << nconfig['negative']
|
194
195
|
negative = true
|
195
|
-
|
196
|
-
|
196
|
+
elsif num =~ /^\+/
|
197
|
+
num = num.sub(/^\+\s*/, '')
|
198
|
+
sign << nconfig['positive']
|
197
199
|
end
|
198
200
|
|
199
|
-
if
|
201
|
+
if is_float
|
200
202
|
below = nconfig['decimal point']
|
201
|
-
below = nconfig['digits'][0] + below if num < 1
|
203
|
+
below = nconfig['digits'][0] + below if num.to_f < 1
|
202
204
|
|
203
|
-
|
204
|
-
|
205
|
-
|
205
|
+
if md = num.match(/(.*)e(.*)/)
|
206
|
+
dp = md[1].index('.')
|
207
|
+
num = md[1].tr '.', ''
|
206
208
|
exp = md[2].to_i
|
207
|
-
|
208
|
-
|
209
|
+
|
210
|
+
dp += exp
|
211
|
+
if dp > num.length
|
212
|
+
num = num.ljust(dp, '0')
|
213
|
+
num = num.sub(/^0+([1-9])/, "\\1")
|
214
|
+
|
215
|
+
below = ""
|
216
|
+
elsif dp < 0
|
217
|
+
num = '0.' + '0' * (-dp) + num
|
209
218
|
else
|
210
|
-
|
219
|
+
num[dp] = '.' + num[dp]
|
211
220
|
end
|
212
221
|
end
|
213
|
-
|
222
|
+
num.sub(/.*\./, '').each_char do | char |
|
214
223
|
below += nconfig['digits'][char.to_i]
|
215
|
-
end
|
216
|
-
num = num.
|
224
|
+
end if num.include? '.'
|
225
|
+
num = num.sub(/\..*/, '')
|
217
226
|
else
|
218
227
|
below = ""
|
219
228
|
end
|
220
229
|
|
221
230
|
tokens = []
|
222
231
|
unit_idx = -1
|
232
|
+
num = num.to_i
|
223
233
|
while num > 0
|
224
234
|
v = num % 10000
|
225
235
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
236
|
+
unit_idx += 1
|
237
|
+
if v > 0
|
238
|
+
if alt == false || unit_idx >= 1
|
239
|
+
str = ""
|
240
|
+
# Cannot use hash as they're unordered in 1.8
|
241
|
+
[[1000, '천'],
|
242
|
+
[100, '백'],
|
243
|
+
[10, '십']].each do | arr |
|
244
|
+
u, sub_unit = arr
|
245
|
+
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
246
|
+
v %= u
|
247
|
+
end
|
248
|
+
str += nconfig['digits'][v] if v > 0
|
249
|
+
|
250
|
+
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx]
|
251
|
+
else
|
252
|
+
str = ""
|
253
|
+
tenfolds = nconfig['alt notation']['tenfolds']
|
254
|
+
digits = nconfig['alt notation']['digits']
|
255
|
+
alt_post_subs = nconfig['alt notation']['post substitution']
|
256
|
+
|
257
|
+
# Likewise.
|
258
|
+
[[1000, '천'],
|
259
|
+
[100, '백']].each do | u, sub_unit |
|
260
|
+
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
261
|
+
v %= u
|
262
|
+
end
|
263
|
+
|
264
|
+
str += tenfolds[(v / 10) - 1] if v / 10 > 0
|
265
|
+
v %= 10
|
266
|
+
str += digits[v] if v > 0
|
267
|
+
|
268
|
+
if alt
|
269
|
+
suffix = next_char.strip
|
270
|
+
str = str + suffix
|
271
|
+
alt_post_subs.each do | k, v |
|
272
|
+
str.gsub!(k, v)
|
273
|
+
end
|
274
|
+
str.sub!(/#{suffix}$/, '')
|
275
|
+
end
|
276
|
+
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx]
|
260
277
|
end
|
261
|
-
str.sub!(/#{suffix}$/, '')
|
262
|
-
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx += 1]
|
263
278
|
end
|
264
279
|
num /= 10000
|
265
280
|
end
|
266
281
|
|
267
|
-
tokens
|
268
|
-
tokens.reverse.join(' ') + next_char.to_s
|
282
|
+
tokens += sign unless sign.empty?
|
283
|
+
ret = tokens.reverse.join(' ') + below + next_char.to_s
|
284
|
+
nconfig['post substitution'].each do | k, v |
|
285
|
+
ret.gsub!(k, v)
|
286
|
+
end
|
287
|
+
ret
|
269
288
|
end
|
270
289
|
end#Korean
|
271
290
|
end#Gimchi
|
data/test/test_gimchi.rb
CHANGED
@@ -77,12 +77,25 @@ class TestGimchi < Test::Unit::TestCase
|
|
77
77
|
ko = Gimchi::Korean.new
|
78
78
|
assert_equal "천 구백 구십 구", ko.read_number(1999)
|
79
79
|
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
80
|
-
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십
|
81
|
-
ko.read_number("53,191,100,678.
|
82
|
-
|
83
|
-
|
80
|
+
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사오육칠",
|
81
|
+
ko.read_number("53,191,100,678.3214567")
|
82
|
+
assert_equal "영점영영영영영일이삼사오", ko.read_number("1.2345e-06")
|
83
|
+
assert_equal "일해 이천 삼백 사십 오경", ko.read_number("1.2345e+20")
|
84
|
+
assert_equal "플러스 일해 이천 삼백 사십 오경", ko.read_number("+ 1.2345e+20")
|
85
|
+
assert_equal "마이너스 일해 이천 삼백 사십 오경", ko.read_number("- 1.2345e+20")
|
86
|
+
assert_equal "만 십 이점삼", ko.read_number("100.123e+2")
|
87
|
+
assert_equal "십만 십 이점삼", ko.read_number("1000.123e+2")
|
88
|
+
assert_equal "백 일만 십 이점삼", ko.read_number("10100.123e+2")
|
89
|
+
assert_equal "천 십 이점삼", ko.read_number("10.123e+2")
|
90
|
+
assert_equal "십점영", ko.read_number("10.0")
|
91
|
+
assert_equal "플러스 십점영", ko.read_number("+ 10.0")
|
92
|
+
|
93
|
+
# 나이, 시간, 개수, 명 ( -살, -시, -개, -명 )
|
84
94
|
assert_equal "나는 스무살", ko.read_number("나는 20살")
|
95
|
+
assert_equal "이십 칠점일살", ko.read_number("27.1살")
|
85
96
|
assert_equal "너는 열세 살", ko.read_number("너는 13 살")
|
97
|
+
assert_equal "백 서른두명", ko.read_number("132명")
|
98
|
+
assert_equal "이천 오백 아흔아홉개", ko.read_number("2,599개")
|
86
99
|
assert_equal "지금은 일곱시 삼십분", ko.read_number("지금은 7시 30분")
|
87
100
|
end
|
88
101
|
|
@@ -118,7 +131,7 @@ class TestGimchi < Test::Unit::TestCase
|
|
118
131
|
assert s >= 411
|
119
132
|
end
|
120
133
|
|
121
|
-
def
|
134
|
+
def test_romanize_preserve_non_korean
|
122
135
|
ko = Gimchi::Korean.new
|
123
136
|
assert_equal 'ttok-kkateun kkk', ko.romanize('똑같은 kkk')
|
124
137
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimchi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-04-
|
12
|
+
date: 2011-04-12 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &2152493140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152493140
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: jeweler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2152491800 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152491800
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rcov
|
38
|
-
requirement: &
|
38
|
+
requirement: &2152490620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152490620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ansi
|
49
|
-
requirement: &
|
49
|
+
requirement: &2152489680 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2152489680
|
58
58
|
description: Gimchi knows how to pronounce Korean strings and how to write them in
|
59
59
|
roman alphabet.
|
60
60
|
email: junegunn.c@gmail.com
|
@@ -91,6 +91,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
91
|
- - ! '>='
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
hash: -4096346844211308775
|
94
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
98
|
none: false
|
96
99
|
requirements:
|