gimchi 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/config/default.yml +0 -1
- data/lib/gimchi.rb +1 -0
- data/lib/gimchi/korean.rb +10 -8
- data/lib/gimchi/patch_1.8.rb +29 -0
- data/lib/gimchi/pronouncer.rb +111 -121
- data/test/romanization.yml +1 -0
- data/test/test_gimchi.rb +3 -3
- metadata +10 -10
data/config/default.yml
CHANGED
data/lib/gimchi.rb
CHANGED
data/lib/gimchi/korean.rb
CHANGED
@@ -140,8 +140,8 @@ class Korean
|
|
140
140
|
kc.to_a.each_with_index do | comp, idx |
|
141
141
|
next if comp.nil?
|
142
142
|
comp = rdata[idx][comp] || comp
|
143
|
-
comp = comp[1..-1] if comp[0] == dash &&
|
144
|
-
(romanization.empty? || romanization[-1] =~ /\s/ || comp[1] == 'w')
|
143
|
+
comp = comp[1..-1] if comp[0, 1] == dash &&
|
144
|
+
(romanization.empty? || romanization[-1] =~ /\s/ || comp[1, 1] == 'w')
|
145
145
|
romanization += comp
|
146
146
|
end
|
147
147
|
end
|
@@ -225,9 +225,11 @@ private
|
|
225
225
|
|
226
226
|
if alt == false || unit_idx >= 0
|
227
227
|
str = ""
|
228
|
-
|
229
|
-
|
230
|
-
|
228
|
+
# Cannot use a hash here: hashes are unordered in Ruby 1.8
|
229
|
+
[[1000, '천'],
|
230
|
+
[100, '백'],
|
231
|
+
[10, '십']].each do | arr |
|
232
|
+
u, sub_unit = arr
|
231
233
|
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
232
234
|
v %= u
|
233
235
|
end
|
@@ -240,9 +242,9 @@ private
|
|
240
242
|
digits = nconfig['alt notation']['digits']
|
241
243
|
post_subs = nconfig['alt notation']['post substitution']
|
242
244
|
|
243
|
-
|
244
|
-
|
245
|
-
|
245
|
+
# Likewise.
|
246
|
+
[[1000, '천'],
|
247
|
+
[100, '백']].each do | u, sub_unit |
|
246
248
|
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
247
249
|
v %= u
|
248
250
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
$KCODE = 'U'
|
2
|
+
|
3
|
+
module Gimchi
|
4
|
+
class Korean
|
5
|
+
# Checks if the given character is a korean character.
|
6
|
+
# @param [String] ch A string of size 1
|
7
|
+
def korean_char? ch
|
8
|
+
raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
|
9
|
+
|
10
|
+
complete_korean_char?(ch) ||
|
11
|
+
(chosungs + jungsungs + jongsungs).include?(ch)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Checks if the given character is a "complete" korean character.
|
15
|
+
# "Complete" Korean character must have chosung and jungsung, with optional jongsung.
|
16
|
+
# @param [String] ch A string of size 1
|
17
|
+
def complete_korean_char? ch
|
18
|
+
raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
|
19
|
+
|
20
|
+
# Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
|
21
|
+
ch.unpack('U').all? { | c | c >= 0xAC00 && c <= 0xD7A3 }
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
def str_length str
|
26
|
+
str.scan(/./mu).length
|
27
|
+
end
|
28
|
+
end#Korean
|
29
|
+
end#Gimchi
|
data/lib/gimchi/pronouncer.rb
CHANGED
@@ -27,7 +27,7 @@ class Korean
|
|
27
27
|
# - For `slur'
|
28
28
|
applied = []
|
29
29
|
2.times do | phase |
|
30
|
-
@chars = @chars.reject { |c| c =~ /\s/ } if phase == 1
|
30
|
+
@chars = @chars.reject { |c| c =~ /\s/ } if phase == 1 # slur-phase
|
31
31
|
|
32
32
|
# Deep-fried...no copied backup
|
33
33
|
@initial_chars = @chars.map { |c| c.dup }
|
@@ -49,12 +49,19 @@ class Korean
|
|
49
49
|
private
|
50
50
|
def transform idx
|
51
51
|
@cursor = idx
|
52
|
+
kc = @chars[@cursor]
|
52
53
|
|
53
54
|
# Not korean
|
54
55
|
return [] unless kc.is_a? Korean::Char
|
55
56
|
|
57
|
+
# Setting up variables for fast lookup
|
58
|
+
@kc = kc
|
59
|
+
@next_kc = (nkc = @chars[@cursor + 1]).is_a?(Korean::Char) ? nkc : nil
|
60
|
+
@kc_org = @initial_chars[@cursor]
|
61
|
+
@next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(Korean::Char) ? nkco : nil
|
62
|
+
|
56
63
|
# Cannot properly pronounce
|
57
|
-
return [] if kc.chosung.nil? && kc.jungsung.nil? && kc.jongsung.nil?
|
64
|
+
return [] if @kc.chosung.nil? && @kc.jungsung.nil? && @kc.jongsung.nil?
|
58
65
|
|
59
66
|
applied = []
|
60
67
|
not_todo = []
|
@@ -77,23 +84,6 @@ class Korean
|
|
77
84
|
c.jungsung = 'ㅡ' if c.jungsung.nil?
|
78
85
|
end
|
79
86
|
|
80
|
-
def kc
|
81
|
-
@chars[@cursor]
|
82
|
-
end
|
83
|
-
|
84
|
-
def next_kc
|
85
|
-
nkc = @chars[@cursor + 1]
|
86
|
-
nkc.is_a?(Korean::Char) ? nkc : nil
|
87
|
-
end
|
88
|
-
|
89
|
-
def kc_org
|
90
|
-
@initial_chars[@cursor]
|
91
|
-
end
|
92
|
-
|
93
|
-
def next_kc_org
|
94
|
-
@initial_chars[@cursor + 1]
|
95
|
-
end
|
96
|
-
|
97
87
|
# shortcut
|
98
88
|
def fortis_map
|
99
89
|
@korean.config['structure']['fortis map']
|
@@ -108,16 +98,16 @@ class Korean
|
|
108
98
|
# 다만 1. 용언의 활용형에 나타나는 ‘져, 쪄, 쳐’는 [저, 쩌, 처]로 발음한다.
|
109
99
|
# 다만 3. 자음을 첫소리로 가지고 있는 음절의 ‘ㅢ’는 [ㅣ]로 발음한다.
|
110
100
|
def rule_5_1
|
111
|
-
if %w[져 쪄 쳐].include? kc.to_s
|
112
|
-
kc.jungsung = 'ㅓ'
|
101
|
+
if %w[져 쪄 쳐].include? @kc.to_s
|
102
|
+
@kc.jungsung = 'ㅓ'
|
113
103
|
|
114
104
|
true
|
115
105
|
end
|
116
106
|
end
|
117
107
|
|
118
108
|
def rule_5_3
|
119
|
-
if kc.jungsung == 'ㅢ' && kc_org.chosung.consonant?
|
120
|
-
kc.jungsung = 'ㅣ'
|
109
|
+
if @kc.jungsung == 'ㅢ' && @kc_org.chosung.consonant?
|
110
|
+
@kc.jungsung = 'ㅣ'
|
121
111
|
|
122
112
|
true
|
123
113
|
end
|
@@ -131,8 +121,8 @@ class Korean
|
|
131
121
|
%w[ㅅ ㅆ ㅈ ㅊ ㅌ] => 'ㄷ',
|
132
122
|
%w[ㅍ] => 'ㅂ'
|
133
123
|
}
|
134
|
-
if map.keys.flatten.include?(kc.jongsung) && (next_kc.nil? || next_kc.chosung.consonant?)
|
135
|
-
kc.jongsung = map[ map.keys.find { |e| e.include? kc.jongsung } ]
|
124
|
+
if map.keys.flatten.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?)
|
125
|
+
@kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ]
|
136
126
|
|
137
127
|
true
|
138
128
|
end
|
@@ -147,14 +137,14 @@ class Korean
|
|
147
137
|
%w[ㄼ ㄽ ㄾ] => 'ㄹ',
|
148
138
|
%w[ㅄ] => 'ㅂ'
|
149
139
|
}
|
150
|
-
if map.keys.flatten.include?(kc.jongsung) && (next_kc.nil? || next_kc.chosung.consonant?)
|
140
|
+
if map.keys.flatten.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?)
|
151
141
|
# Exceptions
|
152
|
-
if next_kc && (
|
153
|
-
(kc.to_s == '밟' && next_kc.chosung.consonant?) ||
|
154
|
-
(kc.to_s == '넓' && next_kc && %w[적 죽 둥].include?(next_kc_org.to_s))) # PATCH
|
155
|
-
kc.jongsung = 'ㅂ'
|
142
|
+
if @next_kc && (
|
143
|
+
(@kc.to_s == '밟' && @next_kc.chosung.consonant?) ||
|
144
|
+
(@kc.to_s == '넓' && @next_kc && %w[적 죽 둥].include?(@next_kc_org.to_s))) # PATCH
|
145
|
+
@kc.jongsung = 'ㅂ'
|
156
146
|
else
|
157
|
-
kc.jongsung = map[ map.keys.find { |e| e.include? kc.jongsung } ]
|
147
|
+
@kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ]
|
158
148
|
end
|
159
149
|
|
160
150
|
true
|
@@ -168,15 +158,15 @@ class Korean
|
|
168
158
|
'ㄻ' => 'ㅁ',
|
169
159
|
'ㄿ' => 'ㅂ'
|
170
160
|
}
|
171
|
-
if map.keys.include?(kc.jongsung) && (next_kc.nil? || next_kc.chosung.consonant?)
|
161
|
+
if map.keys.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?)
|
172
162
|
# 다만, 용언의 어간 말음 ‘ㄺ’은 ‘ㄱ’ 앞에서 [ㄹ]로 발음한다.
|
173
163
|
# - 용언 여부 판단은?: 중성으로 판단 (PATCH)
|
174
|
-
if next_kc && kc.jongsung == 'ㄺ' &&
|
175
|
-
next_kc_org.chosung == 'ㄱ' &&
|
176
|
-
%w[맑 얽 섥 밝 늙 묽 넓].include?(kc.to_s) # PATCH
|
177
|
-
kc.jongsung = 'ㄹ'
|
164
|
+
if @next_kc && @kc.jongsung == 'ㄺ' &&
|
165
|
+
@next_kc_org.chosung == 'ㄱ' &&
|
166
|
+
%w[맑 얽 섥 밝 늙 묽 넓].include?(@kc.to_s) # PATCH
|
167
|
+
@kc.jongsung = 'ㄹ'
|
178
168
|
else
|
179
|
-
kc.jongsung = map[kc.jongsung]
|
169
|
+
@kc.jongsung = map[@kc.jongsung]
|
180
170
|
end
|
181
171
|
|
182
172
|
true
|
@@ -197,41 +187,41 @@ class Korean
|
|
197
187
|
#
|
198
188
|
# 4. ‘ㅎ(ㄶ, ㅀ)’ 뒤에 모음으로 시작된 어미나 접미사가 결합되는 경우에는, ‘ㅎ’을 발음하지 않는다.
|
199
189
|
def rule_12
|
200
|
-
return if next_kc.nil?
|
190
|
+
return if @next_kc.nil?
|
201
191
|
|
202
192
|
map_12_1 = {
|
203
193
|
'ㄱ' => 'ㅋ',
|
204
194
|
'ㄷ' => 'ㅌ',
|
205
195
|
'ㅈ' => 'ㅊ' }
|
206
|
-
if %w[ㅎ ㄶ ㅀ].include?(kc.jongsung)
|
196
|
+
if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung)
|
207
197
|
# 12-1
|
208
|
-
if map_12_1.keys.include?(next_kc.chosung)
|
209
|
-
next_kc.chosung = map_12_1[next_kc.chosung]
|
210
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
198
|
+
if map_12_1.keys.include?(@next_kc.chosung)
|
199
|
+
@next_kc.chosung = map_12_1[@next_kc.chosung]
|
200
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
211
201
|
|
212
202
|
# 12-2
|
213
|
-
elsif next_kc.chosung == 'ㅅ'
|
214
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
215
|
-
next_kc.chosung = 'ㅆ'
|
203
|
+
elsif @next_kc.chosung == 'ㅅ'
|
204
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
205
|
+
@next_kc.chosung = 'ㅆ'
|
216
206
|
|
217
207
|
# 12-3
|
218
|
-
elsif next_kc.chosung == 'ㄴ'
|
219
|
-
if dc = double_consonant_map[kc.jongsung]
|
220
|
-
kc.jongsung = dc.first
|
208
|
+
elsif @next_kc.chosung == 'ㄴ'
|
209
|
+
if dc = double_consonant_map[@kc.jongsung]
|
210
|
+
@kc.jongsung = dc.first
|
221
211
|
else
|
222
|
-
kc.jongsung = 'ㄴ'
|
212
|
+
@kc.jongsung = 'ㄴ'
|
223
213
|
end
|
224
214
|
|
225
215
|
# 12-4
|
226
|
-
elsif next_kc.chosung == 'ㅇ'
|
227
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
216
|
+
elsif @next_kc.chosung == 'ㅇ'
|
217
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
228
218
|
end
|
229
219
|
|
230
220
|
true
|
231
221
|
end
|
232
222
|
|
233
223
|
# 12-1 붙임
|
234
|
-
if next_kc.chosung == 'ㅎ'
|
224
|
+
if @next_kc.chosung == 'ㅎ'
|
235
225
|
map_jongsung = {
|
236
226
|
# 붙임 1
|
237
227
|
'ㄱ' => [nil, 'ㅋ'],
|
@@ -248,9 +238,9 @@ class Korean
|
|
248
238
|
'ㅊ' => [nil, 'ㅌ'],
|
249
239
|
'ㅌ' => [nil, 'ㅌ'],
|
250
240
|
}
|
251
|
-
if trans1 = map_jongsung[kc.jongsung]
|
252
|
-
kc.jongsung = trans1.first
|
253
|
-
next_kc.chosung = trans1.last
|
241
|
+
if trans1 = map_jongsung[@kc.jongsung]
|
242
|
+
@kc.jongsung = trans1.first
|
243
|
+
@next_kc.chosung = trans1.last
|
254
244
|
|
255
245
|
true
|
256
246
|
end
|
@@ -260,9 +250,9 @@ class Korean
|
|
260
250
|
# 제13항: 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와
|
261
251
|
# 결합되는 경우에는, 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다.
|
262
252
|
def rule_13
|
263
|
-
return if kc.jongsung.nil? || kc.jongsung == 'ㅇ' || next_kc.nil? || next_kc.chosung != 'ㅇ'
|
264
|
-
next_kc.chosung = kc.jongsung
|
265
|
-
kc.jongsung = nil
|
253
|
+
return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ'
|
254
|
+
@next_kc.chosung = @kc.jongsung
|
255
|
+
@kc.jongsung = nil
|
266
256
|
|
267
257
|
true
|
268
258
|
end
|
@@ -271,10 +261,10 @@ class Korean
|
|
271
261
|
# 뒤엣것만을 뒤 음절 첫소리로 옮겨 발음한다.(이 경우, ‘ㅅ’은 된소리로 발음함.)
|
272
262
|
#
|
273
263
|
def rule_14
|
274
|
-
return if kc.jongsung.nil? || kc.jongsung == 'ㅇ' || next_kc.nil? || next_kc.chosung != 'ㅇ'
|
275
|
-
if consonants = double_consonant_map[kc.jongsung]
|
264
|
+
return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ'
|
265
|
+
if consonants = double_consonant_map[@kc.jongsung]
|
276
266
|
consonants[1] = 'ㅆ' if consonants[1] == 'ㅅ'
|
277
|
-
kc.jongsung, next_kc.chosung = consonants
|
267
|
+
@kc.jongsung, @next_kc.chosung = consonants
|
278
268
|
|
279
269
|
true
|
280
270
|
end
|
@@ -283,12 +273,12 @@ class Korean
|
|
283
273
|
# 제15항: 받침 뒤에 모음 ‘ㅏ, ㅓ, ㅗ, ㅜ, ㅟ’들로 시작되는 __실질 형태소__가 연결되는
|
284
274
|
# 경우에는, 대표음으로 바꾸어서 뒤 음절 첫소리로 옮겨 발음한다.
|
285
275
|
def rule_15
|
286
|
-
return if kc.jongsung.nil? || kc.jongsung == 'ㅇ' || next_kc.nil? || next_kc.chosung != 'ㅇ'
|
276
|
+
return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ'
|
287
277
|
|
288
|
-
if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(next_kc.jungsung) &&
|
289
|
-
%[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(kc.jongsung) == false # PATCH
|
290
|
-
next_kc.chosung = @pconfig['jongsung sound'][ kc.jongsung ]
|
291
|
-
kc.jongsung = nil
|
278
|
+
if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(@next_kc.jungsung) &&
|
279
|
+
%[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(@kc.jongsung) == false # PATCH
|
280
|
+
@next_kc.chosung = @pconfig['jongsung sound'][ @kc.jongsung ]
|
281
|
+
@kc.jongsung = nil
|
292
282
|
|
293
283
|
true
|
294
284
|
end
|
@@ -297,7 +287,7 @@ class Korean
|
|
297
287
|
# 제16항: 한글 자모의 이름은 그 받침소리를 연음하되, ‘ㄷ, ㅈ, ㅊ, ㅋ, ㅌ,
|
298
288
|
# ㅍ, ㅎ’의 경우에는 특별히 다음과 같이 발음한다.
|
299
289
|
def rule_16
|
300
|
-
return if next_kc.nil?
|
290
|
+
return if @next_kc.nil?
|
301
291
|
|
302
292
|
map = {'디귿' => '디긋',
|
303
293
|
'지읒' => '지읏',
|
@@ -307,11 +297,11 @@ class Korean
|
|
307
297
|
'피읖' => '피읍',
|
308
298
|
'히읗' => '히읏'}
|
309
299
|
|
310
|
-
word = kc.to_s + next_kc.to_s
|
300
|
+
word = @kc.to_s + @next_kc.to_s
|
311
301
|
if map.keys.include? word
|
312
|
-
new_char = @korean.dissect(map[word][1])[0]
|
313
|
-
next_kc.chosung = new_char.chosung
|
314
|
-
next_kc.jongsung = new_char.jongsung
|
302
|
+
new_char = @korean.dissect(map[word].scan(/./mu)[1])[0]
|
303
|
+
@next_kc.chosung = new_char.chosung
|
304
|
+
@next_kc.jongsung = new_char.jongsung
|
315
305
|
|
316
306
|
true
|
317
307
|
end
|
@@ -322,16 +312,16 @@ class Korean
|
|
322
312
|
#
|
323
313
|
# [붙임] ‘ㄷ’ 뒤에 접미사 ‘히’가 결합되어 ‘티’를 이루는 것은 [치]로 발음한다.
|
324
314
|
def rule_17
|
325
|
-
return if next_kc.nil? || %w[ㄷ ㅌ ㄾ].include?(kc.jongsung) == false
|
315
|
+
return if @next_kc.nil? || %w[ㄷ ㅌ ㄾ].include?(@kc.jongsung) == false
|
326
316
|
|
327
|
-
if next_kc.to_s == '이'
|
328
|
-
next_kc.chosung = kc.jongsung == 'ㄷ' ? 'ㅈ' : 'ㅊ'
|
329
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
317
|
+
if @next_kc.to_s == '이'
|
318
|
+
@next_kc.chosung = @kc.jongsung == 'ㄷ' ? 'ㅈ' : 'ㅊ'
|
319
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
330
320
|
|
331
321
|
true
|
332
|
-
elsif next_kc.to_s == '히'
|
333
|
-
next_kc.chosung = 'ㅊ'
|
334
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
322
|
+
elsif @next_kc.to_s == '히'
|
323
|
+
@next_kc.chosung = 'ㅊ'
|
324
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
335
325
|
|
336
326
|
true
|
337
327
|
end
|
@@ -345,8 +335,8 @@ class Korean
|
|
345
335
|
%w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ',
|
346
336
|
%w[ㅂ ㅍ ㄼ ㄿ ㅄ] => 'ㅁ'
|
347
337
|
}
|
348
|
-
if next_kc && map.keys.flatten.include?(kc.jongsung) && %w[ㄴ ㅁ].include?(next_kc.chosung)
|
349
|
-
kc.jongsung = map[ map.keys.find { |e| e.include? kc.jongsung } ]
|
338
|
+
if @next_kc && map.keys.flatten.include?(@kc.jongsung) && %w[ㄴ ㅁ].include?(@next_kc.chosung)
|
339
|
+
@kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ]
|
350
340
|
|
351
341
|
true
|
352
342
|
end
|
@@ -355,12 +345,12 @@ class Korean
|
|
355
345
|
# 제19항: 받침 ‘ㅁ, ㅇ’ 뒤에 연결되는 ‘ㄹ’은 [ㄴ]으로 발음한다.
|
356
346
|
# [붙임]받침 ‘ㄱ, ㅂ’ 뒤에 연결되는 ‘ㄹ’도 [ㄴ]으로 발음한다.
|
357
347
|
def rule_19
|
358
|
-
if next_kc && next_kc.chosung == 'ㄹ' && %w[ㅁ ㅇ ㄱ ㅂ].include?(kc.jongsung)
|
359
|
-
next_kc.chosung = 'ㄴ'
|
348
|
+
if @next_kc && @next_kc.chosung == 'ㄹ' && %w[ㅁ ㅇ ㄱ ㅂ].include?(@kc.jongsung)
|
349
|
+
@next_kc.chosung = 'ㄴ'
|
360
350
|
|
361
|
-
case kc.jongsung
|
362
|
-
when 'ㄱ' then kc.jongsung = 'ㅇ'
|
363
|
-
when 'ㅂ' then kc.jongsung = 'ㅁ'
|
351
|
+
case @kc.jongsung
|
352
|
+
when 'ㄱ' then @kc.jongsung = 'ㅇ'
|
353
|
+
when 'ㅂ' then @kc.jongsung = 'ㅁ'
|
364
354
|
end
|
365
355
|
|
366
356
|
true
|
@@ -369,21 +359,21 @@ class Korean
|
|
369
359
|
|
370
360
|
# 제20항: ‘ㄴ’은 ‘ㄹ’의 앞이나 뒤에서 [ㄹ]로 발음한다.
|
371
361
|
def rule_20
|
372
|
-
return if next_kc.nil?
|
362
|
+
return if @next_kc.nil?
|
373
363
|
|
374
364
|
to = if %w[견란 진란 산량 단력 권력 원령 견례
|
375
|
-
문로 단로 원론 원료 근류].include?(kc_org.to_s + next_kc_org.to_s)
|
365
|
+
문로 단로 원론 원료 근류].include?(@kc_org.to_s + @next_kc_org.to_s)
|
376
366
|
'ㄴ'
|
377
367
|
else
|
378
368
|
'ㄹ'
|
379
369
|
end
|
380
370
|
|
381
|
-
if kc.jongsung == 'ㄹ' && next_kc.chosung == 'ㄴ'
|
382
|
-
kc.jongsung = next_kc.chosung = to
|
371
|
+
if @kc.jongsung == 'ㄹ' && @next_kc.chosung == 'ㄴ'
|
372
|
+
@kc.jongsung = @next_kc.chosung = to
|
383
373
|
|
384
374
|
true
|
385
|
-
elsif kc.jongsung == 'ㄴ' && next_kc.chosung == 'ㄹ'
|
386
|
-
kc.jongsung = next_kc.chosung = to
|
375
|
+
elsif @kc.jongsung == 'ㄴ' && @next_kc.chosung == 'ㄹ'
|
376
|
+
@kc.jongsung = @next_kc.chosung = to
|
387
377
|
|
388
378
|
true
|
389
379
|
end
|
@@ -392,10 +382,10 @@ class Korean
|
|
392
382
|
# 제23항: 받침 ‘ㄱ(ㄲ, ㅋ, ㄳ, ㄺ), ㄷ(ㅅ, ㅆ, ㅈ, ㅊ, ㅌ), ㅂ(ㅍ, ㄼ, ㄿ,ㅄ)’
|
393
383
|
# 뒤에 연결되는 ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’은 된소리로 발음한다.
|
394
384
|
def rule_23
|
395
|
-
return if next_kc.nil?
|
396
|
-
if fortis_map.keys.include?(next_kc.chosung) &&
|
397
|
-
%w[ㄱ ㄲ ㅋ ㄳ ㄺ ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅂ ㅍ ㄼ ㄿ ㅄ].include?(kc.jongsung)
|
398
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
385
|
+
return if @next_kc.nil?
|
386
|
+
if fortis_map.keys.include?(@next_kc.chosung) &&
|
387
|
+
%w[ㄱ ㄲ ㅋ ㄳ ㄺ ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅂ ㅍ ㄼ ㄿ ㅄ].include?(@kc.jongsung)
|
388
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
399
389
|
|
400
390
|
true
|
401
391
|
end
|
@@ -405,22 +395,22 @@ class Korean
|
|
405
395
|
# 다만, 피동, 사동의 접미사 ‘-기-’는 된소리로 발음하지 않는다.
|
406
396
|
# 용언 어간에만 적용.
|
407
397
|
def rule_24
|
408
|
-
return if next_kc.nil? ||
|
409
|
-
next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기
|
398
|
+
return if @next_kc.nil? ||
|
399
|
+
@next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기
|
410
400
|
|
411
401
|
# FIXME 용언 여부를 판단. 정확한 판단 불가.
|
412
|
-
return unless case kc.jongsung
|
402
|
+
return unless case @kc.jongsung
|
413
403
|
when 'ㄵ'
|
414
|
-
%w[앉 얹].include? kc.to_s
|
404
|
+
%w[앉 얹].include? @kc.to_s
|
415
405
|
when 'ㄻ'
|
416
|
-
%w[젊 닮].include? kc.to_s
|
406
|
+
%w[젊 닮].include? @kc.to_s
|
417
407
|
else
|
418
408
|
false # XXX 일반적인 경우 사전 없이 판단 불가
|
419
409
|
end
|
420
410
|
|
421
|
-
if %w[ㄱ ㄷ ㅅ ㅈ].include?(next_kc.chosung) &&
|
422
|
-
%w[ㄴ ㄵ ㅁ ㄻ ㄼ ㄾ].include?(kc.jongsung)
|
423
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
411
|
+
if %w[ㄱ ㄷ ㅅ ㅈ].include?(@next_kc.chosung) &&
|
412
|
+
%w[ㄴ ㄵ ㅁ ㄻ ㄼ ㄾ].include?(@kc.jongsung)
|
413
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
424
414
|
|
425
415
|
true
|
426
416
|
end
|
@@ -429,11 +419,11 @@ class Korean
|
|
429
419
|
# 제25항: 어간 받침 ‘ㄼ, ㄾ’ 뒤에 결합되는 어미의 첫소리 ‘ㄱ, ㄷ, ㅅ, ㅈ’은
|
430
420
|
# 된소리로 발음한다.
|
431
421
|
def rule_25
|
432
|
-
return if next_kc.nil?
|
422
|
+
return if @next_kc.nil?
|
433
423
|
|
434
|
-
if %w[ㄱ ㄷ ㅅ ㅈ].include?(next_kc.chosung) &&
|
435
|
-
%w[ㄼ ㄾ].include?(kc.jongsung)
|
436
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
424
|
+
if %w[ㄱ ㄷ ㅅ ㅈ].include?(@next_kc.chosung) &&
|
425
|
+
%w[ㄼ ㄾ].include?(@kc.jongsung)
|
426
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
437
427
|
|
438
428
|
true
|
439
429
|
end
|
@@ -448,12 +438,12 @@ class Korean
|
|
448
438
|
# - ‘-(으)ㄹ’로 시작되는 어미의 경우에도 이에 준한다.
|
449
439
|
def rule_27
|
450
440
|
# FIXME: NOT PROPERLY IMPLEMENTED
|
451
|
-
return if next_kc.nil?
|
441
|
+
return if @next_kc.nil?
|
452
442
|
|
453
443
|
# 비교적 확률이 높은 경우들에 대해서만 처리. "일" 은 제외.
|
454
|
-
if %w[할 갈 날 볼 을 앨 말 힐].include?(kc.to_s) && # kc.jongsung == 'ㄹ' &&
|
455
|
-
%w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(next_kc.chosung)
|
456
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
444
|
+
if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' &&
|
445
|
+
%w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(@next_kc.chosung)
|
446
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
457
447
|
true
|
458
448
|
end
|
459
449
|
end
|
@@ -480,21 +470,21 @@ class Korean
|
|
480
470
|
# 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다.
|
481
471
|
# 3. 사이시옷 뒤에 ‘이’ 음이 결합되는 경우에는 [ㄴㄴ]으로 발음한다.
|
482
472
|
def rule_30
|
483
|
-
return if next_kc.nil? || kc.jongsung != 'ㅅ'
|
473
|
+
return if @next_kc.nil? || @kc.jongsung != 'ㅅ'
|
484
474
|
|
485
|
-
if %w[ㄱ ㄷ ㅂ ㅅ ㅈ].include? next_kc.chosung
|
486
|
-
kc.jongsung = 'ㄷ' # or nil
|
487
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
475
|
+
if %w[ㄱ ㄷ ㅂ ㅅ ㅈ].include? @next_kc.chosung
|
476
|
+
@kc.jongsung = 'ㄷ' # or nil
|
477
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
488
478
|
|
489
479
|
true
|
490
|
-
elsif %w[ㄴ ㅁ].include? next_kc.chosung
|
491
|
-
kc.jongsung = 'ㄴ'
|
480
|
+
elsif %w[ㄴ ㅁ].include? @next_kc.chosung
|
481
|
+
@kc.jongsung = 'ㄴ'
|
492
482
|
|
493
483
|
true
|
494
|
-
elsif next_kc.chosung == 'ㅇ' &&
|
495
|
-
%w[ㅣ ㅒ ㅖ ㅑ ㅕ ㅛ ㅠ].include?(next_kc.jungsung) &&
|
496
|
-
next_kc.jongsung # PATCH
|
497
|
-
kc.jongsung = next_kc.chosung = 'ㄴ'
|
484
|
+
elsif @next_kc.chosung == 'ㅇ' &&
|
485
|
+
%w[ㅣ ㅒ ㅖ ㅑ ㅕ ㅛ ㅠ].include?(@next_kc.jungsung) &&
|
486
|
+
@next_kc.jongsung # PATCH
|
487
|
+
@kc.jongsung = @next_kc.chosung = 'ㄴ'
|
498
488
|
|
499
489
|
true
|
500
490
|
end
|
data/test/romanization.yml
CHANGED
data/test/test_gimchi.rb
CHANGED
@@ -77,8 +77,8 @@ class TestGimchi < Test::Unit::TestCase
|
|
77
77
|
ko = Gimchi::Korean.new
|
78
78
|
assert_equal "천 구백 구십 구", ko.read_number(1999)
|
79
79
|
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
80
|
-
assert_equal "
|
81
|
-
ko.read_number("
|
80
|
+
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사",
|
81
|
+
ko.read_number("53,191,100,678.3214")
|
82
82
|
|
83
83
|
# 나이, 시간 ( -살, -시 )
|
84
84
|
assert_equal "나는 스무살", ko.read_number("나는 20살")
|
@@ -142,6 +142,6 @@ class TestGimchi < Test::Unit::TestCase
|
|
142
142
|
end
|
143
143
|
puts "#{s} / #{cnt}"
|
144
144
|
# FIXME
|
145
|
-
assert s >=
|
145
|
+
assert s >= 58
|
146
146
|
end
|
147
147
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimchi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-04-08 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157290340 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157290340
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: jeweler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2157306240 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157306240
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rcov
|
38
|
-
requirement: &
|
38
|
+
requirement: &2157305760 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157305760
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ansi
|
49
|
-
requirement: &
|
49
|
+
requirement: &2157305280 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2157305280
|
58
58
|
description: Gimchi knows how to pronounce Korean strings and how to write them in
|
59
59
|
roman alphabet.
|
60
60
|
email: junegunn.c@gmail.com
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- lib/gimchi.rb
|
70
70
|
- lib/gimchi/char.rb
|
71
71
|
- lib/gimchi/korean.rb
|
72
|
+
- lib/gimchi/patch_1.8.rb
|
72
73
|
- lib/gimchi/pronouncer.rb
|
73
74
|
- LICENSE.txt
|
74
75
|
- README.ko.rdoc
|
@@ -107,4 +108,3 @@ test_files:
|
|
107
108
|
- test/pronunciation.yml
|
108
109
|
- test/romanization.yml
|
109
110
|
- test/test_gimchi.rb
|
110
|
-
has_rdoc:
|