gimchi 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/config/default.yml +0 -1
- data/lib/gimchi.rb +1 -0
- data/lib/gimchi/korean.rb +10 -8
- data/lib/gimchi/patch_1.8.rb +29 -0
- data/lib/gimchi/pronouncer.rb +111 -121
- data/test/romanization.yml +1 -0
- data/test/test_gimchi.rb +3 -3
- metadata +10 -10
data/config/default.yml
CHANGED
data/lib/gimchi.rb
CHANGED
data/lib/gimchi/korean.rb
CHANGED
@@ -140,8 +140,8 @@ class Korean
|
|
140
140
|
kc.to_a.each_with_index do | comp, idx |
|
141
141
|
next if comp.nil?
|
142
142
|
comp = rdata[idx][comp] || comp
|
143
|
-
comp = comp[1..-1] if comp[0] == dash &&
|
144
|
-
(romanization.empty? || romanization[-1] =~ /\s/ || comp[1] == 'w')
|
143
|
+
comp = comp[1..-1] if comp[0, 1] == dash &&
|
144
|
+
(romanization.empty? || romanization[-1] =~ /\s/ || comp[1, 1] == 'w')
|
145
145
|
romanization += comp
|
146
146
|
end
|
147
147
|
end
|
@@ -225,9 +225,11 @@ private
|
|
225
225
|
|
226
226
|
if alt == false || unit_idx >= 0
|
227
227
|
str = ""
|
228
|
-
|
229
|
-
|
230
|
-
|
228
|
+
# Cannot use a hash here, as hashes are unordered in Ruby 1.8
|
229
|
+
[[1000, '천'],
|
230
|
+
[100, '백'],
|
231
|
+
[10, '십']].each do | arr |
|
232
|
+
u, sub_unit = arr
|
231
233
|
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
232
234
|
v %= u
|
233
235
|
end
|
@@ -240,9 +242,9 @@ private
|
|
240
242
|
digits = nconfig['alt notation']['digits']
|
241
243
|
post_subs = nconfig['alt notation']['post substitution']
|
242
244
|
|
243
|
-
|
244
|
-
|
245
|
-
|
245
|
+
# Likewise.
|
246
|
+
[[1000, '천'],
|
247
|
+
[100, '백']].each do | u, sub_unit |
|
246
248
|
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
247
249
|
v %= u
|
248
250
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
$KCODE = 'U'
|
2
|
+
|
3
|
+
module Gimchi
|
4
|
+
class Korean
|
5
|
+
# Checks if the given character is a korean character.
|
6
|
+
# @param [String] ch A string of size 1
|
7
|
+
def korean_char? ch
|
8
|
+
raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
|
9
|
+
|
10
|
+
complete_korean_char?(ch) ||
|
11
|
+
(chosungs + jungsungs + jongsungs).include?(ch)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Checks if the given character is a "complete" korean character.
|
15
|
+
# "Complete" Korean character must have chosung and jungsung, with optional jongsung.
|
16
|
+
# @param [String] ch A string of size 1
|
17
|
+
def complete_korean_char? ch
|
18
|
+
raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
|
19
|
+
|
20
|
+
# Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
|
21
|
+
ch.unpack('U').all? { | c | c >= 0xAC00 && c <= 0xD7A3 }
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
def str_length str
|
26
|
+
str.scan(/./mu).length
|
27
|
+
end
|
28
|
+
end#Korean
|
29
|
+
end#Gimchi
|
data/lib/gimchi/pronouncer.rb
CHANGED
@@ -27,7 +27,7 @@ class Korean
|
|
27
27
|
# - For `slur'
|
28
28
|
applied = []
|
29
29
|
2.times do | phase |
|
30
|
-
@chars = @chars.reject { |c| c =~ /\s/ } if phase == 1
|
30
|
+
@chars = @chars.reject { |c| c =~ /\s/ } if phase == 1 # slur-phase
|
31
31
|
|
32
32
|
# Deep-fried...no copied backup
|
33
33
|
@initial_chars = @chars.map { |c| c.dup }
|
@@ -49,12 +49,19 @@ class Korean
|
|
49
49
|
private
|
50
50
|
def transform idx
|
51
51
|
@cursor = idx
|
52
|
+
kc = @chars[@cursor]
|
52
53
|
|
53
54
|
# Not korean
|
54
55
|
return [] unless kc.is_a? Korean::Char
|
55
56
|
|
57
|
+
# Setting up variables for fast lookup
|
58
|
+
@kc = kc
|
59
|
+
@next_kc = (nkc = @chars[@cursor + 1]).is_a?(Korean::Char) ? nkc : nil
|
60
|
+
@kc_org = @initial_chars[@cursor]
|
61
|
+
@next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(Korean::Char) ? nkco : nil
|
62
|
+
|
56
63
|
# Cannot properly pronounce
|
57
|
-
return [] if kc.chosung.nil? && kc.jungsung.nil? && kc.jongsung.nil?
|
64
|
+
return [] if @kc.chosung.nil? && @kc.jungsung.nil? && @kc.jongsung.nil?
|
58
65
|
|
59
66
|
applied = []
|
60
67
|
not_todo = []
|
@@ -77,23 +84,6 @@ class Korean
|
|
77
84
|
c.jungsung = 'ㅡ' if c.jungsung.nil?
|
78
85
|
end
|
79
86
|
|
80
|
-
def kc
|
81
|
-
@chars[@cursor]
|
82
|
-
end
|
83
|
-
|
84
|
-
def next_kc
|
85
|
-
nkc = @chars[@cursor + 1]
|
86
|
-
nkc.is_a?(Korean::Char) ? nkc : nil
|
87
|
-
end
|
88
|
-
|
89
|
-
def kc_org
|
90
|
-
@initial_chars[@cursor]
|
91
|
-
end
|
92
|
-
|
93
|
-
def next_kc_org
|
94
|
-
@initial_chars[@cursor + 1]
|
95
|
-
end
|
96
|
-
|
97
87
|
# shortcut
|
98
88
|
def fortis_map
|
99
89
|
@korean.config['structure']['fortis map']
|
@@ -108,16 +98,16 @@ class Korean
|
|
108
98
|
# 다만 1. 용언의 활용형에 나타나는 ‘져, 쪄, 쳐’는 [저, 쩌, 처]로 발음한다.
|
109
99
|
# 다만 3. 자음을 첫소리로 가지고 있는 음절의 ‘ㅢ’는 [ㅣ]로 발음한다.
|
110
100
|
def rule_5_1
|
111
|
-
if %w[져 쪄 쳐].include? kc.to_s
|
112
|
-
kc.jungsung = 'ㅓ'
|
101
|
+
if %w[져 쪄 쳐].include? @kc.to_s
|
102
|
+
@kc.jungsung = 'ㅓ'
|
113
103
|
|
114
104
|
true
|
115
105
|
end
|
116
106
|
end
|
117
107
|
|
118
108
|
def rule_5_3
|
119
|
-
if kc.jungsung == 'ㅢ' && kc_org.chosung.consonant?
|
120
|
-
kc.jungsung = 'ㅣ'
|
109
|
+
if @kc.jungsung == 'ㅢ' && @kc_org.chosung.consonant?
|
110
|
+
@kc.jungsung = 'ㅣ'
|
121
111
|
|
122
112
|
true
|
123
113
|
end
|
@@ -131,8 +121,8 @@ class Korean
|
|
131
121
|
%w[ㅅ ㅆ ㅈ ㅊ ㅌ] => 'ㄷ',
|
132
122
|
%w[ㅍ] => 'ㅂ'
|
133
123
|
}
|
134
|
-
if map.keys.flatten.include?(kc.jongsung) && (next_kc.nil? || next_kc.chosung.consonant?)
|
135
|
-
kc.jongsung = map[ map.keys.find { |e| e.include? kc.jongsung } ]
|
124
|
+
if map.keys.flatten.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?)
|
125
|
+
@kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ]
|
136
126
|
|
137
127
|
true
|
138
128
|
end
|
@@ -147,14 +137,14 @@ class Korean
|
|
147
137
|
%w[ㄼ ㄽ ㄾ] => 'ㄹ',
|
148
138
|
%w[ㅄ] => 'ㅂ'
|
149
139
|
}
|
150
|
-
if map.keys.flatten.include?(kc.jongsung) && (next_kc.nil? || next_kc.chosung.consonant?)
|
140
|
+
if map.keys.flatten.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?)
|
151
141
|
# Exceptions
|
152
|
-
if next_kc && (
|
153
|
-
(kc.to_s == '밟' && next_kc.chosung.consonant?) ||
|
154
|
-
(kc.to_s == '넓' && next_kc && %w[적 죽 둥].include?(next_kc_org.to_s))) # PATCH
|
155
|
-
kc.jongsung = 'ㅂ'
|
142
|
+
if @next_kc && (
|
143
|
+
(@kc.to_s == '밟' && @next_kc.chosung.consonant?) ||
|
144
|
+
(@kc.to_s == '넓' && @next_kc && %w[적 죽 둥].include?(@next_kc_org.to_s))) # PATCH
|
145
|
+
@kc.jongsung = 'ㅂ'
|
156
146
|
else
|
157
|
-
kc.jongsung = map[ map.keys.find { |e| e.include? kc.jongsung } ]
|
147
|
+
@kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ]
|
158
148
|
end
|
159
149
|
|
160
150
|
true
|
@@ -168,15 +158,15 @@ class Korean
|
|
168
158
|
'ㄻ' => 'ㅁ',
|
169
159
|
'ㄿ' => 'ㅂ'
|
170
160
|
}
|
171
|
-
if map.keys.include?(kc.jongsung) && (next_kc.nil? || next_kc.chosung.consonant?)
|
161
|
+
if map.keys.include?(@kc.jongsung) && (@next_kc.nil? || @next_kc.chosung.consonant?)
|
172
162
|
# 다만, 용언의 어간 말음 ‘ㄺ’은 ‘ㄱ’ 앞에서 [ㄹ]로 발음한다.
|
173
163
|
# - 용언 여부 판단은?: 중성으로 판단 (PATCH)
|
174
|
-
if next_kc && kc.jongsung == 'ㄺ' &&
|
175
|
-
next_kc_org.chosung == 'ㄱ' &&
|
176
|
-
%w[맑 얽 섥 밝 늙 묽 넓].include?(kc.to_s) # PATCH
|
177
|
-
kc.jongsung = 'ㄹ'
|
164
|
+
if @next_kc && @kc.jongsung == 'ㄺ' &&
|
165
|
+
@next_kc_org.chosung == 'ㄱ' &&
|
166
|
+
%w[맑 얽 섥 밝 늙 묽 넓].include?(@kc.to_s) # PATCH
|
167
|
+
@kc.jongsung = 'ㄹ'
|
178
168
|
else
|
179
|
-
kc.jongsung = map[kc.jongsung]
|
169
|
+
@kc.jongsung = map[@kc.jongsung]
|
180
170
|
end
|
181
171
|
|
182
172
|
true
|
@@ -197,41 +187,41 @@ class Korean
|
|
197
187
|
#
|
198
188
|
# 4. ‘ㅎ(ㄶ, ㅀ)’ 뒤에 모음으로 시작된 어미나 접미사가 결합되는 경우에는, ‘ㅎ’을 발음하지 않는다.
|
199
189
|
def rule_12
|
200
|
-
return if next_kc.nil?
|
190
|
+
return if @next_kc.nil?
|
201
191
|
|
202
192
|
map_12_1 = {
|
203
193
|
'ㄱ' => 'ㅋ',
|
204
194
|
'ㄷ' => 'ㅌ',
|
205
195
|
'ㅈ' => 'ㅊ' }
|
206
|
-
if %w[ㅎ ㄶ ㅀ].include?(kc.jongsung)
|
196
|
+
if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung)
|
207
197
|
# 12-1
|
208
|
-
if map_12_1.keys.include?(next_kc.chosung)
|
209
|
-
next_kc.chosung = map_12_1[next_kc.chosung]
|
210
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
198
|
+
if map_12_1.keys.include?(@next_kc.chosung)
|
199
|
+
@next_kc.chosung = map_12_1[@next_kc.chosung]
|
200
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
211
201
|
|
212
202
|
# 12-2
|
213
|
-
elsif next_kc.chosung == 'ㅅ'
|
214
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
215
|
-
next_kc.chosung = 'ㅆ'
|
203
|
+
elsif @next_kc.chosung == 'ㅅ'
|
204
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
205
|
+
@next_kc.chosung = 'ㅆ'
|
216
206
|
|
217
207
|
# 12-3
|
218
|
-
elsif next_kc.chosung == 'ㄴ'
|
219
|
-
if dc = double_consonant_map[kc.jongsung]
|
220
|
-
kc.jongsung = dc.first
|
208
|
+
elsif @next_kc.chosung == 'ㄴ'
|
209
|
+
if dc = double_consonant_map[@kc.jongsung]
|
210
|
+
@kc.jongsung = dc.first
|
221
211
|
else
|
222
|
-
kc.jongsung = 'ㄴ'
|
212
|
+
@kc.jongsung = 'ㄴ'
|
223
213
|
end
|
224
214
|
|
225
215
|
# 12-4
|
226
|
-
elsif next_kc.chosung == 'ㅇ'
|
227
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
216
|
+
elsif @next_kc.chosung == 'ㅇ'
|
217
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
228
218
|
end
|
229
219
|
|
230
220
|
true
|
231
221
|
end
|
232
222
|
|
233
223
|
# 12-1 붙임
|
234
|
-
if next_kc.chosung == 'ㅎ'
|
224
|
+
if @next_kc.chosung == 'ㅎ'
|
235
225
|
map_jongsung = {
|
236
226
|
# 붙임 1
|
237
227
|
'ㄱ' => [nil, 'ㅋ'],
|
@@ -248,9 +238,9 @@ class Korean
|
|
248
238
|
'ㅊ' => [nil, 'ㅌ'],
|
249
239
|
'ㅌ' => [nil, 'ㅌ'],
|
250
240
|
}
|
251
|
-
if trans1 = map_jongsung[kc.jongsung]
|
252
|
-
kc.jongsung = trans1.first
|
253
|
-
next_kc.chosung = trans1.last
|
241
|
+
if trans1 = map_jongsung[@kc.jongsung]
|
242
|
+
@kc.jongsung = trans1.first
|
243
|
+
@next_kc.chosung = trans1.last
|
254
244
|
|
255
245
|
true
|
256
246
|
end
|
@@ -260,9 +250,9 @@ class Korean
|
|
260
250
|
# 제13항: 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와
|
261
251
|
# 결합되는 경우에는, 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다.
|
262
252
|
def rule_13
|
263
|
-
return if kc.jongsung.nil? || kc.jongsung == 'ㅇ' || next_kc.nil? || next_kc.chosung != 'ㅇ'
|
264
|
-
next_kc.chosung = kc.jongsung
|
265
|
-
kc.jongsung = nil
|
253
|
+
return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ'
|
254
|
+
@next_kc.chosung = @kc.jongsung
|
255
|
+
@kc.jongsung = nil
|
266
256
|
|
267
257
|
true
|
268
258
|
end
|
@@ -271,10 +261,10 @@ class Korean
|
|
271
261
|
# 뒤엣것만을 뒤 음절 첫소리로 옮겨 발음한다.(이 경우, ‘ㅅ’은 된소리로 발음함.)
|
272
262
|
#
|
273
263
|
def rule_14
|
274
|
-
return if kc.jongsung.nil? || kc.jongsung == 'ㅇ' || next_kc.nil? || next_kc.chosung != 'ㅇ'
|
275
|
-
if consonants = double_consonant_map[kc.jongsung]
|
264
|
+
return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ'
|
265
|
+
if consonants = double_consonant_map[@kc.jongsung]
|
276
266
|
consonants[1] = 'ㅆ' if consonants[1] == 'ㅅ'
|
277
|
-
kc.jongsung, next_kc.chosung = consonants
|
267
|
+
@kc.jongsung, @next_kc.chosung = consonants
|
278
268
|
|
279
269
|
true
|
280
270
|
end
|
@@ -283,12 +273,12 @@ class Korean
|
|
283
273
|
# 제15항: 받침 뒤에 모음 ‘ㅏ, ㅓ, ㅗ, ㅜ, ㅟ’들로 시작되는 __실질 형태소__가 연결되는
|
284
274
|
# 경우에는, 대표음으로 바꾸어서 뒤 음절 첫소리로 옮겨 발음한다.
|
285
275
|
def rule_15
|
286
|
-
return if kc.jongsung.nil? || kc.jongsung == 'ㅇ' || next_kc.nil? || next_kc.chosung != 'ㅇ'
|
276
|
+
return if @kc.jongsung.nil? || @kc.jongsung == 'ㅇ' || @next_kc.nil? || @next_kc.chosung != 'ㅇ'
|
287
277
|
|
288
|
-
if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(next_kc.jungsung) &&
|
289
|
-
%[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(kc.jongsung) == false # PATCH
|
290
|
-
next_kc.chosung = @pconfig['jongsung sound'][ kc.jongsung ]
|
291
|
-
kc.jongsung = nil
|
278
|
+
if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(@next_kc.jungsung) &&
|
279
|
+
%[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(@kc.jongsung) == false # PATCH
|
280
|
+
@next_kc.chosung = @pconfig['jongsung sound'][ @kc.jongsung ]
|
281
|
+
@kc.jongsung = nil
|
292
282
|
|
293
283
|
true
|
294
284
|
end
|
@@ -297,7 +287,7 @@ class Korean
|
|
297
287
|
# 제16항: 한글 자모의 이름은 그 받침소리를 연음하되, ‘ㄷ, ㅈ, ㅊ, ㅋ, ㅌ,
|
298
288
|
# ㅍ, ㅎ’의 경우에는 특별히 다음과 같이 발음한다.
|
299
289
|
def rule_16
|
300
|
-
return if next_kc.nil?
|
290
|
+
return if @next_kc.nil?
|
301
291
|
|
302
292
|
map = {'디귿' => '디긋',
|
303
293
|
'지읒' => '지읏',
|
@@ -307,11 +297,11 @@ class Korean
|
|
307
297
|
'피읖' => '피읍',
|
308
298
|
'히읗' => '히읏'}
|
309
299
|
|
310
|
-
word = kc.to_s + next_kc.to_s
|
300
|
+
word = @kc.to_s + @next_kc.to_s
|
311
301
|
if map.keys.include? word
|
312
|
-
new_char = @korean.dissect(map[word][1])[0]
|
313
|
-
next_kc.chosung = new_char.chosung
|
314
|
-
next_kc.jongsung = new_char.jongsung
|
302
|
+
new_char = @korean.dissect(map[word].scan(/./mu)[1])[0]
|
303
|
+
@next_kc.chosung = new_char.chosung
|
304
|
+
@next_kc.jongsung = new_char.jongsung
|
315
305
|
|
316
306
|
true
|
317
307
|
end
|
@@ -322,16 +312,16 @@ class Korean
|
|
322
312
|
#
|
323
313
|
# [붙임] ‘ㄷ’ 뒤에 접미사 ‘히’가 결합되어 ‘티’를 이루는 것은 [치]로 발음한다.
|
324
314
|
def rule_17
|
325
|
-
return if next_kc.nil? || %w[ㄷ ㅌ ㄾ].include?(kc.jongsung) == false
|
315
|
+
return if @next_kc.nil? || %w[ㄷ ㅌ ㄾ].include?(@kc.jongsung) == false
|
326
316
|
|
327
|
-
if next_kc.to_s == '이'
|
328
|
-
next_kc.chosung = kc.jongsung == 'ㄷ' ? 'ㅈ' : 'ㅊ'
|
329
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
317
|
+
if @next_kc.to_s == '이'
|
318
|
+
@next_kc.chosung = @kc.jongsung == 'ㄷ' ? 'ㅈ' : 'ㅊ'
|
319
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
330
320
|
|
331
321
|
true
|
332
|
-
elsif next_kc.to_s == '히'
|
333
|
-
next_kc.chosung = 'ㅊ'
|
334
|
-
kc.jongsung = (dc = double_consonant_map[kc.jongsung]) && dc.first
|
322
|
+
elsif @next_kc.to_s == '히'
|
323
|
+
@next_kc.chosung = 'ㅊ'
|
324
|
+
@kc.jongsung = (dc = double_consonant_map[@kc.jongsung]) && dc.first
|
335
325
|
|
336
326
|
true
|
337
327
|
end
|
@@ -345,8 +335,8 @@ class Korean
|
|
345
335
|
%w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ',
|
346
336
|
%w[ㅂ ㅍ ㄼ ㄿ ㅄ] => 'ㅁ'
|
347
337
|
}
|
348
|
-
if next_kc && map.keys.flatten.include?(kc.jongsung) && %w[ㄴ ㅁ].include?(next_kc.chosung)
|
349
|
-
kc.jongsung = map[ map.keys.find { |e| e.include? kc.jongsung } ]
|
338
|
+
if @next_kc && map.keys.flatten.include?(@kc.jongsung) && %w[ㄴ ㅁ].include?(@next_kc.chosung)
|
339
|
+
@kc.jongsung = map[ map.keys.find { |e| e.include? @kc.jongsung } ]
|
350
340
|
|
351
341
|
true
|
352
342
|
end
|
@@ -355,12 +345,12 @@ class Korean
|
|
355
345
|
# 제19항: 받침 ‘ㅁ, ㅇ’ 뒤에 연결되는 ‘ㄹ’은 [ㄴ]으로 발음한다.
|
356
346
|
# [붙임]받침 ‘ㄱ, ㅂ’ 뒤에 연결되는 ‘ㄹ’도 [ㄴ]으로 발음한다.
|
357
347
|
def rule_19
|
358
|
-
if next_kc && next_kc.chosung == 'ㄹ' && %w[ㅁ ㅇ ㄱ ㅂ].include?(kc.jongsung)
|
359
|
-
next_kc.chosung = 'ㄴ'
|
348
|
+
if @next_kc && @next_kc.chosung == 'ㄹ' && %w[ㅁ ㅇ ㄱ ㅂ].include?(@kc.jongsung)
|
349
|
+
@next_kc.chosung = 'ㄴ'
|
360
350
|
|
361
|
-
case kc.jongsung
|
362
|
-
when 'ㄱ' then kc.jongsung = 'ㅇ'
|
363
|
-
when 'ㅂ' then kc.jongsung = 'ㅁ'
|
351
|
+
case @kc.jongsung
|
352
|
+
when 'ㄱ' then @kc.jongsung = 'ㅇ'
|
353
|
+
when 'ㅂ' then @kc.jongsung = 'ㅁ'
|
364
354
|
end
|
365
355
|
|
366
356
|
true
|
@@ -369,21 +359,21 @@ class Korean
|
|
369
359
|
|
370
360
|
# 제20항: ‘ㄴ’은 ‘ㄹ’의 앞이나 뒤에서 [ㄹ]로 발음한다.
|
371
361
|
def rule_20
|
372
|
-
return if next_kc.nil?
|
362
|
+
return if @next_kc.nil?
|
373
363
|
|
374
364
|
to = if %w[견란 진란 산량 단력 권력 원령 견례
|
375
|
-
문로 단로 원론 원료 근류].include?(kc_org.to_s + next_kc_org.to_s)
|
365
|
+
문로 단로 원론 원료 근류].include?(@kc_org.to_s + @next_kc_org.to_s)
|
376
366
|
'ㄴ'
|
377
367
|
else
|
378
368
|
'ㄹ'
|
379
369
|
end
|
380
370
|
|
381
|
-
if kc.jongsung == 'ㄹ' && next_kc.chosung == 'ㄴ'
|
382
|
-
kc.jongsung = next_kc.chosung = to
|
371
|
+
if @kc.jongsung == 'ㄹ' && @next_kc.chosung == 'ㄴ'
|
372
|
+
@kc.jongsung = @next_kc.chosung = to
|
383
373
|
|
384
374
|
true
|
385
|
-
elsif kc.jongsung == 'ㄴ' && next_kc.chosung == 'ㄹ'
|
386
|
-
kc.jongsung = next_kc.chosung = to
|
375
|
+
elsif @kc.jongsung == 'ㄴ' && @next_kc.chosung == 'ㄹ'
|
376
|
+
@kc.jongsung = @next_kc.chosung = to
|
387
377
|
|
388
378
|
true
|
389
379
|
end
|
@@ -392,10 +382,10 @@ class Korean
|
|
392
382
|
# 제23항: 받침 ‘ㄱ(ㄲ, ㅋ, ㄳ, ㄺ), ㄷ(ㅅ, ㅆ, ㅈ, ㅊ, ㅌ), ㅂ(ㅍ, ㄼ, ㄿ,ㅄ)’
|
393
383
|
# 뒤에 연결되는 ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’은 된소리로 발음한다.
|
394
384
|
def rule_23
|
395
|
-
return if next_kc.nil?
|
396
|
-
if fortis_map.keys.include?(next_kc.chosung) &&
|
397
|
-
%w[ㄱ ㄲ ㅋ ㄳ ㄺ ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅂ ㅍ ㄼ ㄿ ㅄ].include?(kc.jongsung)
|
398
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
385
|
+
return if @next_kc.nil?
|
386
|
+
if fortis_map.keys.include?(@next_kc.chosung) &&
|
387
|
+
%w[ㄱ ㄲ ㅋ ㄳ ㄺ ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅂ ㅍ ㄼ ㄿ ㅄ].include?(@kc.jongsung)
|
388
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
399
389
|
|
400
390
|
true
|
401
391
|
end
|
@@ -405,22 +395,22 @@ class Korean
|
|
405
395
|
# 다만, 피동, 사동의 접미사 ‘-기-’는 된소리로 발음하지 않는다.
|
406
396
|
# 용언 어간에만 적용.
|
407
397
|
def rule_24
|
408
|
-
return if next_kc.nil? ||
|
409
|
-
next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기
|
398
|
+
return if @next_kc.nil? ||
|
399
|
+
@next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기
|
410
400
|
|
411
401
|
# FIXME 용언 여부를 판단. 정확한 판단 불가.
|
412
|
-
return unless case kc.jongsung
|
402
|
+
return unless case @kc.jongsung
|
413
403
|
when 'ㄵ'
|
414
|
-
%w[앉 얹].include? kc.to_s
|
404
|
+
%w[앉 얹].include? @kc.to_s
|
415
405
|
when 'ㄻ'
|
416
|
-
%w[젊 닮].include? kc.to_s
|
406
|
+
%w[젊 닮].include? @kc.to_s
|
417
407
|
else
|
418
408
|
false # XXX 일반적인 경우 사전 없이 판단 불가
|
419
409
|
end
|
420
410
|
|
421
|
-
if %w[ㄱ ㄷ ㅅ ㅈ].include?(next_kc.chosung) &&
|
422
|
-
%w[ㄴ ㄵ ㅁ ㄻ ㄼ ㄾ].include?(kc.jongsung)
|
423
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
411
|
+
if %w[ㄱ ㄷ ㅅ ㅈ].include?(@next_kc.chosung) &&
|
412
|
+
%w[ㄴ ㄵ ㅁ ㄻ ㄼ ㄾ].include?(@kc.jongsung)
|
413
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
424
414
|
|
425
415
|
true
|
426
416
|
end
|
@@ -429,11 +419,11 @@ class Korean
|
|
429
419
|
# 제25항: 어간 받침 ‘ㄼ, ㄾ’ 뒤에 결합되는 어미의 첫소리 ‘ㄱ, ㄷ, ㅅ, ㅈ’은
|
430
420
|
# 된소리로 발음한다.
|
431
421
|
def rule_25
|
432
|
-
return if next_kc.nil?
|
422
|
+
return if @next_kc.nil?
|
433
423
|
|
434
|
-
if %w[ㄱ ㄷ ㅅ ㅈ].include?(next_kc.chosung) &&
|
435
|
-
%w[ㄼ ㄾ].include?(kc.jongsung)
|
436
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
424
|
+
if %w[ㄱ ㄷ ㅅ ㅈ].include?(@next_kc.chosung) &&
|
425
|
+
%w[ㄼ ㄾ].include?(@kc.jongsung)
|
426
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
437
427
|
|
438
428
|
true
|
439
429
|
end
|
@@ -448,12 +438,12 @@ class Korean
|
|
448
438
|
# - ‘-(으)ㄹ’로 시작되는 어미의 경우에도 이에 준한다.
|
449
439
|
def rule_27
|
450
440
|
# FIXME: NOT PROPERLY IMPLEMENTED
|
451
|
-
return if next_kc.nil?
|
441
|
+
return if @next_kc.nil?
|
452
442
|
|
453
443
|
# 비교적 확률이 높은 경우들에 대해서만 처리. "일" 은 제외.
|
454
|
-
if %w[할 갈 날 볼 을 앨 말 힐].include?(kc.to_s) && # kc.jongsung == 'ㄹ' &&
|
455
|
-
%w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(next_kc.chosung)
|
456
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
444
|
+
if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' &&
|
445
|
+
%w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(@next_kc.chosung)
|
446
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
457
447
|
true
|
458
448
|
end
|
459
449
|
end
|
@@ -480,21 +470,21 @@ class Korean
|
|
480
470
|
# 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다.
|
481
471
|
# 3. 사이시옷 뒤에 ‘이’ 음이 결합되는 경우에는 [ㄴㄴ]으로 발음한다.
|
482
472
|
def rule_30
|
483
|
-
return if next_kc.nil? || kc.jongsung != 'ㅅ'
|
473
|
+
return if @next_kc.nil? || @kc.jongsung != 'ㅅ'
|
484
474
|
|
485
|
-
if %w[ㄱ ㄷ ㅂ ㅅ ㅈ].include? next_kc.chosung
|
486
|
-
kc.jongsung = 'ㄷ' # or nil
|
487
|
-
next_kc.chosung = fortis_map[next_kc.chosung]
|
475
|
+
if %w[ㄱ ㄷ ㅂ ㅅ ㅈ].include? @next_kc.chosung
|
476
|
+
@kc.jongsung = 'ㄷ' # or nil
|
477
|
+
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
488
478
|
|
489
479
|
true
|
490
|
-
elsif %w[ㄴ ㅁ].include? next_kc.chosung
|
491
|
-
kc.jongsung = 'ㄴ'
|
480
|
+
elsif %w[ㄴ ㅁ].include? @next_kc.chosung
|
481
|
+
@kc.jongsung = 'ㄴ'
|
492
482
|
|
493
483
|
true
|
494
|
-
elsif next_kc.chosung == 'ㅇ' &&
|
495
|
-
%w[ㅣ ㅒ ㅖ ㅑ ㅕ ㅛ ㅠ].include?(next_kc.jungsung) &&
|
496
|
-
next_kc.jongsung # PATCH
|
497
|
-
kc.jongsung = next_kc.chosung = 'ㄴ'
|
484
|
+
elsif @next_kc.chosung == 'ㅇ' &&
|
485
|
+
%w[ㅣ ㅒ ㅖ ㅑ ㅕ ㅛ ㅠ].include?(@next_kc.jungsung) &&
|
486
|
+
@next_kc.jongsung # PATCH
|
487
|
+
@kc.jongsung = @next_kc.chosung = 'ㄴ'
|
498
488
|
|
499
489
|
true
|
500
490
|
end
|
data/test/romanization.yml
CHANGED
data/test/test_gimchi.rb
CHANGED
@@ -77,8 +77,8 @@ class TestGimchi < Test::Unit::TestCase
|
|
77
77
|
ko = Gimchi::Korean.new
|
78
78
|
assert_equal "천 구백 구십 구", ko.read_number(1999)
|
79
79
|
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
80
|
-
assert_equal "
|
81
|
-
ko.read_number("
|
80
|
+
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사",
|
81
|
+
ko.read_number("53,191,100,678.3214")
|
82
82
|
|
83
83
|
# 나이, 시간 ( -살, -시 )
|
84
84
|
assert_equal "나는 스무살", ko.read_number("나는 20살")
|
@@ -142,6 +142,6 @@ class TestGimchi < Test::Unit::TestCase
|
|
142
142
|
end
|
143
143
|
puts "#{s} / #{cnt}"
|
144
144
|
# FIXME
|
145
|
-
assert s >=
|
145
|
+
assert s >= 58
|
146
146
|
end
|
147
147
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimchi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-04-08 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157290340 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157290340
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: jeweler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2157306240 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157306240
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rcov
|
38
|
-
requirement: &
|
38
|
+
requirement: &2157305760 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157305760
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ansi
|
49
|
-
requirement: &
|
49
|
+
requirement: &2157305280 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2157305280
|
58
58
|
description: Gimchi knows how to pronounce Korean strings and how to write them in
|
59
59
|
roman alphabet.
|
60
60
|
email: junegunn.c@gmail.com
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- lib/gimchi.rb
|
70
70
|
- lib/gimchi/char.rb
|
71
71
|
- lib/gimchi/korean.rb
|
72
|
+
- lib/gimchi/patch_1.8.rb
|
72
73
|
- lib/gimchi/pronouncer.rb
|
73
74
|
- LICENSE.txt
|
74
75
|
- README.ko.rdoc
|
@@ -107,4 +108,3 @@ test_files:
|
|
107
108
|
- test/pronunciation.yml
|
108
109
|
- test/romanization.yml
|
109
110
|
- test/test_gimchi.rb
|
110
|
-
has_rdoc:
|