gimchi 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +17 -0
- data/CHANGELOG.rdoc +42 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +3 -1
- data/{README.ko.markdown → README.ko.md} +68 -66
- data/README.md +162 -0
- data/Rakefile +7 -0
- data/config/default.yml +162 -162
- data/crawler/crawler.rb +49 -0
- data/gimchi.gemspec +21 -0
- data/lib/gimchi.rb +374 -4
- data/lib/gimchi/char.rb +26 -38
- data/lib/gimchi/patch_1.8.rb +9 -9
- data/lib/gimchi/pronouncer.rb +26 -27
- data/test/helper.rb +1 -0
- data/test/test_gimchi.rb +114 -86
- metadata +23 -51
- data/README.markdown +0 -155
- data/lib/gimchi/korean.rb +0 -323
data/lib/gimchi/char.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
# Class representing each Korean character. Its three components,
|
3
|
+
class Gimchi
|
4
|
+
# Class representing each Korean character. Its three components,
|
6
5
|
# `chosung', `jungsung' and `jongsung' can be read and set.
|
7
6
|
#
|
8
7
|
# `to_s' merges components into a String. `to_a' returns the three components.
|
@@ -14,13 +13,13 @@ class Korean
|
|
14
13
|
# @return [String] Jongsung component of this character.
|
15
14
|
attr_reader :jongsung
|
16
15
|
|
17
|
-
# @param [Gimchi
|
16
|
+
# @param [Gimchi] gimchi Gimchi instance
|
18
17
|
# @param [String] kchar Korean character string
|
19
|
-
def initialize
|
20
|
-
raise ArgumentError.new('Not a korean character') unless
|
18
|
+
def initialize gimchi, kchar
|
19
|
+
raise ArgumentError.new('Not a korean character') unless gimchi.korean_char? kchar
|
21
20
|
|
22
|
-
@
|
23
|
-
if @
|
21
|
+
@gimchi = gimchi
|
22
|
+
if @gimchi.complete_korean_char? kchar
|
24
23
|
c = kchar.unpack('U').first
|
25
24
|
n = c - 0xAC00
|
26
25
|
# '가' ~ '깋' -> 'ㄱ'
|
@@ -29,14 +28,14 @@ class Korean
|
|
29
28
|
n = n % (21 * 28)
|
30
29
|
n2 = n / 28;
|
31
30
|
n3 = n % 28;
|
32
|
-
self.chosung = @
|
33
|
-
self.jungsung = @
|
34
|
-
self.jongsung = ([nil] + @
|
35
|
-
elsif @
|
31
|
+
self.chosung = @gimchi.chosungs[n1]
|
32
|
+
self.jungsung = @gimchi.jungsungs[n2]
|
33
|
+
self.jongsung = ([nil] + @gimchi.jongsungs)[n3]
|
34
|
+
elsif @gimchi.chosung? kchar
|
36
35
|
self.chosung = kchar
|
37
|
-
elsif @
|
36
|
+
elsif @gimchi.jungsung? kchar
|
38
37
|
self.jungsung = kchar
|
39
|
-
elsif @
|
38
|
+
elsif @gimchi.jongsung? kchar
|
40
39
|
self.jongsung = kchar
|
41
40
|
end
|
42
41
|
end
|
@@ -44,42 +43,32 @@ class Korean
|
|
44
43
|
# Recombines components into a korean character.
|
45
44
|
# @return [String] Combined korean character
|
46
45
|
def to_s
|
47
|
-
|
48
|
-
""
|
49
|
-
elsif chosung && jungsung
|
50
|
-
n1, n2, n3 =
|
51
|
-
n1 = @kor.chosungs.index(chosung) || 0
|
52
|
-
n2 = @kor.jungsungs.index(jungsung) || 0
|
53
|
-
n3 = ([nil] + @kor.jongsungs).index(jongsung) || 0
|
54
|
-
[ 0xAC00 + n1 * (21 * 28) + n2 * 28 + n3 ].pack('U')
|
55
|
-
else
|
56
|
-
chosung || jungsung
|
57
|
-
end
|
46
|
+
@gimchi.compose chosung, jungsung, jongsung
|
58
47
|
end
|
59
48
|
|
60
49
|
# Sets the chosung component.
|
61
|
-
# @param [String]
|
50
|
+
# @param [String]
|
62
51
|
def chosung= c
|
63
52
|
raise ArgumentError.new('Invalid chosung component') if
|
64
|
-
c && @
|
65
|
-
@chosung = c && c.dup.extend(Component).tap { |e| e.kor = @
|
53
|
+
c && @gimchi.chosung?(c) == false
|
54
|
+
@chosung = c && c.dup.extend(Component).tap { |e| e.kor = @gimchi }
|
66
55
|
end
|
67
56
|
|
68
57
|
# Sets the jungsung component
|
69
|
-
# @param [String]
|
58
|
+
# @param [String]
|
70
59
|
def jungsung= c
|
71
60
|
raise ArgumentError.new('Invalid jungsung component') if
|
72
|
-
c && @
|
73
|
-
@jungsung = c && c.dup.extend(Component).tap { |e| e.kor = @
|
61
|
+
c && @gimchi.jungsung?(c) == false
|
62
|
+
@jungsung = c && c.dup.extend(Component).tap { |e| e.kor = @gimchi }
|
74
63
|
end
|
75
64
|
|
76
65
|
# Sets the jongsung component
|
77
66
|
#
|
78
|
-
# @param [String]
|
67
|
+
# @param [String]
|
79
68
|
def jongsung= c
|
80
69
|
raise ArgumentError.new('Invalid jongsung component') if
|
81
|
-
c && @
|
82
|
-
@jongsung = c && c.dup.extend(Component).tap { |e| e.kor = @
|
70
|
+
c && @gimchi.jongsung?(c) == false
|
71
|
+
@jongsung = c && c.dup.extend(Component).tap { |e| e.kor = @gimchi }
|
83
72
|
end
|
84
73
|
|
85
74
|
# Returns Array of three components.
|
@@ -105,22 +94,21 @@ class Korean
|
|
105
94
|
end
|
106
95
|
|
107
96
|
private
|
108
|
-
# Three components of
|
97
|
+
# Three components of Gimchi::Char are extended to support the #vowel? and #consonant? methods.
|
109
98
|
module Component
|
110
99
|
# @return [Gimchi] Hosting Gimchi instance
|
111
100
|
attr_accessor :kor
|
112
101
|
|
113
102
|
# Is this component a vowel?
|
114
103
|
def vowel?
|
115
|
-
kor.
|
104
|
+
kor.jungsung? self
|
116
105
|
end
|
117
106
|
|
118
107
|
# Is this component a consonant?
|
119
108
|
def consonant?
|
120
|
-
self != 'ㅇ' && kor.
|
109
|
+
self != 'ㅇ' && kor.chosung?(self)
|
121
110
|
end
|
122
111
|
end#Component
|
123
112
|
end#Char
|
124
|
-
end#Korean
|
125
113
|
end#Gimchi
|
126
114
|
|
data/lib/gimchi/patch_1.8.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION =~ /^1\.8\./
|
2
|
+
$KCODE = 'U'
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end#Gimchi
|
4
|
+
class Gimchi
|
5
|
+
private
|
6
|
+
def str_length str
|
7
|
+
str.scan(/./mu).length
|
8
|
+
end
|
9
|
+
end#Gimchi
|
10
|
+
end
|
data/lib/gimchi/pronouncer.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
|
4
|
-
class Korean
|
3
|
+
class Gimchi
|
5
4
|
# Private class.
|
6
5
|
# Partial implementation of the Korean pronunciation rules specified in
|
7
6
|
# http://www.korean.go.kr/
|
7
|
+
# @private
|
8
8
|
class Pronouncer
|
9
9
|
private
|
10
|
-
def initialize
|
11
|
-
@
|
12
|
-
@pconfig =
|
10
|
+
def initialize gimchi
|
11
|
+
@gimchi = gimchi
|
12
|
+
@pconfig = gimchi.config[:pronouncer]
|
13
13
|
end
|
14
14
|
|
15
15
|
def pronounce! str, options = {}
|
16
|
-
@sequence = @pconfig[
|
17
|
-
|
16
|
+
@sequence = @pconfig[:transformation][
|
17
|
+
"sequence_for_#{options[:each_char] ? '1' : '2'}".to_sym] - options[:except]
|
18
18
|
|
19
19
|
# Dissecting
|
20
|
-
@chars = @
|
20
|
+
@chars = str.each_char.map { |c| @gimchi.kchar(c) rescue c }
|
21
21
|
@orig_chars = @chars.dup
|
22
22
|
|
23
23
|
# Padding
|
@@ -35,9 +35,9 @@ class Korean
|
|
35
35
|
# Transform one by one
|
36
36
|
applied += (0...@chars.length).inject([]) { | arr, i | arr + transform(i); }
|
37
37
|
|
38
|
-
# Post-processing (actually just for :
|
39
|
-
@chars.select { |c| c.is_a?(
|
40
|
-
c.jongsung = @pconfig[
|
38
|
+
# Post-processing (actually just for :each_char option)
|
39
|
+
@chars.select { |c| c.is_a?(Gimchi::Char) && c.jongsung }.each do | c |
|
40
|
+
c.jongsung = @pconfig[:jongsung_sound][c.jongsung]
|
41
41
|
end
|
42
42
|
|
43
43
|
break unless options[:slur]
|
@@ -52,20 +52,20 @@ class Korean
|
|
52
52
|
kc = @chars[@cursor]
|
53
53
|
|
54
54
|
# Not korean
|
55
|
-
return [] unless kc.is_a?
|
55
|
+
return [] unless kc.is_a? Gimchi::Char
|
56
56
|
|
57
57
|
# Setting up variables for fast lookup
|
58
58
|
@kc = kc
|
59
|
-
@next_kc = (nkc = @chars[@cursor + 1]).is_a?(
|
59
|
+
@next_kc = (nkc = @chars[@cursor + 1]).is_a?(Gimchi::Char) ? nkc : nil
|
60
60
|
@kc_org = @initial_chars[@cursor]
|
61
|
-
@next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(
|
61
|
+
@next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(Gimchi::Char) ? nkco : nil
|
62
62
|
|
63
63
|
# Cannot properly pronounce
|
64
64
|
return [] if @kc.chosung.nil? && @kc.jungsung.nil? && @kc.jongsung.nil?
|
65
65
|
|
66
66
|
applied = []
|
67
67
|
not_todo = []
|
68
|
-
blocking_rule = @pconfig[
|
68
|
+
blocking_rule = @pconfig[:transformation][:blocking_rule]
|
69
69
|
@sequence.each do | rule |
|
70
70
|
next if not_todo.include?(rule)
|
71
71
|
|
@@ -78,7 +78,7 @@ class Korean
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def pad c
|
81
|
-
return unless c.is_a?
|
81
|
+
return unless c.is_a? Gimchi::Char
|
82
82
|
|
83
83
|
c.chosung = 'ㅇ' if c.chosung.nil?
|
84
84
|
c.jungsung = 'ㅡ' if c.jungsung.nil?
|
@@ -86,12 +86,12 @@ class Korean
|
|
86
86
|
|
87
87
|
# shortcut
|
88
88
|
def fortis_map
|
89
|
-
@
|
89
|
+
@gimchi.config[:structure][:fortis_map]
|
90
90
|
end
|
91
91
|
|
92
92
|
# shortcut
|
93
93
|
def double_consonant_map
|
94
|
-
@
|
94
|
+
@gimchi.config[:structure][:double_consonant_map]
|
95
95
|
end
|
96
96
|
|
97
97
|
# 제5항: ‘ㅑ ㅒ ㅕ ㅖ ㅘ ㅙ ㅛ ㅝ ㅞ ㅠ ㅢ’는 이중 모음으로 발음한다.
|
@@ -193,7 +193,7 @@ class Korean
|
|
193
193
|
'ㄱ' => 'ㅋ',
|
194
194
|
'ㄷ' => 'ㅌ',
|
195
195
|
'ㅈ' => 'ㅊ' }
|
196
|
-
if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung)
|
196
|
+
if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung)
|
197
197
|
# 12-1
|
198
198
|
if map_12_1.keys.include?(@next_kc.chosung)
|
199
199
|
@next_kc.chosung = map_12_1[@next_kc.chosung]
|
@@ -277,7 +277,7 @@ class Korean
|
|
277
277
|
|
278
278
|
if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(@next_kc.jungsung) &&
|
279
279
|
%[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(@kc.jongsung) == false # PATCH
|
280
|
-
@next_kc.chosung = @pconfig[
|
280
|
+
@next_kc.chosung = @pconfig[:jongsung_sound][ @kc.jongsung ]
|
281
281
|
@kc.jongsung = nil
|
282
282
|
|
283
283
|
true
|
@@ -299,7 +299,7 @@ class Korean
|
|
299
299
|
|
300
300
|
word = @kc.to_s + @next_kc.to_s
|
301
301
|
if map.keys.include? word
|
302
|
-
new_char = @
|
302
|
+
new_char = @gimchi.kchar(map[word].scan(/./mu)[1])
|
303
303
|
@next_kc.chosung = new_char.chosung
|
304
304
|
@next_kc.jongsung = new_char.jongsung
|
305
305
|
|
@@ -331,8 +331,8 @@ class Korean
|
|
331
331
|
# ㄿ, ㅄ)’은 ‘ㄴ, ㅁ’ 앞에서 [ㅇ, ㄴ, ㅁ]으로 발음한다.
|
332
332
|
def rule_18
|
333
333
|
map = {
|
334
|
-
%w[ㄱ ㄲ ㅋ ㄳ ㄺ] => 'ㅇ',
|
335
|
-
%w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ',
|
334
|
+
%w[ㄱ ㄲ ㅋ ㄳ ㄺ] => 'ㅇ',
|
335
|
+
%w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ',
|
336
336
|
%w[ㅂ ㅍ ㄼ ㄿ ㅄ] => 'ㅁ'
|
337
337
|
}
|
338
338
|
if @next_kc && map.keys.flatten.include?(@kc.jongsung) && %w[ㄴ ㅁ].include?(@next_kc.chosung)
|
@@ -395,7 +395,7 @@ class Korean
|
|
395
395
|
# 다만, 피동, 사동의 접미사 ‘-기-’는 된소리로 발음하지 않는다.
|
396
396
|
# 용언 어간에만 적용.
|
397
397
|
def rule_24
|
398
|
-
return if @next_kc.nil? ||
|
398
|
+
return if @next_kc.nil? ||
|
399
399
|
@next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기
|
400
400
|
|
401
401
|
# FIXME 용언 여부를 판단. 정확한 판단 불가.
|
@@ -441,7 +441,7 @@ class Korean
|
|
441
441
|
return if @next_kc.nil?
|
442
442
|
|
443
443
|
# 비교적 확률이 높은 경우들에 대해서만 처리. "일" 은 제외.
|
444
|
-
if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' &&
|
444
|
+
if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' &&
|
445
445
|
%w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(@next_kc.chosung)
|
446
446
|
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
447
447
|
true
|
@@ -467,7 +467,7 @@ class Korean
|
|
467
467
|
# 1. ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’으로 시작하는 단어 앞에 사이시옷이 올 때는 이들
|
468
468
|
# 자음만을 된소리로 발음하는 것을 원칙으로 하되, 사이시옷을 [ㄷ]으로
|
469
469
|
# 발음하는 것도 허용한다.
|
470
|
-
# 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다.
|
470
|
+
# 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다.
|
471
471
|
# 3. 사이시옷 뒤에 ‘이’ 음이 결합되는 경우에는 [ㄴㄴ]으로 발음한다.
|
472
472
|
def rule_30
|
473
473
|
return if @next_kc.nil? || @kc.jongsung != 'ㅅ'
|
@@ -490,5 +490,4 @@ class Korean
|
|
490
490
|
end
|
491
491
|
end
|
492
492
|
end#Pronouncer
|
493
|
-
end#Korean
|
494
493
|
end#Gimchi
|
data/test/helper.rb
CHANGED
data/test/test_gimchi.rb
CHANGED
@@ -2,76 +2,75 @@
|
|
2
2
|
|
3
3
|
$LOAD_PATH.unshift File.dirname(__FILE__)
|
4
4
|
require 'helper'
|
5
|
+
require 'yaml'
|
6
|
+
require 'ansi'
|
7
|
+
|
5
8
|
|
6
9
|
class TestGimchi < Test::Unit::TestCase
|
7
10
|
def test_korean_char
|
8
|
-
|
9
|
-
assert_equal true,
|
10
|
-
assert_equal true,
|
11
|
-
assert_equal true,
|
12
|
-
assert_equal true,
|
13
|
-
assert_equal true,
|
14
|
-
|
15
|
-
|
16
|
-
assert_equal false,
|
17
|
-
|
18
|
-
assert_raise(ArgumentError) {
|
19
|
-
assert_raise(ArgumentError) { ko.kchar?('두자') }
|
11
|
+
assert_equal true, Gimchi.korean_char?('ㄱ') # true
|
12
|
+
assert_equal true, Gimchi.kchar?('ㄱ') # true
|
13
|
+
assert_equal true, Gimchi.korean_char?('ㅏ') # true
|
14
|
+
assert_equal true, Gimchi.korean_char?('가') # true
|
15
|
+
assert_equal true, Gimchi.korean_char?('값') # true
|
16
|
+
assert_equal true, Gimchi.kchar?('값') # true
|
17
|
+
|
18
|
+
assert_equal false, Gimchi.korean_char?('a') # false
|
19
|
+
assert_equal false, Gimchi.korean_char?('1') # false
|
20
|
+
assert_raise(ArgumentError) { Gimchi.korean_char?('두자') }
|
21
|
+
assert_raise(ArgumentError) { Gimchi.kchar?('두자') }
|
20
22
|
end
|
21
23
|
|
22
24
|
def test_kchar
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
assert_raise(ArgumentError) { ko.kchar('A') }
|
38
|
-
|
39
|
-
assert_equal true, ko.kchar("ㅏ").partial?
|
25
|
+
kc = Gimchi::Char('한')
|
26
|
+
assert_equal Gimchi::Char, kc.class
|
27
|
+
assert_equal "ㅎ", kc.chosung
|
28
|
+
assert_equal "ㅏ", kc.jungsung
|
29
|
+
assert_equal "ㄴ", kc.jongsung
|
30
|
+
assert_equal ["ㅎ", "ㅏ", "ㄴ"], kc.to_a
|
31
|
+
assert_equal "한", kc.to_s
|
32
|
+
assert_equal true, kc.complete?
|
33
|
+
assert_equal false, kc.partial?
|
34
|
+
|
35
|
+
assert_raise(ArgumentError) { Gimchi::Char('한글') }
|
36
|
+
assert_raise(ArgumentError) { Gimchi::Char('A') }
|
37
|
+
|
38
|
+
assert_equal true, Gimchi::Char("ㅏ").partial?
|
40
39
|
end
|
41
40
|
|
42
41
|
def test_complete_korean_char
|
43
|
-
ko = Gimchi::Korean.new
|
44
42
|
|
45
|
-
assert_equal false,
|
46
|
-
assert_equal false,
|
47
|
-
assert_equal true,
|
48
|
-
assert_equal true,
|
43
|
+
assert_equal false, Gimchi.complete_korean_char?('ㄱ') # false
|
44
|
+
assert_equal false, Gimchi.complete_korean_char?('ㅏ') # false
|
45
|
+
assert_equal true, Gimchi.complete_korean_char?('가') # true
|
46
|
+
assert_equal true, Gimchi.complete_korean_char?('값') # true
|
49
47
|
|
50
|
-
assert_equal false,
|
51
|
-
assert_equal false,
|
52
|
-
assert_raise(ArgumentError) {
|
48
|
+
assert_equal false, Gimchi.korean_char?('a') # false
|
49
|
+
assert_equal false, Gimchi.korean_char?('1') # false
|
50
|
+
assert_raise(ArgumentError) { Gimchi.korean_char?('두자') }
|
53
51
|
end
|
54
52
|
|
55
53
|
def test_dissect
|
56
|
-
|
54
|
+
arr = '이것은 Hangul 입니다.'.each_char.map { |ch|
|
55
|
+
(Gimchi::Char(ch) rescue [ch]).to_a
|
56
|
+
}.flatten.compact
|
57
57
|
|
58
|
-
|
59
|
-
assert_equal ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ",
|
58
|
+
assert_equal ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ",
|
60
59
|
"H", "a", "n", "g", "u", "l", " ", "ㅇ", "ㅣ", "ㅂ",
|
61
60
|
"ㄴ", "ㅣ", "ㄷ", "ㅏ", "."], arr
|
62
61
|
end
|
63
62
|
|
64
63
|
def test_convert
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
arr = '이것은 한글입니다.'.each_char.map { |ch|
|
65
|
+
Gimchi::Char(ch) rescue ch
|
66
|
+
}
|
68
67
|
# [이, 것, 은, " ", 한, 글, 입, 니, 다, "."]
|
69
68
|
|
70
69
|
assert_equal 10, arr.length
|
71
|
-
assert_equal Gimchi::
|
72
|
-
assert_equal Gimchi::
|
73
|
-
assert_equal Gimchi::
|
74
|
-
|
70
|
+
assert_equal Gimchi::Char, arr[0].class
|
71
|
+
assert_equal Gimchi::Char, arr[1].class
|
72
|
+
assert_equal Gimchi::Char, arr[2].class
|
73
|
+
|
75
74
|
ch = arr[2]
|
76
75
|
assert_equal 'ㅇ', ch.chosung
|
77
76
|
assert_equal 'ㅡ', ch.jungsung
|
@@ -108,43 +107,41 @@ class TestGimchi < Test::Unit::TestCase
|
|
108
107
|
end
|
109
108
|
|
110
109
|
def test_read_number
|
111
|
-
|
112
|
-
assert_equal "
|
113
|
-
assert_equal "
|
114
|
-
assert_equal "구",
|
115
|
-
assert_equal "
|
116
|
-
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
110
|
+
assert_equal "영", Gimchi.read_number(0)
|
111
|
+
assert_equal "일", Gimchi.read_number(1)
|
112
|
+
assert_equal "구", Gimchi.read_number(9)
|
113
|
+
assert_equal "천 구백 구십 구", Gimchi.read_number(1999)
|
114
|
+
assert_equal "마이너스 백점일이삼", Gimchi.read_number(- 100.123)
|
117
115
|
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사오육칠",
|
118
|
-
|
119
|
-
assert_equal "영점영영영영영일이삼사오",
|
120
|
-
assert_equal "일해 이천 삼백 사십 오경",
|
121
|
-
assert_equal "플러스 일해 이천 삼백 사십 오경",
|
122
|
-
assert_equal "마이너스 일해 이천 삼백 사십 오경",
|
123
|
-
assert_equal "만 십 이점삼",
|
124
|
-
assert_equal "십만 십 이점삼",
|
125
|
-
assert_equal "백 일만 십 이점삼",
|
126
|
-
assert_equal "천 십 이점삼",
|
127
|
-
assert_equal "십점영",
|
128
|
-
assert_equal "플러스 십점영",
|
116
|
+
Gimchi.read_number("53,191,100,678.3214567")
|
117
|
+
assert_equal "영점영영영영영일이삼사오", Gimchi.read_number("1.2345e-06")
|
118
|
+
assert_equal "일해 이천 삼백 사십 오경", Gimchi.read_number("1.2345e+20")
|
119
|
+
assert_equal "플러스 일해 이천 삼백 사십 오경", Gimchi.read_number("+ 1.2345e+20")
|
120
|
+
assert_equal "마이너스 일해 이천 삼백 사십 오경", Gimchi.read_number("- 1.2345e+20")
|
121
|
+
assert_equal "만 십 이점삼", Gimchi.read_number("100.123e+2")
|
122
|
+
assert_equal "십만 십 이점삼", Gimchi.read_number("1000.123e+2")
|
123
|
+
assert_equal "백 일만 십 이점삼", Gimchi.read_number("10100.123e+2")
|
124
|
+
assert_equal "천 십 이점삼", Gimchi.read_number("10.123e+2")
|
125
|
+
assert_equal "십점영", Gimchi.read_number("10.0")
|
126
|
+
assert_equal "플러스 십점영", Gimchi.read_number("+ 10.0")
|
129
127
|
|
130
128
|
# 나이, 시간, 개수, 명 ( -살, -시, -개, -명 )
|
131
|
-
assert_equal "나는 이십",
|
132
|
-
assert_equal "나는 스무살",
|
133
|
-
assert_equal "나는 스물네살",
|
134
|
-
assert_equal "스무개",
|
135
|
-
assert_equal "스무 명",
|
136
|
-
assert_equal "이십 칠점일살",
|
137
|
-
assert_equal "너는 열세 살",
|
138
|
-
assert_equal "백 서른두명",
|
139
|
-
assert_equal "이천 오백 아흔아홉개",
|
140
|
-
assert_equal "지금은 일곱시 삼십분",
|
129
|
+
assert_equal "나는 이십", Gimchi.read_number("나는 20")
|
130
|
+
assert_equal "나는 스무살", Gimchi.read_number("나는 20살")
|
131
|
+
assert_equal "나는 스물네살", Gimchi.read_number("나는 24살")
|
132
|
+
assert_equal "스무개", Gimchi.read_number("20개")
|
133
|
+
assert_equal "스무 명", Gimchi.read_number("20 명")
|
134
|
+
assert_equal "이십 칠점일살", Gimchi.read_number("27.1살")
|
135
|
+
assert_equal "너는 열세 살", Gimchi.read_number("너는 13 살")
|
136
|
+
assert_equal "백 서른두명", Gimchi.read_number("132명")
|
137
|
+
assert_equal "이천 오백 아흔아홉개", Gimchi.read_number("2,599개")
|
138
|
+
assert_equal "지금은 일곱시 삼십분", Gimchi.read_number("지금은 7시 30분")
|
139
|
+
|
140
|
+
# No way!
|
141
|
+
assert_raise(RangeError) { Gimchi.read_number 10 ** 100 }
|
141
142
|
end
|
142
143
|
|
143
144
|
def test_pronounce
|
144
|
-
require 'yaml'
|
145
|
-
require 'ansi'
|
146
|
-
|
147
|
-
ko = Gimchi::Korean.new
|
148
145
|
cnt = 0
|
149
146
|
s = 0
|
150
147
|
test_set = YAML.load File.read(File.dirname(__FILE__) + '/pronunciation.yml')
|
@@ -152,8 +149,8 @@ class TestGimchi < Test::Unit::TestCase
|
|
152
149
|
cnt += 1
|
153
150
|
k = k.gsub(/[-]/, '')
|
154
151
|
|
155
|
-
t1, tfs1 =
|
156
|
-
t2, tfs2 =
|
152
|
+
t1, tfs1 = Gimchi.pronounce(k, :each_char => false, :slur => true, :debug => true)
|
153
|
+
t2, tfs2 = Gimchi.pronounce(k, :each_char => false, :slur => false, :debug => true)
|
157
154
|
|
158
155
|
path = ""
|
159
156
|
if (with_slur = v.include?(t1.gsub(/\s/, ''))) || v.include?(t2.gsub(/\s/, ''))
|
@@ -165,7 +162,7 @@ class TestGimchi < Test::Unit::TestCase
|
|
165
162
|
r = ANSI::Code::RED + ANSI::Code::BOLD + v.join(' / ') + ANSI::Code::RESET
|
166
163
|
t = [t1, t2].join ' | '
|
167
164
|
end
|
168
|
-
puts "#{k} => #{t} (#{
|
165
|
+
puts "#{k} => #{t} (#{Gimchi.romanize t, :as_pronounced => false}) [#{path}] #{r}"
|
169
166
|
end
|
170
167
|
puts "#{s} / #{cnt}"
|
171
168
|
# FIXME
|
@@ -173,19 +170,16 @@ class TestGimchi < Test::Unit::TestCase
|
|
173
170
|
end
|
174
171
|
|
175
172
|
def test_romanize_preserve_non_korean
|
176
|
-
|
177
|
-
assert_equal 'ttok-kkateun kkk', ko.romanize('똑같은 kkk')
|
173
|
+
assert_equal 'ttok-kkateun kkk', Gimchi.romanize('똑같은 kkk')
|
178
174
|
end
|
179
175
|
|
180
176
|
def test_romanize
|
181
|
-
ko = Gimchi::Korean.new
|
182
|
-
|
183
177
|
cnt = 0
|
184
178
|
s = 0
|
185
179
|
test_set = YAML.load File.read(File.dirname(__FILE__) + '/romanization.yml')
|
186
180
|
test_set.each do | k, v |
|
187
181
|
cnt += 1
|
188
|
-
rom =
|
182
|
+
rom = Gimchi.romanize k.sub(/\[.*/, '')
|
189
183
|
if rom.downcase.gsub(/[\s-]/, '') == v.downcase.gsub(/\(.*\)/, '').gsub(/[\s-]/, '')
|
190
184
|
r = ANSI::Code::BLUE + ANSI::Code::BOLD + rom + ANSI::Code::RESET
|
191
185
|
s += 1
|
@@ -198,4 +192,38 @@ class TestGimchi < Test::Unit::TestCase
|
|
198
192
|
# FIXME
|
199
193
|
assert s >= 63
|
200
194
|
end
|
195
|
+
|
196
|
+
def test_cho_jung_jongsung?
|
197
|
+
c, j, jo = Gimchi::Char("달").to_a
|
198
|
+
assert Gimchi.chosung?(c)
|
199
|
+
assert Gimchi.jungsung?(j)
|
200
|
+
assert Gimchi.jongsung?(jo)
|
201
|
+
|
202
|
+
assert Gimchi.chosung?( 'ㄱ')
|
203
|
+
assert !Gimchi.jungsung?('ㄱ')
|
204
|
+
assert Gimchi.jongsung?('ㄱ')
|
205
|
+
assert !Gimchi.chosung?( 'ㅏ')
|
206
|
+
assert Gimchi.jungsung?('ㅏ')
|
207
|
+
assert !Gimchi.jongsung?('ㅏ')
|
208
|
+
assert !Gimchi.chosung?( 'ㄺ')
|
209
|
+
assert !Gimchi.jungsung?('ㄺ')
|
210
|
+
assert Gimchi.jongsung?('ㄺ')
|
211
|
+
end
|
212
|
+
|
213
|
+
def test_compose_decompose
|
214
|
+
ret = Gimchi.decompose("한")
|
215
|
+
assert ret.is_a?(Array)
|
216
|
+
assert_equal 'ㅎ', ret[0]
|
217
|
+
assert_equal 'ㅏ', ret[1]
|
218
|
+
assert_equal 'ㄴ', ret[2]
|
219
|
+
|
220
|
+
assert_equal '한', Gimchi.compose(*ret)
|
221
|
+
|
222
|
+
ret = Gimchi.decompose("ㅋ")
|
223
|
+
assert_equal 'ㅋ', ret[0]
|
224
|
+
assert_equal nil, ret[1]
|
225
|
+
assert_equal nil, ret[2]
|
226
|
+
|
227
|
+
assert_equal 'ㅋ', Gimchi.compose(*ret)
|
228
|
+
end
|
201
229
|
end
|