gimchi 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +17 -0
- data/CHANGELOG.rdoc +42 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +3 -1
- data/{README.ko.markdown → README.ko.md} +68 -66
- data/README.md +162 -0
- data/Rakefile +7 -0
- data/config/default.yml +162 -162
- data/crawler/crawler.rb +49 -0
- data/gimchi.gemspec +21 -0
- data/lib/gimchi.rb +374 -4
- data/lib/gimchi/char.rb +26 -38
- data/lib/gimchi/patch_1.8.rb +9 -9
- data/lib/gimchi/pronouncer.rb +26 -27
- data/test/helper.rb +1 -0
- data/test/test_gimchi.rb +114 -86
- metadata +23 -51
- data/README.markdown +0 -155
- data/lib/gimchi/korean.rb +0 -323
data/lib/gimchi/char.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
# Class representing each Korean character. Its three components,
|
3
|
+
class Gimchi
|
4
|
+
# Class representing each Korean character. Its three components,
|
6
5
|
# `chosung', `jungsung' and `jongsung' can be read and set.
|
7
6
|
#
|
8
7
|
# `to_s' merges components into a String. `to_a' returns the three components.
|
@@ -14,13 +13,13 @@ class Korean
|
|
14
13
|
# @return [String] Jongsung component of this character.
|
15
14
|
attr_reader :jongsung
|
16
15
|
|
17
|
-
# @param [Gimchi
|
16
|
+
# @param [Gimchi] gimchi Gimchi instance
|
18
17
|
# @param [String] kchar Korean character string
|
19
|
-
def initialize
|
20
|
-
raise ArgumentError.new('Not a korean character') unless
|
18
|
+
def initialize gimchi, kchar
|
19
|
+
raise ArgumentError.new('Not a korean character') unless gimchi.korean_char? kchar
|
21
20
|
|
22
|
-
@
|
23
|
-
if @
|
21
|
+
@gimchi = gimchi
|
22
|
+
if @gimchi.complete_korean_char? kchar
|
24
23
|
c = kchar.unpack('U').first
|
25
24
|
n = c - 0xAC00
|
26
25
|
# '가' ~ '깋' -> 'ㄱ'
|
@@ -29,14 +28,14 @@ class Korean
|
|
29
28
|
n = n % (21 * 28)
|
30
29
|
n2 = n / 28;
|
31
30
|
n3 = n % 28;
|
32
|
-
self.chosung = @
|
33
|
-
self.jungsung = @
|
34
|
-
self.jongsung = ([nil] + @
|
35
|
-
elsif @
|
31
|
+
self.chosung = @gimchi.chosungs[n1]
|
32
|
+
self.jungsung = @gimchi.jungsungs[n2]
|
33
|
+
self.jongsung = ([nil] + @gimchi.jongsungs)[n3]
|
34
|
+
elsif @gimchi.chosung? kchar
|
36
35
|
self.chosung = kchar
|
37
|
-
elsif @
|
36
|
+
elsif @gimchi.jungsung? kchar
|
38
37
|
self.jungsung = kchar
|
39
|
-
elsif @
|
38
|
+
elsif @gimchi.jongsung? kchar
|
40
39
|
self.jongsung = kchar
|
41
40
|
end
|
42
41
|
end
|
@@ -44,42 +43,32 @@ class Korean
|
|
44
43
|
# Recombines components into a korean character.
|
45
44
|
# @return [String] Combined korean character
|
46
45
|
def to_s
|
47
|
-
|
48
|
-
""
|
49
|
-
elsif chosung && jungsung
|
50
|
-
n1, n2, n3 =
|
51
|
-
n1 = @kor.chosungs.index(chosung) || 0
|
52
|
-
n2 = @kor.jungsungs.index(jungsung) || 0
|
53
|
-
n3 = ([nil] + @kor.jongsungs).index(jongsung) || 0
|
54
|
-
[ 0xAC00 + n1 * (21 * 28) + n2 * 28 + n3 ].pack('U')
|
55
|
-
else
|
56
|
-
chosung || jungsung
|
57
|
-
end
|
46
|
+
@gimchi.compose chosung, jungsung, jongsung
|
58
47
|
end
|
59
48
|
|
60
49
|
# Sets the chosung component.
|
61
|
-
# @param [String]
|
50
|
+
# @param [String]
|
62
51
|
def chosung= c
|
63
52
|
raise ArgumentError.new('Invalid chosung component') if
|
64
|
-
c && @
|
65
|
-
@chosung = c && c.dup.extend(Component).tap { |e| e.kor = @
|
53
|
+
c && @gimchi.chosung?(c) == false
|
54
|
+
@chosung = c && c.dup.extend(Component).tap { |e| e.kor = @gimchi }
|
66
55
|
end
|
67
56
|
|
68
57
|
# Sets the jungsung component
|
69
|
-
# @param [String]
|
58
|
+
# @param [String]
|
70
59
|
def jungsung= c
|
71
60
|
raise ArgumentError.new('Invalid jungsung component') if
|
72
|
-
c && @
|
73
|
-
@jungsung = c && c.dup.extend(Component).tap { |e| e.kor = @
|
61
|
+
c && @gimchi.jungsung?(c) == false
|
62
|
+
@jungsung = c && c.dup.extend(Component).tap { |e| e.kor = @gimchi }
|
74
63
|
end
|
75
64
|
|
76
65
|
# Sets the jongsung component
|
77
66
|
#
|
78
|
-
# @param [String]
|
67
|
+
# @param [String]
|
79
68
|
def jongsung= c
|
80
69
|
raise ArgumentError.new('Invalid jongsung component') if
|
81
|
-
c && @
|
82
|
-
@jongsung = c && c.dup.extend(Component).tap { |e| e.kor = @
|
70
|
+
c && @gimchi.jongsung?(c) == false
|
71
|
+
@jongsung = c && c.dup.extend(Component).tap { |e| e.kor = @gimchi }
|
83
72
|
end
|
84
73
|
|
85
74
|
# Returns Array of three components.
|
@@ -105,22 +94,21 @@ class Korean
|
|
105
94
|
end
|
106
95
|
|
107
96
|
private
|
108
|
-
# Three components of
|
97
|
+
# Three components of Gimchi::Char are extended to support #vowel? and #consonant? method.
|
109
98
|
module Component
|
110
99
|
# @return [Gimchi] Hosting Gimchi instance
|
111
100
|
attr_accessor :kor
|
112
101
|
|
113
102
|
# Is this component a vowel?
|
114
103
|
def vowel?
|
115
|
-
kor.
|
104
|
+
kor.jungsung? self
|
116
105
|
end
|
117
106
|
|
118
107
|
# Is this component a consonant?
|
119
108
|
def consonant?
|
120
|
-
self != 'ㅇ' && kor.
|
109
|
+
self != 'ㅇ' && kor.chosung?(self)
|
121
110
|
end
|
122
111
|
end#Component
|
123
112
|
end#Char
|
124
|
-
end#Korean
|
125
113
|
end#Gimchi
|
126
114
|
|
data/lib/gimchi/patch_1.8.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION =~ /^1\.8\./
|
2
|
+
$KCODE = 'U'
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end#Gimchi
|
4
|
+
class Gimchi
|
5
|
+
private
|
6
|
+
def str_length str
|
7
|
+
str.scan(/./mu).length
|
8
|
+
end
|
9
|
+
end#Gimchi
|
10
|
+
end
|
data/lib/gimchi/pronouncer.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
|
4
|
-
class Korean
|
3
|
+
class Gimchi
|
5
4
|
# Private class.
|
6
5
|
# Partial implementation of the Korean pronunciation rules specified in
|
7
6
|
# http://www.korean.go.kr/
|
7
|
+
# @private
|
8
8
|
class Pronouncer
|
9
9
|
private
|
10
|
-
def initialize
|
11
|
-
@
|
12
|
-
@pconfig =
|
10
|
+
def initialize gimchi
|
11
|
+
@gimchi = gimchi
|
12
|
+
@pconfig = gimchi.config[:pronouncer]
|
13
13
|
end
|
14
14
|
|
15
15
|
def pronounce! str, options = {}
|
16
|
-
@sequence = @pconfig[
|
17
|
-
|
16
|
+
@sequence = @pconfig[:transformation][
|
17
|
+
"sequence_for_#{options[:each_char] ? '1' : '2'}".to_sym] - options[:except]
|
18
18
|
|
19
19
|
# Dissecting
|
20
|
-
@chars = @
|
20
|
+
@chars = str.each_char.map { |c| @gimchi.kchar(c) rescue c }
|
21
21
|
@orig_chars = @chars.dup
|
22
22
|
|
23
23
|
# Padding
|
@@ -35,9 +35,9 @@ class Korean
|
|
35
35
|
# Transform one by one
|
36
36
|
applied += (0...@chars.length).inject([]) { | arr, i | arr + transform(i); }
|
37
37
|
|
38
|
-
# Post-processing (actually just for :
|
39
|
-
@chars.select { |c| c.is_a?(
|
40
|
-
c.jongsung = @pconfig[
|
38
|
+
# Post-processing (actually just for :each_char option)
|
39
|
+
@chars.select { |c| c.is_a?(Gimchi::Char) && c.jongsung }.each do | c |
|
40
|
+
c.jongsung = @pconfig[:jongsung_sound][c.jongsung]
|
41
41
|
end
|
42
42
|
|
43
43
|
break unless options[:slur]
|
@@ -52,20 +52,20 @@ class Korean
|
|
52
52
|
kc = @chars[@cursor]
|
53
53
|
|
54
54
|
# Not korean
|
55
|
-
return [] unless kc.is_a?
|
55
|
+
return [] unless kc.is_a? Gimchi::Char
|
56
56
|
|
57
57
|
# Setting up variables for fast lookup
|
58
58
|
@kc = kc
|
59
|
-
@next_kc = (nkc = @chars[@cursor + 1]).is_a?(
|
59
|
+
@next_kc = (nkc = @chars[@cursor + 1]).is_a?(Gimchi::Char) ? nkc : nil
|
60
60
|
@kc_org = @initial_chars[@cursor]
|
61
|
-
@next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(
|
61
|
+
@next_kc_org = (nkco = @initial_chars[@cursor + 1]).is_a?(Gimchi::Char) ? nkco : nil
|
62
62
|
|
63
63
|
# Cannot properly pronounce
|
64
64
|
return [] if @kc.chosung.nil? && @kc.jungsung.nil? && @kc.jongsung.nil?
|
65
65
|
|
66
66
|
applied = []
|
67
67
|
not_todo = []
|
68
|
-
blocking_rule = @pconfig[
|
68
|
+
blocking_rule = @pconfig[:transformation][:blocking_rule]
|
69
69
|
@sequence.each do | rule |
|
70
70
|
next if not_todo.include?(rule)
|
71
71
|
|
@@ -78,7 +78,7 @@ class Korean
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def pad c
|
81
|
-
return unless c.is_a?
|
81
|
+
return unless c.is_a? Gimchi::Char
|
82
82
|
|
83
83
|
c.chosung = 'ㅇ' if c.chosung.nil?
|
84
84
|
c.jungsung = 'ㅡ' if c.jungsung.nil?
|
@@ -86,12 +86,12 @@ class Korean
|
|
86
86
|
|
87
87
|
# shortcut
|
88
88
|
def fortis_map
|
89
|
-
@
|
89
|
+
@gimchi.config[:structure][:fortis_map]
|
90
90
|
end
|
91
91
|
|
92
92
|
# shortcut
|
93
93
|
def double_consonant_map
|
94
|
-
@
|
94
|
+
@gimchi.config[:structure][:double_consonant_map]
|
95
95
|
end
|
96
96
|
|
97
97
|
# 제5항: ‘ㅑ ㅒ ㅕ ㅖ ㅘ ㅙ ㅛ ㅝ ㅞ ㅠ ㅢ’는 이중 모음으로 발음한다.
|
@@ -193,7 +193,7 @@ class Korean
|
|
193
193
|
'ㄱ' => 'ㅋ',
|
194
194
|
'ㄷ' => 'ㅌ',
|
195
195
|
'ㅈ' => 'ㅊ' }
|
196
|
-
if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung)
|
196
|
+
if %w[ㅎ ㄶ ㅀ].include?(@kc.jongsung)
|
197
197
|
# 12-1
|
198
198
|
if map_12_1.keys.include?(@next_kc.chosung)
|
199
199
|
@next_kc.chosung = map_12_1[@next_kc.chosung]
|
@@ -277,7 +277,7 @@ class Korean
|
|
277
277
|
|
278
278
|
if false && %w[ㅏ ㅓ ㅗ ㅜ ㅟ].include?(@next_kc.jungsung) &&
|
279
279
|
%[ㅆ ㄲ ㅈ ㅊ ㄵ ㄻ ㄾ ㄿ ㄺ].include?(@kc.jongsung) == false # PATCH
|
280
|
-
@next_kc.chosung = @pconfig[
|
280
|
+
@next_kc.chosung = @pconfig[:jongsung_sound][ @kc.jongsung ]
|
281
281
|
@kc.jongsung = nil
|
282
282
|
|
283
283
|
true
|
@@ -299,7 +299,7 @@ class Korean
|
|
299
299
|
|
300
300
|
word = @kc.to_s + @next_kc.to_s
|
301
301
|
if map.keys.include? word
|
302
|
-
new_char = @
|
302
|
+
new_char = @gimchi.kchar(map[word].scan(/./mu)[1])
|
303
303
|
@next_kc.chosung = new_char.chosung
|
304
304
|
@next_kc.jongsung = new_char.jongsung
|
305
305
|
|
@@ -331,8 +331,8 @@ class Korean
|
|
331
331
|
# ㄿ, ㅄ)’은 ‘ㄴ, ㅁ’ 앞에서 [ㅇ, ㄴ, ㅁ]으로 발음한다.
|
332
332
|
def rule_18
|
333
333
|
map = {
|
334
|
-
%w[ㄱ ㄲ ㅋ ㄳ ㄺ] => 'ㅇ',
|
335
|
-
%w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ',
|
334
|
+
%w[ㄱ ㄲ ㅋ ㄳ ㄺ] => 'ㅇ',
|
335
|
+
%w[ㄷ ㅅ ㅆ ㅈ ㅊ ㅌ ㅎ] => 'ㄴ',
|
336
336
|
%w[ㅂ ㅍ ㄼ ㄿ ㅄ] => 'ㅁ'
|
337
337
|
}
|
338
338
|
if @next_kc && map.keys.flatten.include?(@kc.jongsung) && %w[ㄴ ㅁ].include?(@next_kc.chosung)
|
@@ -395,7 +395,7 @@ class Korean
|
|
395
395
|
# 다만, 피동, 사동의 접미사 ‘-기-’는 된소리로 발음하지 않는다.
|
396
396
|
# 용언 어간에만 적용.
|
397
397
|
def rule_24
|
398
|
-
return if @next_kc.nil? ||
|
398
|
+
return if @next_kc.nil? ||
|
399
399
|
@next_kc.to_s == '기' # FIXME 피동/사동 여부 판단 불가. e.g. 줄넘기
|
400
400
|
|
401
401
|
# FIXME 용언 여부를 판단. 정확한 판단 불가.
|
@@ -441,7 +441,7 @@ class Korean
|
|
441
441
|
return if @next_kc.nil?
|
442
442
|
|
443
443
|
# 비교적 확률이 높은 경우들에 대해서만 처리. "일" 은 제외.
|
444
|
-
if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' &&
|
444
|
+
if %w[할 갈 날 볼 을 앨 말 힐].include?(@kc.to_s) && # @kc.jongsung == 'ㄹ' &&
|
445
445
|
%w[ㄱ ㄷ ㅂ ㅅ ㅈ].include?(@next_kc.chosung)
|
446
446
|
@next_kc.chosung = fortis_map[@next_kc.chosung]
|
447
447
|
true
|
@@ -467,7 +467,7 @@ class Korean
|
|
467
467
|
# 1. ‘ㄱ, ㄷ, ㅂ, ㅅ, ㅈ’으로 시작하는 단어 앞에 사이시옷이 올 때는 이들
|
468
468
|
# 자음만을 된소리로 발음하는 것을 원칙으로 하되, 사이시옷을 [ㄷ]으로
|
469
469
|
# 발음하는 것도 허용한다.
|
470
|
-
# 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다.
|
470
|
+
# 2. 사이시옷 뒤에 ‘ㄴ, ㅁ’이 결합되는 경우에는 [ㄴ]으로 발음한다.
|
471
471
|
# 3. 사이시옷 뒤에 ‘이’ 음이 결합되는 경우에는 [ㄴㄴ]으로 발음한다.
|
472
472
|
def rule_30
|
473
473
|
return if @next_kc.nil? || @kc.jongsung != 'ㅅ'
|
@@ -490,5 +490,4 @@ class Korean
|
|
490
490
|
end
|
491
491
|
end
|
492
492
|
end#Pronouncer
|
493
|
-
end#Korean
|
494
493
|
end#Gimchi
|
data/test/helper.rb
CHANGED
data/test/test_gimchi.rb
CHANGED
@@ -2,76 +2,75 @@
|
|
2
2
|
|
3
3
|
$LOAD_PATH.unshift File.dirname(__FILE__)
|
4
4
|
require 'helper'
|
5
|
+
require 'yaml'
|
6
|
+
require 'ansi'
|
7
|
+
|
5
8
|
|
6
9
|
class TestGimchi < Test::Unit::TestCase
|
7
10
|
def test_korean_char
|
8
|
-
|
9
|
-
assert_equal true,
|
10
|
-
assert_equal true,
|
11
|
-
assert_equal true,
|
12
|
-
assert_equal true,
|
13
|
-
assert_equal true,
|
14
|
-
|
15
|
-
|
16
|
-
assert_equal false,
|
17
|
-
|
18
|
-
assert_raise(ArgumentError) {
|
19
|
-
assert_raise(ArgumentError) { ko.kchar?('두자') }
|
11
|
+
assert_equal true, Gimchi.korean_char?('ㄱ') # true
|
12
|
+
assert_equal true, Gimchi.kchar?('ㄱ') # true
|
13
|
+
assert_equal true, Gimchi.korean_char?('ㅏ') # true
|
14
|
+
assert_equal true, Gimchi.korean_char?('가') # true
|
15
|
+
assert_equal true, Gimchi.korean_char?('값') # true
|
16
|
+
assert_equal true, Gimchi.kchar?('값') # true
|
17
|
+
|
18
|
+
assert_equal false, Gimchi.korean_char?('a') # false
|
19
|
+
assert_equal false, Gimchi.korean_char?('1') # false
|
20
|
+
assert_raise(ArgumentError) { Gimchi.korean_char?('두자') }
|
21
|
+
assert_raise(ArgumentError) { Gimchi.kchar?('두자') }
|
20
22
|
end
|
21
23
|
|
22
24
|
def test_kchar
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
assert_raise(ArgumentError) { ko.kchar('A') }
|
38
|
-
|
39
|
-
assert_equal true, ko.kchar("ㅏ").partial?
|
25
|
+
kc = Gimchi::Char('한')
|
26
|
+
assert_equal Gimchi::Char, kc.class
|
27
|
+
assert_equal "ㅎ", kc.chosung
|
28
|
+
assert_equal "ㅏ", kc.jungsung
|
29
|
+
assert_equal "ㄴ", kc.jongsung
|
30
|
+
assert_equal ["ㅎ", "ㅏ", "ㄴ"], kc.to_a
|
31
|
+
assert_equal "한", kc.to_s
|
32
|
+
assert_equal true, kc.complete?
|
33
|
+
assert_equal false, kc.partial?
|
34
|
+
|
35
|
+
assert_raise(ArgumentError) { Gimchi::Char('한글') }
|
36
|
+
assert_raise(ArgumentError) { Gimchi::Char('A') }
|
37
|
+
|
38
|
+
assert_equal true, Gimchi::Char("ㅏ").partial?
|
40
39
|
end
|
41
40
|
|
42
41
|
def test_complete_korean_char
|
43
|
-
ko = Gimchi::Korean.new
|
44
42
|
|
45
|
-
assert_equal false,
|
46
|
-
assert_equal false,
|
47
|
-
assert_equal true,
|
48
|
-
assert_equal true,
|
43
|
+
assert_equal false, Gimchi.complete_korean_char?('ㄱ') # false
|
44
|
+
assert_equal false, Gimchi.complete_korean_char?('ㅏ') # false
|
45
|
+
assert_equal true, Gimchi.complete_korean_char?('가') # true
|
46
|
+
assert_equal true, Gimchi.complete_korean_char?('값') # true
|
49
47
|
|
50
|
-
assert_equal false,
|
51
|
-
assert_equal false,
|
52
|
-
assert_raise(ArgumentError) {
|
48
|
+
assert_equal false, Gimchi.korean_char?('a') # false
|
49
|
+
assert_equal false, Gimchi.korean_char?('1') # false
|
50
|
+
assert_raise(ArgumentError) { Gimchi.korean_char?('두자') }
|
53
51
|
end
|
54
52
|
|
55
53
|
def test_dissect
|
56
|
-
|
54
|
+
arr = '이것은 Hangul 입니다.'.each_char.map { |ch|
|
55
|
+
(Gimchi::Char(ch) rescue [ch]).to_a
|
56
|
+
}.flatten.compact
|
57
57
|
|
58
|
-
|
59
|
-
assert_equal ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ",
|
58
|
+
assert_equal ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ",
|
60
59
|
"H", "a", "n", "g", "u", "l", " ", "ㅇ", "ㅣ", "ㅂ",
|
61
60
|
"ㄴ", "ㅣ", "ㄷ", "ㅏ", "."], arr
|
62
61
|
end
|
63
62
|
|
64
63
|
def test_convert
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
arr = '이것은 한글입니다.'.each_char.map { |ch|
|
65
|
+
Gimchi::Char(ch) rescue ch
|
66
|
+
}
|
68
67
|
# [이, 것, 은, " ", 한, 글, 입, 니, 다, "."]
|
69
68
|
|
70
69
|
assert_equal 10, arr.length
|
71
|
-
assert_equal Gimchi::
|
72
|
-
assert_equal Gimchi::
|
73
|
-
assert_equal Gimchi::
|
74
|
-
|
70
|
+
assert_equal Gimchi::Char, arr[0].class
|
71
|
+
assert_equal Gimchi::Char, arr[1].class
|
72
|
+
assert_equal Gimchi::Char, arr[2].class
|
73
|
+
|
75
74
|
ch = arr[2]
|
76
75
|
assert_equal 'ㅇ', ch.chosung
|
77
76
|
assert_equal 'ㅡ', ch.jungsung
|
@@ -108,43 +107,41 @@ class TestGimchi < Test::Unit::TestCase
|
|
108
107
|
end
|
109
108
|
|
110
109
|
def test_read_number
|
111
|
-
|
112
|
-
assert_equal "
|
113
|
-
assert_equal "
|
114
|
-
assert_equal "구",
|
115
|
-
assert_equal "
|
116
|
-
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
110
|
+
assert_equal "영", Gimchi.read_number(0)
|
111
|
+
assert_equal "일", Gimchi.read_number(1)
|
112
|
+
assert_equal "구", Gimchi.read_number(9)
|
113
|
+
assert_equal "천 구백 구십 구", Gimchi.read_number(1999)
|
114
|
+
assert_equal "마이너스 백점일이삼", Gimchi.read_number(- 100.123)
|
117
115
|
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사오육칠",
|
118
|
-
|
119
|
-
assert_equal "영점영영영영영일이삼사오",
|
120
|
-
assert_equal "일해 이천 삼백 사십 오경",
|
121
|
-
assert_equal "플러스 일해 이천 삼백 사십 오경",
|
122
|
-
assert_equal "마이너스 일해 이천 삼백 사십 오경",
|
123
|
-
assert_equal "만 십 이점삼",
|
124
|
-
assert_equal "십만 십 이점삼",
|
125
|
-
assert_equal "백 일만 십 이점삼",
|
126
|
-
assert_equal "천 십 이점삼",
|
127
|
-
assert_equal "십점영",
|
128
|
-
assert_equal "플러스 십점영",
|
116
|
+
Gimchi.read_number("53,191,100,678.3214567")
|
117
|
+
assert_equal "영점영영영영영일이삼사오", Gimchi.read_number("1.2345e-06")
|
118
|
+
assert_equal "일해 이천 삼백 사십 오경", Gimchi.read_number("1.2345e+20")
|
119
|
+
assert_equal "플러스 일해 이천 삼백 사십 오경", Gimchi.read_number("+ 1.2345e+20")
|
120
|
+
assert_equal "마이너스 일해 이천 삼백 사십 오경", Gimchi.read_number("- 1.2345e+20")
|
121
|
+
assert_equal "만 십 이점삼", Gimchi.read_number("100.123e+2")
|
122
|
+
assert_equal "십만 십 이점삼", Gimchi.read_number("1000.123e+2")
|
123
|
+
assert_equal "백 일만 십 이점삼", Gimchi.read_number("10100.123e+2")
|
124
|
+
assert_equal "천 십 이점삼", Gimchi.read_number("10.123e+2")
|
125
|
+
assert_equal "십점영", Gimchi.read_number("10.0")
|
126
|
+
assert_equal "플러스 십점영", Gimchi.read_number("+ 10.0")
|
129
127
|
|
130
128
|
# 나이, 시간, 개수, 명 ( -살, -시, -개, -명 )
|
131
|
-
assert_equal "나는 이십",
|
132
|
-
assert_equal "나는 스무살",
|
133
|
-
assert_equal "나는 스물네살",
|
134
|
-
assert_equal "스무개",
|
135
|
-
assert_equal "스무 명",
|
136
|
-
assert_equal "이십 칠점일살",
|
137
|
-
assert_equal "너는 열세 살",
|
138
|
-
assert_equal "백 서른두명",
|
139
|
-
assert_equal "이천 오백 아흔아홉개",
|
140
|
-
assert_equal "지금은 일곱시 삼십분",
|
129
|
+
assert_equal "나는 이십", Gimchi.read_number("나는 20")
|
130
|
+
assert_equal "나는 스무살", Gimchi.read_number("나는 20살")
|
131
|
+
assert_equal "나는 스물네살", Gimchi.read_number("나는 24살")
|
132
|
+
assert_equal "스무개", Gimchi.read_number("20개")
|
133
|
+
assert_equal "스무 명", Gimchi.read_number("20 명")
|
134
|
+
assert_equal "이십 칠점일살", Gimchi.read_number("27.1살")
|
135
|
+
assert_equal "너는 열세 살", Gimchi.read_number("너는 13 살")
|
136
|
+
assert_equal "백 서른두명", Gimchi.read_number("132명")
|
137
|
+
assert_equal "이천 오백 아흔아홉개", Gimchi.read_number("2,599개")
|
138
|
+
assert_equal "지금은 일곱시 삼십분", Gimchi.read_number("지금은 7시 30분")
|
139
|
+
|
140
|
+
# No way!
|
141
|
+
assert_raise(RangeError) { Gimchi.read_number 10 ** 100 }
|
141
142
|
end
|
142
143
|
|
143
144
|
def test_pronounce
|
144
|
-
require 'yaml'
|
145
|
-
require 'ansi'
|
146
|
-
|
147
|
-
ko = Gimchi::Korean.new
|
148
145
|
cnt = 0
|
149
146
|
s = 0
|
150
147
|
test_set = YAML.load File.read(File.dirname(__FILE__) + '/pronunciation.yml')
|
@@ -152,8 +149,8 @@ class TestGimchi < Test::Unit::TestCase
|
|
152
149
|
cnt += 1
|
153
150
|
k = k.gsub(/[-]/, '')
|
154
151
|
|
155
|
-
t1, tfs1 =
|
156
|
-
t2, tfs2 =
|
152
|
+
t1, tfs1 = Gimchi.pronounce(k, :each_char => false, :slur => true, :debug => true)
|
153
|
+
t2, tfs2 = Gimchi.pronounce(k, :each_char => false, :slur => false, :debug => true)
|
157
154
|
|
158
155
|
path = ""
|
159
156
|
if (with_slur = v.include?(t1.gsub(/\s/, ''))) || v.include?(t2.gsub(/\s/, ''))
|
@@ -165,7 +162,7 @@ class TestGimchi < Test::Unit::TestCase
|
|
165
162
|
r = ANSI::Code::RED + ANSI::Code::BOLD + v.join(' / ') + ANSI::Code::RESET
|
166
163
|
t = [t1, t2].join ' | '
|
167
164
|
end
|
168
|
-
puts "#{k} => #{t} (#{
|
165
|
+
puts "#{k} => #{t} (#{Gimchi.romanize t, :as_pronounced => false}) [#{path}] #{r}"
|
169
166
|
end
|
170
167
|
puts "#{s} / #{cnt}"
|
171
168
|
# FIXME
|
@@ -173,19 +170,16 @@ class TestGimchi < Test::Unit::TestCase
|
|
173
170
|
end
|
174
171
|
|
175
172
|
def test_romanize_preserve_non_korean
|
176
|
-
|
177
|
-
assert_equal 'ttok-kkateun kkk', ko.romanize('똑같은 kkk')
|
173
|
+
assert_equal 'ttok-kkateun kkk', Gimchi.romanize('똑같은 kkk')
|
178
174
|
end
|
179
175
|
|
180
176
|
def test_romanize
|
181
|
-
ko = Gimchi::Korean.new
|
182
|
-
|
183
177
|
cnt = 0
|
184
178
|
s = 0
|
185
179
|
test_set = YAML.load File.read(File.dirname(__FILE__) + '/romanization.yml')
|
186
180
|
test_set.each do | k, v |
|
187
181
|
cnt += 1
|
188
|
-
rom =
|
182
|
+
rom = Gimchi.romanize k.sub(/\[.*/, '')
|
189
183
|
if rom.downcase.gsub(/[\s-]/, '') == v.downcase.gsub(/\(.*\)/, '').gsub(/[\s-]/, '')
|
190
184
|
r = ANSI::Code::BLUE + ANSI::Code::BOLD + rom + ANSI::Code::RESET
|
191
185
|
s += 1
|
@@ -198,4 +192,38 @@ class TestGimchi < Test::Unit::TestCase
|
|
198
192
|
# FIXME
|
199
193
|
assert s >= 63
|
200
194
|
end
|
195
|
+
|
196
|
+
def test_cho_jung_jongsung?
|
197
|
+
c, j, jo = Gimchi::Char("달").to_a
|
198
|
+
assert Gimchi.chosung?(c)
|
199
|
+
assert Gimchi.jungsung?(j)
|
200
|
+
assert Gimchi.jongsung?(jo)
|
201
|
+
|
202
|
+
assert Gimchi.chosung?( 'ㄱ')
|
203
|
+
assert !Gimchi.jungsung?('ㄱ')
|
204
|
+
assert Gimchi.jongsung?('ㄱ')
|
205
|
+
assert !Gimchi.chosung?( 'ㅏ')
|
206
|
+
assert Gimchi.jungsung?('ㅏ')
|
207
|
+
assert !Gimchi.jongsung?('ㅏ')
|
208
|
+
assert !Gimchi.chosung?( 'ㄺ')
|
209
|
+
assert !Gimchi.jungsung?('ㄺ')
|
210
|
+
assert Gimchi.jongsung?('ㄺ')
|
211
|
+
end
|
212
|
+
|
213
|
+
def test_compose_decompose
|
214
|
+
ret = Gimchi.decompose("한")
|
215
|
+
assert ret.is_a?(Array)
|
216
|
+
assert_equal 'ㅎ', ret[0]
|
217
|
+
assert_equal 'ㅏ', ret[1]
|
218
|
+
assert_equal 'ㄴ', ret[2]
|
219
|
+
|
220
|
+
assert_equal '한', Gimchi.compose(*ret)
|
221
|
+
|
222
|
+
ret = Gimchi.decompose("ㅋ")
|
223
|
+
assert_equal 'ㅋ', ret[0]
|
224
|
+
assert_equal nil, ret[1]
|
225
|
+
assert_equal nil, ret[2]
|
226
|
+
|
227
|
+
assert_equal 'ㅋ', Gimchi.compose(*ret)
|
228
|
+
end
|
201
229
|
end
|