gimchi 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +20 -0
- data/README.ko.rdoc +116 -0
- data/README.rdoc +118 -0
- data/config/default.yml +156 -0
- data/lib/gimchi.rb +8 -0
- data/lib/gimchi/char.rb +104 -0
- data/lib/gimchi/korean.rb +265 -0
- data/lib/gimchi/pronouncer.rb +461 -0
- data/test/helper.rb +17 -0
- data/test/pronunciation.yml +490 -0
- data/test/romanization.yml +97 -0
- data/test/test_gimchi.rb +135 -0
- metadata +111 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
---
|
2
|
+
"구미": "Gumi"
|
3
|
+
"영동": "Yeongdong"
|
4
|
+
"백암": "Baegam"
|
5
|
+
"옥천": "Okcheon"
|
6
|
+
"합덕": "Hapdeok"
|
7
|
+
"호법": "Hobeop"
|
8
|
+
"월곶[월곧]": "Wolgot"
|
9
|
+
"벚꽃[벋꼳]": "Beotkkot"
|
10
|
+
"한밭[한받]": "Hanbat"
|
11
|
+
"구리": "Guri"
|
12
|
+
"설악": "Seorak"
|
13
|
+
"칠곡": "Chilgok"
|
14
|
+
"임실": "Imsil"
|
15
|
+
"울릉": "Ulleung"
|
16
|
+
"대관령[대괄령]": "Daegwallyeong"
|
17
|
+
"백마[뱅마]": "Baengma"
|
18
|
+
"신문로[신문노]": "Sinmunno"
|
19
|
+
"종로[종노]": "Jongno"
|
20
|
+
"왕십리[왕심니]": "Wangsimni"
|
21
|
+
"별내[별래]": "Byeollae"
|
22
|
+
"신라[실라]": "Silla"
|
23
|
+
"학여울[항녀울]": "Hangnyeoul"
|
24
|
+
"알약[알략]": "allyak"
|
25
|
+
"해돋이[해도지]": "haedoji"
|
26
|
+
"같이[가치]": "gachi"
|
27
|
+
"맞히다[마치다]": "machida"
|
28
|
+
"좋고[조코]": "joko"
|
29
|
+
"놓다[노타]": "nota"
|
30
|
+
"잡혀[자펴]": "japyeo"
|
31
|
+
"낳지[나치]": "nachi"
|
32
|
+
"압구정": "Apgujeong"
|
33
|
+
"낙동강": "Nakdonggang"
|
34
|
+
"죽변": "Jukbyeon"
|
35
|
+
"낙성대": "Nakseongdae"
|
36
|
+
"합정": "Hapjeong"
|
37
|
+
"팔당": "Paldang"
|
38
|
+
"샛별": "saetbyeol"
|
39
|
+
"울산": "Ulsan"
|
40
|
+
"중앙": "Jung-ang"
|
41
|
+
"반구대": "Ban-gudae"
|
42
|
+
"세운": "Se-un"
|
43
|
+
"해운대": "Hae-undae"
|
44
|
+
"부산": "Busan"
|
45
|
+
"세종": "Sejong"
|
46
|
+
"한복남": "Han Boknam (Han Bok-nam)"
|
47
|
+
"홍빛나": "Hong Bitna (Hong Bit-na)"
|
48
|
+
"강남대로": "Gangnam-daero"
|
49
|
+
"세종로": "Sejong-ro"
|
50
|
+
"개나리길": "Gaenari-gil"
|
51
|
+
"충청북도": "Chungcheongbuk-do"
|
52
|
+
"제주도": "Jeju-do"
|
53
|
+
"의정부시": "Uijeongbu-si"
|
54
|
+
"양주군": "Yangju-gun"
|
55
|
+
"도봉구": "Dobong-gu"
|
56
|
+
"신창읍": "Sinchang-eup"
|
57
|
+
"삼죽면": "Samjuk-myeon"
|
58
|
+
"인왕리": "Inwang-ri"
|
59
|
+
"당산동": "Dangsan-dong"
|
60
|
+
"봉천 1동": "Bongcheon 1(il)-dong"
|
61
|
+
"종로 2가": "Jongno 2(i)-ga"
|
62
|
+
"퇴계로 3가": "Toegyero 3(sam)-ga"
|
63
|
+
"청주시": "Cheongju"
|
64
|
+
"함평군": "Hampyeong"
|
65
|
+
"순창읍": "Sunchang"
|
66
|
+
"남산": "Namsan"
|
67
|
+
"속리산": "Songnisan"
|
68
|
+
"금강": "Geumgang"
|
69
|
+
"독도": "Dokdo"
|
70
|
+
"경복궁": "Gyeongbokgung"
|
71
|
+
"무량수전": "Muryangsujeon"
|
72
|
+
"연화교": "Yeonhwagyo"
|
73
|
+
"극락전": "Geungnakjeon"
|
74
|
+
"안압지": "Anapji"
|
75
|
+
"남한산성": "Namhansanseong"
|
76
|
+
"화랑대": "Hwarangdae"
|
77
|
+
"불국사": "Bulguksa"
|
78
|
+
"현충사": "Hyeonchungsa"
|
79
|
+
"독립문": "Dongnimmun"
|
80
|
+
"오죽헌": "Ojukheon"
|
81
|
+
"촉석루": "Chokseongnu"
|
82
|
+
"종묘": "Jongmyo"
|
83
|
+
"다보탑": "Dabotap"
|
84
|
+
"집": "jib"
|
85
|
+
"짚": "jip"
|
86
|
+
"밖": "bakk"
|
87
|
+
"값": "gabs"
|
88
|
+
"붓꽃": "buskkoch"
|
89
|
+
"먹는": "meogneun"
|
90
|
+
"독립": "doglib"
|
91
|
+
"문리": "munli"
|
92
|
+
"물엿": "mul-yeos"
|
93
|
+
"굳이": "gud-i"
|
94
|
+
"좋다": "johda"
|
95
|
+
"가곡": "gagog"
|
96
|
+
"조랑말": "jolangmal"
|
97
|
+
"없었습니다.": "eobs-eoss-seubnida"
|
data/test/test_gimchi.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'helper'
|
4
|
+
|
5
|
+
# Unit tests for Gimchi::Korean: character classification, dissection of a
# string into Char objects, number reading, pronunciation and romanization.
#
# test_pronounce and test_romanize are data-driven: they load the YAML
# fixtures that sit next to this file and assert on the number of matching
# entries rather than on every single entry.
class TestGimchi < Test::Unit::TestCase
  # korean_char? is expected to accept single jamo as well as full syllables,
  # reject non-Korean characters and raise on multi-character input.
  def test_korean_char
    ko = Gimchi::Korean.new
    assert_equal true, ko.korean_char?('ㄱ') # true
    assert_equal true, ko.korean_char?('ㅏ') # true
    assert_equal true, ko.korean_char?('가') # true
    assert_equal true, ko.korean_char?('값') # true

    assert_equal false, ko.korean_char?('a') # false
    assert_equal false, ko.korean_char?('1') # false
    assert_raise(ArgumentError) { ko.korean_char?('두자') }
  end

  # complete_korean_char? is expected to be true only for full syllables:
  # lone jamo and non-Korean characters must both be rejected.
  def test_complete_korean_char
    ko = Gimchi::Korean.new

    assert_equal false, ko.complete_korean_char?('ㄱ') # false
    assert_equal false, ko.complete_korean_char?('ㅏ') # false
    assert_equal true, ko.complete_korean_char?('가') # true
    assert_equal true, ko.complete_korean_char?('값') # true

    # These two used to call korean_char? (copy-paste from test_korean_char),
    # which left complete_korean_char? untested for non-Korean input.
    assert_equal false, ko.complete_korean_char?('a') # false
    assert_equal false, ko.complete_korean_char?('1') # false
    # NOTE(review): this still targets korean_char?; presumably
    # complete_korean_char? raises on multi-character input as well, but that
    # is not visible from this file -- confirm before switching it over.
    assert_raise(ArgumentError) { ko.korean_char?('두자') }
  end

  # dissect should return one element per input character; the Char elements
  # expose chosung / jungsung / jongsung accessors that can be reassigned.
  def test_dissect
    ko = Gimchi::Korean.new

    arr = ko.dissect '이것은 한글입니다.'
    # [이, 것, 은, " ", 한, 글, 입, 니, 다, "."]

    assert_equal 10, arr.length
    assert_equal Gimchi::Korean::Char, arr[0].class
    assert_equal Gimchi::Korean::Char, arr[1].class
    assert_equal Gimchi::Korean::Char, arr[2].class

    ch = arr[2]
    assert_equal 'ㅇ', ch.chosung
    assert_equal 'ㅡ', ch.jungsung
    assert_equal 'ㄴ', ch.jongsung

    # Replacing components recomposes the syllable.
    ch.chosung = 'ㄱ'
    ch.jongsung = 'ㅁ'
    assert_equal '금', ch.to_s
    assert_equal 3, ch.to_a.length

    # Without a jongsung the character is still complete.
    ch.jongsung = nil
    assert_equal '그', ch.to_s
    assert_equal 2, ch.to_a.compact.length
    assert_equal true, ch.complete?
    assert_equal false, ch.partial?

    # Without a chosung it becomes partial.
    ch.chosung = nil
    assert_equal 1, ch.to_a.compact.length
    assert_equal false, ch.complete?
    assert_equal true, ch.partial?
    assert_equal 'ㅡ', ch.to_s

    # With every component removed it renders as an empty string.
    ch.jungsung = nil
    assert_equal 0, ch.to_a.compact.length
    assert_equal false, ch.complete?
    assert_equal true, ch.partial?
    assert_equal '', ch.to_s

    # Each setter must reject jamo that are not valid for that position.
    assert_raise(ArgumentError) { ch.chosung = 'ㅡ' }
    assert_raise(ArgumentError) { ch.chosung = 'ㄳ' }
    assert_raise(ArgumentError) { ch.jungsung = 'ㄱ' }
    assert_raise(ArgumentError) { ch.jongsung = 'ㅠ' }
  end

  # read_number takes numerics as well as strings with embedded numbers.
  def test_read_number
    ko = Gimchi::Korean.new
    assert_equal "천 구백 구십 구", ko.read_number(1999)
    assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
    assert_equal "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사",
                 ko.read_number("153,191,100,678.3214")

    # Age and time (numbers followed by -살 or -시)
    assert_equal "나는 스무살", ko.read_number("나는 20살")
    assert_equal "너는 열세 살", ko.read_number("너는 13 살")
    assert_equal "지금은 일곱시 삼십분", ko.read_number("지금은 7시 30분")
  end

  # Runs every entry of pronunciation.yml through pronounce, prints a
  # colourised line per entry and asserts on the number of matches.
  def test_pronounce
    require 'yaml'
    require 'ansi'

    ko = Gimchi::Korean.new
    cnt = 0
    s = 0
    test_set = YAML.load File.read(File.dirname(__FILE__) + '/pronunciation.yml')
    test_set.each do | k, v |
      cnt += 1
      k = k.gsub(/[-]/, '')
      t, tfs = ko.pronounce(k, :pronounce_each_char => false, :slur => k.include?(' '), :debug => true)
      if v.include? t.gsub(/\s/, '')
        # On a match the alternatives are only echoed when there are several.
        r = ANSI::Code::BLUE + ANSI::Code::BOLD + v.join(' / ') + ANSI::Code::RESET if v.length > 1
        s += 1
      else
        r = ANSI::Code::RED + ANSI::Code::BOLD + v.join(' / ') + ANSI::Code::RESET
      end
      puts "#{k} => #{t} (#{ko.romanize t}) [#{tfs.join(' > ')}] #{r}"
    end
    puts "#{s} / #{cnt}"
    # FIXME: threshold check only -- requires at least 410 matching entries,
    # not that every fixture entry matches.
    assert s >= 410
  end

  # Runs every entry of romanization.yml through romanize and asserts on the
  # number of matches. Keys may carry a "[pronunciation]" suffix and values a
  # "(alternative)" suffix; both are stripped, and case, whitespace and
  # hyphens are ignored when comparing.
  def test_romanize
    # Required here as well so this test does not depend on test_pronounce
    # having been run first (e.g. when selected on its own with -n).
    require 'yaml'
    require 'ansi'

    ko = Gimchi::Korean.new

    cnt = 0
    s = 0
    test_set = YAML.load File.read(File.dirname(__FILE__) + '/romanization.yml')
    test_set.each do | k, v |
      cnt += 1
      rom = ko.romanize k.sub(/\[.*/, '')
      if rom.downcase.gsub(/[\s-]/, '') == v.downcase.gsub(/\(.*\)/, '').gsub(/[\s-]/, '')
        r = ANSI::Code::BLUE + ANSI::Code::BOLD + rom + ANSI::Code::RESET
        s += 1
      else
        r = ANSI::Code::RED + ANSI::Code::BOLD + rom + ANSI::Code::RESET
      end
      puts "#{k} => #{r} [#{v}]"
    end
    puts "#{s} / #{cnt}"
    # FIXME: threshold check only -- requires at least 55 matching entries,
    # not that every fixture entry matches.
    assert s >= 55
  end
end
|
metadata
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gimchi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Junegunn Choi
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-04-05 00:00:00.000000000 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: bundler
|
17
|
+
requirement: &2156236300 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.0.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *2156236300
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: jeweler
|
28
|
+
requirement: &2156235820 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.5.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *2156235820
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rcov
|
39
|
+
requirement: &2156235340 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *2156235340
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: ansi
|
50
|
+
requirement: &2156234860 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.2.2
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *2156234860
|
59
|
+
description: Gimchi knows how to pronounce Korean string and how to write them in
|
60
|
+
roman alphabet.
|
61
|
+
email: junegunn.c@gmail.com
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files:
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.ko.rdoc
|
67
|
+
- README.rdoc
|
68
|
+
files:
|
69
|
+
- config/default.yml
|
70
|
+
- lib/gimchi.rb
|
71
|
+
- lib/gimchi/char.rb
|
72
|
+
- lib/gimchi/korean.rb
|
73
|
+
- lib/gimchi/pronouncer.rb
|
74
|
+
- LICENSE.txt
|
75
|
+
- README.ko.rdoc
|
76
|
+
- README.rdoc
|
77
|
+
- test/helper.rb
|
78
|
+
- test/pronunciation.yml
|
79
|
+
- test/romanization.yml
|
80
|
+
- test/test_gimchi.rb
|
81
|
+
has_rdoc: true
|
82
|
+
homepage: http://github.com/junegunn/gimchi
|
83
|
+
licenses:
|
84
|
+
- MIT
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ! '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
97
|
+
requirements:
|
98
|
+
- - ! '>='
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
requirements: []
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 1.6.2
|
104
|
+
signing_key:
|
105
|
+
specification_version: 3
|
106
|
+
summary: Gimchi reads Korean.
|
107
|
+
test_files:
|
108
|
+
- test/helper.rb
|
109
|
+
- test/pronunciation.yml
|
110
|
+
- test/romanization.yml
|
111
|
+
- test/test_gimchi.rb
|