hangul_tools 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +45 -0
- data/Rakefile +14 -0
- data/hangul_tools.gemspec +25 -0
- data/lib/hangul_tools/version.rb +5 -0
- data/lib/hangul_tools.rb +189 -0
- data/test/romanization_test.rb +118 -0
- metadata +94 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4a7d8b03601df82bc4b060a6503025d9611f37d9
|
4
|
+
data.tar.gz: 0e0d1806fc74794b1a8f64eb53a365d56f4e5ade
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4f8bd7f2a8f49cabeacc1344fefd5ce635da1bc91b3b9cc1b94b2324cb3ef1b1cfb52b095b6f490abc1d2a224c11ceb4f93f5b7924ced36915b847455a734aeb
|
7
|
+
data.tar.gz: d7f2c2e6773651e9a03f0d9076891ae48b2d04283a49a5a8e6a98b1fa53fa8e1100def72312983d49999de485b398d2b553dfd2aaeabd01b4d22a8dc409decad
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Hangul
|
2
|
+
|
3
|
+
A library for automatically romanizing Korean text.
|
4
|
+
|
5
|
+
Supports the two primary systems for romanizing Hangul:
|
6
|
+
|
7
|
+
* McCune-Reischauer (an older system which includes diacritics and has a stronger emphasis on representing actual pronunciation)
|
8
|
+
* Revised (the standard used by the government of South Korea; does not use diacritics but the pronunciation is less obvious)
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
Given a string containing Korean text (possibly intermingled with non-Korean characters):
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
require 'hangul_tools'
|
16
|
+
|
17
|
+
s = 'I told him, "안녕하십니까."'
|
18
|
+
|
19
|
+
puts HangulTools.romanize(s, :revised)
|
20
|
+
# => I told him, "annyeonghasimnikka."
|
21
|
+
|
22
|
+
puts HangulTools.romanize(s, :mccune_reischauer)
|
23
|
+
# => I told him, "annyŏnghashimnikka."
|
24
|
+
```
|
25
|
+
|
26
|
+
If you omit the system to use, it defaults to revised:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
s = 'I told him, "안녕하십니까."'
|
30
|
+
|
31
|
+
puts HangulTools.romanize(s)
|
32
|
+
# => I told him, "annyeonghasimnikka."
|
33
|
+
```
|
34
|
+
|
35
|
+
## Caveats
|
36
|
+
|
37
|
+
The results are not guaranteed to be accurate for all inputs, and are not even guaranteed to conform exactly to the Revised or McCune-Reischauer systems.
|
38
|
+
|
39
|
+
If you notice an inaccuracy, please:
|
40
|
+
|
41
|
+
1. Write a failing test that demonstrates the problem.
|
42
|
+
2. Fix the problem.
|
43
|
+
3. Submit a pull request.
|
44
|
+
|
45
|
+
Thank you!
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rubygems/tasks'
|
4
|
+
|
5
|
+
# Running `rake` with no arguments runs the :test task.
task :default => :test

# Test task: puts the test directory on the load path and runs every
# file matching test/**/*_test.rb, verbosely and with Ruby warnings off.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.test_files = FileList['test/**/*_test.rb']
  test_task.verbose = true
  test_task.warning = false
end

# Instantiates the task set provided by 'rubygems/tasks'
# (presumably the gem build/install/release tasks -- confirm against that gem).
Gem::Tasks.new
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Make lib/ loadable so the version constant can be read without the gem
# being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "hangul_tools/version"

Gem::Specification.new do |gem|
  gem.version     = HangulTools::Version::STRING
  gem.name        = "hangul_tools"
  gem.authors     = ["Jamis Buck"]
  gem.email       = ["jamis@jamisbuck.org"]
  gem.homepage    = "http://github.com/jamis/hangul-tools"
  gem.summary     = "Romanize Korean text"
  gem.description = "Convert Korean text to latin characters, using either the Revised system or McCune-Reischauer."
  gem.license     = 'MIT'

  # Ask git for NUL-terminated paths and split on NUL. The previous
  # `split($\)` relied on the output record separator, which is nil by
  # default, so it split on any whitespace and mangled paths with spaces.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.test_files    = gem.files.grep(%r{^test/})
  gem.require_paths = ["lib"]

  ##
  # Development dependencies
  #
  gem.add_development_dependency "rake"
  gem.add_development_dependency "test-unit"
  gem.add_development_dependency "rubygems-tasks", "~> 0"
end
|
data/lib/hangul_tools.rb
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
# Courtesy of algorithms described at:
|
2
|
+
# http://gernot-katzers-spice-pages.com/var/korean_hangul_unicode.html
|
3
|
+
|
4
|
+
# Romanizes Korean (Hangul) text using either the Revised system or
# McCune-Reischauer. The consonant transition tables live after the
# end-of-script marker of this file and are parsed lazily on first use.
module HangulTools
  # Code point of the first precomposed Hangul syllable (U+AC00).
  SYLLABLE_BASE = 44032
  # Number of tail slots per lead/vowel pair (slot 0 means "no tail").
  TAIL_COUNT = 28
  # Number of consecutive code points that share one lead consonant.
  LEAD_SPAN = 588

  # Romanizes every run of Hangul syllables in +text+ and leaves all other
  # characters untouched.
  #
  # text   - String, possibly mixing Hangul with other characters.
  # system - :revised (default) or :mccune_reischauer.
  #
  # Returns a new String ("" for empty input).
  # Raises ArgumentError if +text+ contains Hangul and +system+ is unknown.
  def self.romanize(text, system=:revised)
    text.scan(/[\uAC00-\uD7a3]+|[^\uAC00-\uD7a3]+/).map.with_index do |string, idx|
      if string =~ /[\uAC00-\uD7a3]/
        # Only a Hangul run at the very start of the text uses the :initial
        # row; a run preceded by any other text uses the :voiced row.
        romanize_with_system(string, system, idx > 0 ? :voiced : :initial)
      else
        string
      end
    end.join
  end

  # Lead and tail consonant keys, indexed by the numbers that +decompose+
  # returns. A nil entry means "no consonant sound" at that index.
  LEADS = [ nil, 'g', 'gg', 'n', 'd', 'dd', 'r', 'm', 'b', 'bb' ,'s', 'ss', nil, 'j', 'jj', 'ch', 'k', 't', 'p', 'h' ].freeze
  TAILS = [ nil, 'g', 'gg', 'gs', 'n', 'nj', 'nh', 'd', 'l', 'lg', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh', 'm', 'b', 'bs', 's', 'ss', 'ng', 'j', 'ch', 'k', 't', 'p', 'h' ].freeze

  # Splits each syllable of +text+ into its component indices.
  #
  # It is assumed that +text+ contains nothing but Hangul code points
  # (U+AC00..U+D7A3); other input yields meaningless indices.
  #
  # Returns an Array of [lead, vowel, tail] triples. lead and vowel are
  # 1-based; tail is 0-based, with 0 meaning the syllable has no tail.
  def self.decompose(text)
    text.codepoints.map do |point|
      lead, remainder = (point - SYLLABLE_BASE).divmod(LEAD_SPAN)
      vowel, tail = remainder.divmod(TAIL_COUNT)

      [lead + 1, vowel + 1, tail]
    end
  end

  # Romanizes a run consisting solely of Hangul syllables.
  #
  # text   - String of Hangul syllables.
  # system - :revised or :mccune_reischauer.
  # voiced - matrix row key (:initial or :voiced) used for the lead
  #          consonant of the first syllable.
  #
  # Returns the romanized String.
  # Raises ArgumentError if +system+ is unknown.
  def self.romanize_with_system(text, system, voiced)
    # Validate before touching the tables so an unknown system fails with a
    # clear message instead of a NoMethodError on nil further down.
    vowels = VOWELS.fetch(system) do
      raise ArgumentError, "unknown romanization system: #{system.inspect}"
    end
    matrix = matrices[system]
    blends = BLENDS[system]

    syllables = decompose(text)
    last_index = syllables.size - 1
    phonemes = []

    syllables.each_with_index do |(lead, vowel, tail), idx|
      # A non-final tail is not emitted on its own: it is combined with the
      # following lead through the matrix row named after the previous tail.
      prior = idx > 0 ? TAILS[syllables[idx - 1][2]] : voiced

      phonemes << (matrix[prior] || {})[LEADS[lead]]
      phonemes << vowels[vowel]

      # Only the last syllable's tail is looked up in the :final column.
      phonemes << (matrix[TAILS[tail]] || {})[:final] if idx == last_index
    end

    # Apply the system's post-processing substitutions in declaration order.
    blends.reduce(phonemes.compact.join) do |result, (pattern, blend)|
      result.gsub(pattern, blend)
    end
  end

  # Vowel romanizations per system, indexed by the 1-based vowel number
  # returned by +decompose+.
  VOWELS = {
    revised:           [ nil, 'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'weo', 'we', 'wi', 'yu', 'eu', 'yi', 'i' ].freeze,
    mccune_reischauer: [ nil, 'a', 'ae', 'ya', 'yae', 'ŏ', 'e', 'yŏ', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'wŏ', 'we', 'wi', 'yu', 'ŭ', 'ŭi', 'i' ].freeze
  }.freeze

  # Literal substitutions applied to the assembled romanization, per system.
  BLENDS = {
    revised:           {}.freeze,
    mccune_reischauer: { "si" => "shi", "sy" => "shy", "swi" => "shwi" }.freeze
  }.freeze

  # Loads (once) and returns the transition tables stored after the
  # end-of-script marker of this file.
  #
  # Returns a Hash mapping system name (Symbol) to a parsed matrix; see
  # +parse_matrix+ for the matrix layout.
  # Raises RuntimeError if the marker line cannot be found.
  def self.matrices
    @matrices ||= {}.tap do |hash|
      # DATA is only available to the main script, so re-read this file.
      raw = File.read(__FILE__, encoding: 'UTF-8').lines
      split_at = raw.index("__END__\n")
      raise "no __END__ section with romanization tables in #{__FILE__}" unless split_at

      key = lines = nil
      raw[(split_at + 1)..-1].each do |line|
        if line =~ /^(\w+):$/
          # A "name:" header starts a new table; store the one collected so far.
          hash[key.to_sym] = parse_matrix(lines) if lines
          key = $1
          lines = []
        elsif line !~ /^$/
          lines << line
        end
      end

      hash[key.to_sym] = parse_matrix(lines) if lines
    end
  end

  # Parses one whitespace-separated table.
  #
  # lines - Array of Strings. The first is the header row; its first cell is
  #         ignored and the rest name the columns. Every following row
  #         starts with its row key, followed by one cell per column.
  #
  # In the header, "_" becomes a nil column key and "final" becomes :final.
  # Row keys "initial" and "voiced" become Symbols and "_" becomes nil.
  # A "_" cell becomes nil.
  #
  # Returns a Hash of row key => { column key => String or nil }.
  def self.parse_matrix(lines)
    lead = lines.first.split(/\s+/)[1..-1].map do |v|
      case v
      when '_'     then nil
      when 'final' then :final
      else v
      end
    end

    lines[1..-1].each_with_object({}) do |line, matrix|
      tail, *sounds = line.split(/\s+/)

      tail = case tail
             when 'initial' then :initial
             when 'voiced'  then :voiced
             when '_'       then nil
             else tail
             end

      sounds.map! { |s| s == '_' ? nil : s }

      matrix[tail] = Hash[lead.zip(sounds)]
    end
  end
end
|
123
|
+
|
124
|
+
__END__
|
125
|
+
revised:
|
126
|
+
t\l g gg n d dd r m b bb s ss _ j jj ch k t p h final
|
127
|
+
initial g kk n d tt r m b pp s ss _ j jj ch k t p h _
|
128
|
+
voiced g kk n d tt r m b pp s ss _ j jj ch k t p h _
|
129
|
+
g kg kg ngn kd ktt ngn ngm kb kpp ks kss g kj kjj kch k-k kt kp kh k
|
130
|
+
gg kg kg ngn kd ktt ngn ngm kb kpp ks kss kk kj kjj kch k-k kt kp kh k
|
131
|
+
gs kk kk ngn kd ktt ngn ngm kb kpp ks kss ks kj kjj kch k-k kt kp kh k
|
132
|
+
n n-g n-kk nn nd ntt ll nm nb npp ns nss n nj njj nch nk nt np nh n
|
133
|
+
nj ntg ntkk nn ntd ntt ll nm ntb ntpp nts ntss nj njj njj nch nk nt np nh nt
|
134
|
+
nh nk nkk nn nt ntt ll nm np npp ns nss nh nch njj nch nk nt np nh n
|
135
|
+
d tg tkk nn td tt nn nm tb tpp ts tss d tj tjj tch tk tt tp th t
|
136
|
+
l lg lkk ll ld ltt ll lm lb lpp ls lss r lj ljj lch lk lt lp lh l
|
137
|
+
lg lkk lkk lng lkd lktt lngn lngm lkb lkpp lks lkss lg lkj lkjj lkch lk lkt lkp lk lk
|
138
|
+
lm lmg lmkk lmn lmd lmtt lmn lmm lmb lmpp lms lmss lm lmj lmjj lmch lmk lmt lmp lmh lm
|
139
|
+
lb lbg lbkk lmn lbd lbtt lmn lmm lpb lpp lbs lbss lb lbj lbjj lbch lbk lbt lbp lbh lp
|
140
|
+
ls ltk ltkk ll ltt ltt ll lm lpp lpp lss lss ls ljj ljj lch lk lt lp lt lt
|
141
|
+
lt ltk ltkk ll ltt ltt ll lm lpp lpp lss lss lt ljj ljj lch lk lt lp lt lt
|
142
|
+
lp lpk lpkk lmn lpt lptt lmn lm lpp lpp lps lpss lp lpj lpjj lpch lpk lpt lp lpt lp
|
143
|
+
lh lk lkk ll lt ltt ll lm lp lpp ls lss lh lch ljj lch lk lt lp lh l
|
144
|
+
m mg mkk mn md mtt mn mm mb mpp ms mss m mj mjj mch mk mt mp mh m
|
145
|
+
b pg pkk mn pd ptt mn mm pb pp ps pss b pj pjj pch pk pt p-p ph p
|
146
|
+
bs pg pkk mn pd ptt mn mm pb pp pss pss ps pjj pjj pch pk pt ptp pt p
|
147
|
+
s tg tkk nn td tt nn nm tb tpp ts tss s tj tjj tch tk t-t tp th t
|
148
|
+
ss tg tkk nn td tt nn nm tb tpp ts tss ss tj tjj tch tk t-t tp th t
|
149
|
+
_ g kk n d tt r m b pp s ss _ j jj ch k t p h _
|
150
|
+
ng ngg ngkk ngn ngd ngtt ngn ngm ngb ngpp ngs ngss ng ngj ngjj ngch ngk ngt ngp ngh ng
|
151
|
+
j tg tkk nn td tt nn nm tb tpp ts tss j tj tjj tch tk t-t tp th t
|
152
|
+
ch tg tkk nn td tt nn nm tb tpp ts tss ch tj tjj tch tk t-t tp th t
|
153
|
+
k kg kg ngn kd ktt ngn ngm kb kpp ks kss k kj kjj kch k-k kt kp kh k
|
154
|
+
t tg tkk nn td tt nn nm tb tpp ts tss t tj tjj tch tk tt tp th t
|
155
|
+
p pg pkk mn pd ptt mn mm pb pp ps pss p pj pjj pch pk pt p-p ph p
|
156
|
+
h k kk nn t tt nn nm p pp hs hss h ch ch tch tk tt tp t t
|
157
|
+
|
158
|
+
mccune_reischauer:
|
159
|
+
t\l g gg n d dd r m b bb s ss _ j jj ch k t p h final
|
160
|
+
initial k kk n t tt r m p pp s ss _ ch tch ch' k' t' p' h _
|
161
|
+
voiced g kk n d dd r m b bb s ss _ j jj ch' k' t' p' h _
|
162
|
+
g kk kk ngn kt ktt ngn ngm kp kpp ks kss g kj ktch kch' kk' kt' kp' kh k
|
163
|
+
gg kk kk ngn kt ktt ngn ngm kp kpp ks kss kk kj ktch kch' kk' kt' kp' kh kk
|
164
|
+
gs kk kk ngn kt ktt ngn ngm kb kpp ks kss ks ktch ktch kch' kk' kt' kp' kh k
|
165
|
+
n n'g n'kk nn nd ntt ll nm nb npp ns nss n nj ntch nch' nk' nt' np' nh n
|
166
|
+
nj nkk nkk nn ntt ntt ll nm npp npp nss nss nj ntch ntch nch' nk' nt' np' nch' nt
|
167
|
+
nh nk' nkk nn nt' ntt ll nm np' npp nss nss nh nch' ntch nch' nk' nt' np' nh n
|
168
|
+
d tk tkk nn tt tt nn nm tp tpp ss ss d tch tch tch' tk' tt' tp' t'h t
|
169
|
+
l lg lkk ll ld ltt ll lm lb lbb ls lss r lj lch lch' lk' lt' lp' rh l
|
170
|
+
lg lkk lkk ngn ltt ltt ll lngm lkb lkbb lks lkss lg lkj lktch lkch' lk' lkt' lkp' lk' lk
|
171
|
+
lm lmk lmkk lmn lmd lmdd lmn lmm lb lbb lms lmss lm lmj lmch lmch' lmk' lmt' lmp' lmh lm
|
172
|
+
lb lbk lbkk lmn lbd lbdd lmn lmm lb lbb lbs lbss lb lbj lbch lbch' lbk' lbt' lbp' lbh lp
|
173
|
+
ls ltk ltkk ll ltt ltt ll lm lpp lpp lss lss ls ltch ltch lch' lk' lt' lp' lt' lt
|
174
|
+
lt ltk ltkk ll ltt ltt ll lm lpp lpp lss lss lt ltch ltch ltch' ltk' lt' lp' lt' lt
|
175
|
+
lp lpg lpkk lmn lpd lptt lmn lmm lbb lbb lps lpss lp' lpj lptch lpch' lpk' lpt' lp' lp' lp'
|
176
|
+
lh lk' lkk ll lt' ltt ll lm lp' lpp ls lss lh lch' ltch lch' lk' lt' lp' lh l
|
177
|
+
m mg mkk mn md mdd mn mm mb mbb ms mss m mj mch mch' mk' mt' mp' mh m
|
178
|
+
b pk pkk mn pt ptt mn mm pp pp ps pss b pch ptch pch' pk' pt' pp' p'h p
|
179
|
+
bs pkk pkk mn ptt ptt mn mm pp pp pss pss ps ptch ptch ptch' pk' pt' pp' pt' p
|
180
|
+
s tk tkk nn tt tt nn nm tp tpp ss ss s tch tch tch' tk' tt' tp' t'h t
|
181
|
+
ss tk tkk nn tt tt nn nm tp tpp ss ss ss tch tch tch' tk' tt' tp' t'h t
|
182
|
+
_ g kk n d dd r m b bb s ss _ j jj ch' k' t' p' h _
|
183
|
+
ng ngg nkk ngn ngd ngdd ngn ngm ngb ngbb ngs ngss ng ngj ngjj ngch' ngk' ngt' ngp' ngh ng
|
184
|
+
j tk tkk nn tt tt nn nm tp tpp ss ss j tch tch tch' tk' tt' tp' ch' t
|
185
|
+
ch tk tkk nn tt tt nn nm tp tpp ss ss ch' tch tch tch' tk' tt' tp' ch' t
|
186
|
+
k kk kk ngn kt ktt ngn ngm kp kpp ks kss k' kch ktch kch' kk' kt' kp' k'h k
|
187
|
+
t tk tkk nn tt tt nn nm tp tpp ss ss t' tch tch tch' tk' tt' tp' t'h t
|
188
|
+
p pk pkk mn pt ptt mn mm pp pp ps pss p' pch ptch pch' pk' pt' pp' p'h p
|
189
|
+
h k' kk nn t' tt l m p' pp hs hss h ch' tch ch' k' t' p' h t
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'hangul_tools'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Exercises HangulTools.decompose and HangulTools.romanize for both
# supported romanization systems.
class RomanizationTest < Test::Unit::TestCase
  # Fixture syllables. Each list varies one component while holding the
  # others fixed, so the position in the list maps onto the index that
  # decompose is expected to report (see the decompose tests below).
  VOWEL_SYLLABLES = %w( 아 애 야 얘 어 에 여 예 오 와 왜 외 요 우 워 웨 위 유 으 의 이 ).freeze
  LEAD_SYLLABLES  = %w( 가 까 나 다 따 라 마 바 빠 사 싸 아 자 짜 차 카 타 파 하 ).freeze
  TAIL_SYLLABLES  = %w( 악 앆 앇 안 앉 않 앋 알 앍 앎 앏 앐 앑 앒 앓 암 압 앖 앗 았 앙 앚 앛 앜 앝 앞 앟 ).freeze

  def test_decompose_with_vowels
    VOWEL_SYLLABLES.each.with_index do |given, idx|
      assert_equal [[ 12, idx + 1, 0 ]], HangulTools.decompose(given)
    end
  end

  def test_decompose_with_lead_consonants
    LEAD_SYLLABLES.each.with_index do |given, idx|
      assert_equal [[ idx + 1, 1, 0 ]], HangulTools.decompose(given)
    end
  end

  def test_decompose_with_tail_consonants
    TAIL_SYLLABLES.each.with_index do |given, idx|
      assert_equal [[ 12, 1, idx + 1 ]], HangulTools.decompose(given)
    end
  end

  def test_revised_romanization_of_vowels
    latin = %w( a ae ya yae eo e yeo ye o wa wae oe yo u weo we wi yu eu yi i )
    assert_romanizations :revised, VOWEL_SYLLABLES, latin
  end

  def test_revised_romanization_of_lead_consonants
    latin = %w( ga kka na da tta ra ma ba ppa sa ssa a ja jja cha ka ta pa ha )
    assert_romanizations :revised, LEAD_SYLLABLES, latin
  end

  def test_revised_romanization_of_tail_consonants
    latin = %w( ak ak ak an ant an at al alk alm alp alt alt alp al am ap ap at at ang at at ak at ap at )
    assert_romanizations :revised, TAIL_SYLLABLES, latin
  end

  def test_revised_romanization_concatenation_of_consecutive_syllables
    given = "안녕하십니까"
    actual = HangulTools.romanize(given, :revised)

    assert_equal "annyeonghasimnikka", actual
  end

  def test_romanization_of_mixed_hangul_and_latin_romanizes_only_hangul
    given = 'I said, "안녕하십니까," and she said "누구세요?"'
    actual = HangulTools.romanize(given, :revised)

    assert_equal 'I said, "annyeonghasimnikka," and she said "nuguseyo?"', actual
  end

  def test_mccune_reischauer_romanization_of_vowels
    latin = %w( a ae ya yae ŏ e yŏ ye o wa wae oe yo u wŏ we wi yu ŭ ŭi i )
    assert_romanizations :mccune_reischauer, VOWEL_SYLLABLES, latin
  end

  def test_mccune_reischauer_romanization_of_lead_consonants
    latin = %w( ka kka na ta tta ra ma pa ppa sa ssa a cha tcha ch'a k'a t'a p'a ha )
    assert_romanizations :mccune_reischauer, LEAD_SYLLABLES, latin
  end

  def test_mccune_reischauer_romanization_of_tail_consonants
    latin = %w( ak akk ak an ant an at al alk alm alp alt alt alp' al am ap ap at at ang at at ak at ap at )
    assert_romanizations :mccune_reischauer, TAIL_SYLLABLES, latin
  end

  def test_mccune_reischauer_romanization_concatenation_of_consecutive_syllables
    given = "안녕하십니까"
    actual = HangulTools.romanize(given, :mccune_reischauer)

    assert_equal "annyŏnghashimnikka", actual
  end

  private

  # Romanizes each entry of +hangul+ with +system+ and asserts that it
  # equals the entry at the same position in +latin+, also asserting that
  # romanization itself does not raise.
  def assert_romanizations(system, hangul, latin)
    hangul.zip(latin).each do |given, expect|
      assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
        actual = HangulTools.romanize(given, system)
        assert_equal expect, actual
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hangul_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jamis Buck
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubygems-tasks
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Convert Korean text to latin characters, using either the Revised system
|
56
|
+
or McCune-Reischauer.
|
57
|
+
email:
|
58
|
+
- jamis@jamisbuck.org
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- README.md
|
64
|
+
- Rakefile
|
65
|
+
- hangul_tools.gemspec
|
66
|
+
- lib/hangul_tools.rb
|
67
|
+
- lib/hangul_tools/version.rb
|
68
|
+
- test/romanization_test.rb
|
69
|
+
homepage: http://github.com/jamis/hangul-tools
|
70
|
+
licenses:
|
71
|
+
- MIT
|
72
|
+
metadata: {}
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options: []
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
requirements: []
|
88
|
+
rubyforge_project:
|
89
|
+
rubygems_version: 2.6.13
|
90
|
+
signing_key:
|
91
|
+
specification_version: 4
|
92
|
+
summary: Romanize Korean text
|
93
|
+
test_files:
|
94
|
+
- test/romanization_test.rb
|