hangul_tools 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +45 -0
- data/Rakefile +14 -0
- data/hangul_tools.gemspec +25 -0
- data/lib/hangul_tools/version.rb +5 -0
- data/lib/hangul_tools.rb +189 -0
- data/test/romanization_test.rb +118 -0
- metadata +94 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4a7d8b03601df82bc4b060a6503025d9611f37d9
|
4
|
+
data.tar.gz: 0e0d1806fc74794b1a8f64eb53a365d56f4e5ade
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4f8bd7f2a8f49cabeacc1344fefd5ce635da1bc91b3b9cc1b94b2324cb3ef1b1cfb52b095b6f490abc1d2a224c11ceb4f93f5b7924ced36915b847455a734aeb
|
7
|
+
data.tar.gz: d7f2c2e6773651e9a03f0d9076891ae48b2d04283a49a5a8e6a98b1fa53fa8e1100def72312983d49999de485b398d2b553dfd2aaeabd01b4d22a8dc409decad
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Hangul
|
2
|
+
|
3
|
+
A library for automatically romanizing Korean text.
|
4
|
+
|
5
|
+
Supports the two primary systems for romanizing Hangul:
|
6
|
+
|
7
|
+
* McCune-Reischauer (an older system which includes diacritics and has a stronger emphasis on representing actual pronunciation)
|
8
|
+
* Revised (the standard used by the government of South Korea; does not use diacritics but the pronunciation is less obvious)
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
Given a string containing Korean text (possibly intermingled with non-Korean characters):
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
require 'hangul_tools'
|
16
|
+
|
17
|
+
s = 'I told him, "안녕하십니까."'
|
18
|
+
|
19
|
+
puts HangulTools.romanize(s, :revised)
|
20
|
+
# => I told him, "annyeonghasimnikka."
|
21
|
+
|
22
|
+
puts HangulTools.romanize(s, :mccune_reischauer)
|
23
|
+
# => I told him, "annyŏnghashimnikka."
|
24
|
+
```
|
25
|
+
|
26
|
+
If you omit the system to use, it defaults to revised:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
s = 'I told him, "안녕하십니까."'
|
30
|
+
|
31
|
+
puts HangulTools.romanize(s)
|
32
|
+
# => I told him, "annyeonghasimnikka."
|
33
|
+
```
|
34
|
+
|
35
|
+
## Caveats
|
36
|
+
|
37
|
+
The results are not guaranteed to be accurate for all inputs, and are not even guaranteed to conform exactly to the Revised or McCune-Reischauer systems.
|
38
|
+
|
39
|
+
If you notice an inaccuracy, please:
|
40
|
+
|
41
|
+
1. Write a failing test that demonstrates the problem.
|
42
|
+
2. Fix the problem.
|
43
|
+
3. Submit a pull request.
|
44
|
+
|
45
|
+
Thank you!
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rubygems/tasks'
|
4
|
+
|
5
|
+
task default: :test
|
6
|
+
|
7
|
+
Rake::TestTask.new do |t|
|
8
|
+
t.libs << "test"
|
9
|
+
t.test_files = FileList['test/**/*_test.rb']
|
10
|
+
t.verbose = true
|
11
|
+
t.warning = false
|
12
|
+
end
|
13
|
+
|
14
|
+
Gem::Tasks.new
|
@@ -0,0 +1,25 @@
|
|
1
|
+
lib = File.expand_path('../lib', __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "hangul_tools/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
|
6
|
+
gem.version = HangulTools::Version::STRING
|
7
|
+
gem.name = "hangul_tools"
|
8
|
+
gem.authors = ["Jamis Buck"]
|
9
|
+
gem.email = ["jamis@jamisbuck.org"]
|
10
|
+
gem.homepage = "http://github.com/jamis/hangul-tools"
|
11
|
+
gem.summary = "Romanize Korean text"
|
12
|
+
gem.description = "Convert Korean text to latin characters, using either the Revised system or McCune-Reischauer."
|
13
|
+
gem.license = 'MIT'
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split($\)
|
16
|
+
gem.test_files = gem.files.grep(%r{^test/})
|
17
|
+
gem.require_paths = ["lib"]
|
18
|
+
|
19
|
+
##
|
20
|
+
# Development dependencies
|
21
|
+
#
|
22
|
+
gem.add_development_dependency "rake"
|
23
|
+
gem.add_development_dependency "test-unit"
|
24
|
+
gem.add_development_dependency "rubygems-tasks", "~> 0"
|
25
|
+
end
|
data/lib/hangul_tools.rb
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
# Courtesy of algorithms described at:
|
2
|
+
# http://gernot-katzers-spice-pages.com/var/korean_hangul_unicode.html
|
3
|
+
|
4
|
+
module HangulTools
|
5
|
+
# Romanizes every run of Hangul syllables found in +text+ and leaves all
# other characters exactly as they were.
#
# text   - a String that may freely mix Hangul with any other characters.
# system - the romanization system to apply: :revised (the default) or
#          :mccune_reischauer.
#
# Returns a new String.
# Raises ArgumentError if +text+ contains Hangul and +system+ is not one of
# the known systems. Text without any Hangul is returned unchanged whatever
# +system+ is.
def self.romanize(text, system=:revised)
  # Split the text into alternating runs of Hangul syllables
  # (U+AC00..U+D7A3) and runs of everything else.
  text.scan(/[\uAC00-\uD7a3]+|[^\uAC00-\uD7a3]+/).map.with_index do |string, idx|
    next string unless string =~ /[\uAC00-\uD7a3]/

    unless VOWELS.key?(system)
      raise ArgumentError, "unknown romanization system: #{system.inspect}"
    end

    # Only a Hangul run that opens the text uses the :initial row of the
    # matrix; any later run uses the :voiced row.
    romanize_with_system(string, system, idx > 0 ? :voiced : :initial)
  end.join
end
|
17
|
+
|
18
|
+
# Matrix column keys for lead (initial) consonants, indexed by the 1-based
# lead value returned by `decompose`. Index 0 is an unused placeholder, and
# index 12 is nil, matching the "_" column of the romanization matrices.
LEADS = [ nil, 'g', 'gg', 'n', 'd', 'dd', 'r', 'm', 'b', 'bb' ,'s', 'ss', nil, 'j', 'jj', 'ch', 'k', 't', 'p', 'h' ]
|
19
|
+
# Matrix row keys for tail (final) consonants, indexed by the tail value
# returned by `decompose`. Index 0 (no tail) is nil, matching the "_" row of
# the romanization matrices.
TAILS = [ nil, 'g', 'gg', 'gs', 'n', 'nj', 'nh', 'd', 'l', 'lg', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh', 'm', 'b', 'bs', 's', 'ss', 'ng', 'j', 'ch', 'k', 't', 'p', 'h' ]
|
20
|
+
|
21
|
+
# Breaks each precomposed Hangul syllable of +text+ into its component
# indexes.
#
# The caller must ensure +text+ contains only Hangul syllable codepoints;
# nothing here validates that.
#
# Returns an Array with one [lead, vowel, tail] triple per codepoint. Lead
# and vowel are 1-based; tail is 0 when the syllable has no final consonant.
def self.decompose(text)
  text.each_codepoint.map do |codepoint|
    # 44032 is U+AC00, the first syllable. Each lead spans 588 codepoints
    # (21 vowels x 28 tails) and each vowel spans 28.
    lead_index, within_lead = (codepoint - 44032).divmod(588)
    vowel_index, tail_index = within_lead.divmod(28)

    [lead_index + 1, vowel_index + 1, tail_index]
  end
end
|
31
|
+
|
32
|
+
# Romanizes +text+, which must consist solely of Hangul syllables, using the
# lookup tables for +system+.
#
# text   - a String of Hangul syllables.
# system - key into the matrices, VOWELS and BLENDS tables.
# voiced - matrix row key (:initial or :voiced) used to pick the sound of
#          the very first lead consonant, which has no preceding tail.
#
# Returns the romanized String.
def self.romanize_with_system(text, system, voiced)
  table = matrices[system]
  vowel_sounds = VOWELS[system]
  replacements = BLENDS[system]

  jamo = decompose(text)
  last_index = jamo.length - 1
  pieces = []

  jamo.each_with_index do |(lead, vowel, tail), position|
    # A lead consonant's sound depends on the tail of the previous syllable;
    # the first syllable falls back to the caller-supplied context.
    context = position.zero? ? voiced : TAILS[jamo[position - 1][2].to_i]

    pieces << (table[context] || {})[LEADS[lead]]
    pieces << vowel_sounds[vowel]

    # Inner tails are emitted together with the following lead; only the
    # last syllable's tail needs its stand-alone :final form.
    pieces << (table[TAILS[tail]] || {})[:final] if position == last_index
  end

  # Missing table entries are nil and are dropped before joining; the
  # system's blends are then applied in order.
  replacements.reduce(pieces.compact.join) do |romanized, (pattern, blend)|
    romanized.gsub(pattern, blend)
  end
end
|
60
|
+
|
61
|
+
VOWELS = {
|
62
|
+
revised: [ nil, 'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'weo', 'we', 'wi', 'yu', 'eu', 'yi', 'i' ],
|
63
|
+
mccune_reischauer: [ nil, 'a', 'ae', 'ya', 'yae', 'ŏ', 'e', 'yŏ', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'wŏ', 'we', 'wi', 'yu', 'ŭ', 'ŭi', 'i' ]
|
64
|
+
}
|
65
|
+
|
66
|
+
BLENDS = {
|
67
|
+
revised: {},
|
68
|
+
mccune_reischauer: { "si" => "shi", "sy" => "shy", "swi" => "shwi" }
|
69
|
+
}
|
70
|
+
|
71
|
+
# Returns the romanization matrices, keyed by system name as a Symbol.
#
# The tables are parsed on first use from the text following the __END__
# line of this source file and then memoized. Each section starts with a
# "name:" line; the non-blank lines that follow are handed to parse_matrix.
def self.matrices
  return @matrices if @matrices

  source = File.read(__FILE__).lines
  marker = source.index("__END__\n")

  tables = {}
  name = nil
  rows = nil

  # Stores the section collected so far, if any.
  flush = lambda { tables[name.to_sym] = parse_matrix(rows) if rows }

  source[(marker + 1)..-1].each do |line|
    header = /^(\w+):$/.match(line)

    if header
      flush.call
      name = header[1]
      rows = []
    elsif line !~ /^$/
      rows << line
    end
  end

  flush.call

  @matrices = tables
end
|
90
|
+
|
91
|
+
# Builds a nested lookup table from the lines of one romanization matrix.
#
# lines - an Array of Strings. The first is a header row whose first cell is
#         a corner label (ignored) and whose remaining cells name the
#         columns. Every following row starts with a row key followed by one
#         sound per column. Cells are separated by any amount of whitespace;
#         leading whitespace and blank rows are ignored.
#
# In the header, "_" becomes the nil key and "final" becomes :final. As a
# row key, "initial" and "voiced" become Symbols and "_" becomes nil. A "_"
# sound becomes nil.
#
# Returns a Hash of row key => { column key => sound }. An empty +lines+
# yields an empty Hash.
def self.parse_matrix(lines)
  return {} if lines.empty?

  header, *rows = lines

  # The first header cell is only a corner label, so drop it.
  lead = header.split.drop(1).map do |v|
    case v
    when '_' then nil
    when 'final' then :final
    else v
    end
  end

  rows.each_with_object({}) do |line, matrix|
    cells = line.split
    # A whitespace-only row carries no data; without this guard it would
    # clobber the nil ("_") row.
    next if cells.empty?

    tail, *sounds = cells

    tail = case tail
           when 'initial' then :initial
           when 'voiced' then :voiced
           when '_' then nil
           else tail
           end

    sounds.map! { |s| s == '_' ? nil : s }

    matrix[tail] = Hash[lead.zip(sounds)]
  end
end
|
122
|
+
end
|
123
|
+
|
124
|
+
__END__
|
125
|
+
revised:
|
126
|
+
t\l g gg n d dd r m b bb s ss _ j jj ch k t p h final
|
127
|
+
initial g kk n d tt r m b pp s ss _ j jj ch k t p h _
|
128
|
+
voiced g kk n d tt r m b pp s ss _ j jj ch k t p h _
|
129
|
+
g kg kg ngn kd ktt ngn ngm kb kpp ks kss g kj kjj kch k-k kt kp kh k
|
130
|
+
gg kg kg ngn kd ktt ngn ngm kb kpp ks kss kk kj kjj kch k-k kt kp kh k
|
131
|
+
gs kk kk ngn kd ktt ngn ngm kb kpp ks kss ks kj kjj kch k-k kt kp kh k
|
132
|
+
n n-g n-kk nn nd ntt ll nm nb npp ns nss n nj njj nch nk nt np nh n
|
133
|
+
nj ntg ntkk nn ntd ntt ll nm ntb ntpp nts ntss nj njj njj nch nk nt np nh nt
|
134
|
+
nh nk nkk nn nt ntt ll nm np npp ns nss nh nch njj nch nk nt np nh n
|
135
|
+
d tg tkk nn td tt nn nm tb tpp ts tss d tj tjj tch tk tt tp th t
|
136
|
+
l lg lkk ll ld ltt ll lm lb lpp ls lss r lj ljj lch lk lt lp lh l
|
137
|
+
lg lkk lkk lng lkd lktt lngn lngm lkb lkpp lks lkss lg lkj lkjj lkch lk lkt lkp lk lk
|
138
|
+
lm lmg lmkk lmn lmd lmtt lmn lmm lmb lmpp lms lmss lm lmj lmjj lmch lmk lmt lmp lmh lm
|
139
|
+
lb lbg lbkk lmn lbd lbtt lmn lmm lpb lpp lbs lbss lb lbj lbjj lbch lbk lbt lbp lbh lp
|
140
|
+
ls ltk ltkk ll ltt ltt ll lm lpp lpp lss lss ls ljj ljj lch lk lt lp lt lt
|
141
|
+
lt ltk ltkk ll ltt ltt ll lm lpp lpp lss lss lt ljj ljj lch lk lt lp lt lt
|
142
|
+
lp lpk lpkk lmn lpt lptt lmn lm lpp lpp lps lpss lp lpj lpjj lpch lpk lpt lp lpt lp
|
143
|
+
lh lk lkk ll lt ltt ll lm lp lpp ls lss lh lch ljj lch lk lt lp lh l
|
144
|
+
m mg mkk mn md mtt mn mm mb mpp ms mss m mj mjj mch mk mt mp mh m
|
145
|
+
b pg pkk mn pd ptt mn mm pb pp ps pss b pj pjj pch pk pt p-p ph p
|
146
|
+
bs pg pkk mn pd ptt mn mm pb pp pss pss ps pjj pjj pch pk pt ptp pt p
|
147
|
+
s tg tkk nn td tt nn nm tb tpp ts tss s tj tjj tch tk t-t tp th t
|
148
|
+
ss tg tkk nn td tt nn nm tb tpp ts tss ss tj tjj tch tk t-t tp th t
|
149
|
+
_ g kk n d tt r m b pp s ss _ j jj ch k t p h _
|
150
|
+
ng ngg ngkk ngn ngd ngtt ngn ngm ngb ngpp ngs ngss ng ngj ngjj ngch ngk ngt ngp ngh ng
|
151
|
+
j tg tkk nn td tt nn nm tb tpp ts tss j tj tjj tch tk t-t tp th t
|
152
|
+
ch tg tkk nn td tt nn nm tb tpp ts tss ch tj tjj tch tk t-t tp th t
|
153
|
+
k kg kg ngn kd ktt ngn ngm kb kpp ks kss k kj kjj kch k-k kt kp kh k
|
154
|
+
t tg tkk nn td tt nn nm tb tpp ts tss t tj tjj tch tk tt tp th t
|
155
|
+
p pg pkk mn pd ptt mn mm pb pp ps pss p pj pjj pch pk pt p-p ph p
|
156
|
+
h k kk nn t tt nn nm p pp hs hss h ch ch tch tk tt tp t t
|
157
|
+
|
158
|
+
mccune_reischauer:
|
159
|
+
t\l g gg n d dd r m b bb s ss _ j jj ch k t p h final
|
160
|
+
initial k kk n t tt r m p pp s ss _ ch tch ch' k' t' p' h _
|
161
|
+
voiced g kk n d dd r m b bb s ss _ j jj ch' k' t' p' h _
|
162
|
+
g kk kk ngn kt ktt ngn ngm kp kpp ks kss g kj ktch kch' kk' kt' kp' kh k
|
163
|
+
gg kk kk ngn kt ktt ngn ngm kp kpp ks kss kk kj ktch kch' kk' kt' kp' kh kk
|
164
|
+
gs kk kk ngn kt ktt ngn ngm kb kpp ks kss ks ktch ktch kch' kk' kt' kp' kh k
|
165
|
+
n n'g n'kk nn nd ntt ll nm nb npp ns nss n nj ntch nch' nk' nt' np' nh n
|
166
|
+
nj nkk nkk nn ntt ntt ll nm npp npp nss nss nj ntch ntch nch' nk' nt' np' nch' nt
|
167
|
+
nh nk' nkk nn nt' ntt ll nm np' npp nss nss nh nch' ntch nch' nk' nt' np' nh n
|
168
|
+
d tk tkk nn tt tt nn nm tp tpp ss ss d tch tch tch' tk' tt' tp' t'h t
|
169
|
+
l lg lkk ll ld ltt ll lm lb lbb ls lss r lj lch lch' lk' lt' lp' rh l
|
170
|
+
lg lkk lkk ngn ltt ltt ll lngm lkb lkbb lks lkss lg lkj lktch lkch' lk' lkt' lkp' lk' lk
|
171
|
+
lm lmk lmkk lmn lmd lmdd lmn lmm lb lbb lms lmss lm lmj lmch lmch' lmk' lmt' lmp' lmh lm
|
172
|
+
lb lbk lbkk lmn lbd lbdd lmn lmm lb lbb lbs lbss lb lbj lbch lbch' lbk' lbt' lbp' lbh lp
|
173
|
+
ls ltk ltkk ll ltt ltt ll lm lpp lpp lss lss ls ltch ltch lch' lk' lt' lp' lt' lt
|
174
|
+
lt ltk ltkk ll ltt ltt ll lm lpp lpp lss lss lt ltch ltch ltch' ltk' lt' lp' lt' lt
|
175
|
+
lp lpg lpkk lmn lpd lptt lmn lmm lbb lbb lps lpss lp' lpj lptch lpch' lpk' lpt' lp' lp' lp'
|
176
|
+
lh lk' lkk ll lt' ltt ll lm lp' lpp ls lss lh lch' ltch lch' lk' lt' lp' lh l
|
177
|
+
m mg mkk mn md mdd mn mm mb mbb ms mss m mj mch mch' mk' mt' mp' mh m
|
178
|
+
b pk pkk mn pt ptt mn mm pp pp ps pss b pch ptch pch' pk' pt' pp' p'h p
|
179
|
+
bs pkk pkk mn ptt ptt mn mm pp pp pss pss ps ptch ptch ptch' pk' pt' pp' pt' p
|
180
|
+
s tk tkk nn tt tt nn nm tp tpp ss ss s tch tch tch' tk' tt' tp' t'h t
|
181
|
+
ss tk tkk nn tt tt nn nm tp tpp ss ss ss tch tch tch' tk' tt' tp' t'h t
|
182
|
+
_ g kk n d dd r m b bb s ss _ j jj ch' k' t' p' h _
|
183
|
+
ng ngg nkk ngn ngd ngdd ngn ngm ngb ngbb ngs ngss ng ngj ngjj ngch' ngk' ngt' ngp' ngh ng
|
184
|
+
j tk tkk nn tt tt nn nm tp tpp ss ss j tch tch tch' tk' tt' tp' ch' t
|
185
|
+
ch tk tkk nn tt tt nn nm tp tpp ss ss ch' tch tch tch' tk' tt' tp' ch' t
|
186
|
+
k kk kk ngn kt ktt ngn ngm kp kpp ks kss k' kch ktch kch' kk' kt' kp' k'h k
|
187
|
+
t tk tkk nn tt tt nn nm tp tpp ss ss t' tch tch tch' tk' tt' tp' t'h t
|
188
|
+
p pk pkk mn pt ptt mn mm pp pp ps pss p' pch ptch pch' pk' pt' pp' p'h p
|
189
|
+
h k' kk nn t' tt l m p' pp hs hss h ch' tch ch' k' t' p' h t
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'hangul_tools'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
class RomanizationTest < Test::Unit::TestCase
|
5
|
+
def test_decompose_with_vowels
|
6
|
+
hangul = %w( 아 애 야 얘 어 에 여 예 오 와 왜 외 요 우 워 웨 위 유 으 의 이 )
|
7
|
+
hangul.each.with_index do |given, idx|
|
8
|
+
assert_equal [[ 12, idx + 1, 0 ]], HangulTools.decompose(given)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_decompose_with_lead_consonants
|
13
|
+
hangul = %w( 가 까 나 다 따 라 마 바 빠 사 싸 아 자 짜 차 카 타 파 하 )
|
14
|
+
hangul.each.with_index do |given, idx|
|
15
|
+
assert_equal [[ idx + 1, 1, 0 ]], HangulTools.decompose(given)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_decompose_with_tail_consonants
|
20
|
+
hangul = %w( 악 앆 앇 안 앉 않 앋 알 앍 앎 앏 앐 앑 앒 앓 암 압 앖 앗 았 앙 앚 앛 앜 앝 앞 앟 )
|
21
|
+
hangul.each.with_index do |given, idx|
|
22
|
+
assert_equal [[ 12, 1, idx + 1 ]], HangulTools.decompose(given)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_revised_romanization_of_vowels
|
27
|
+
hangul = %w( 아 애 야 얘 어 에 여 예 오 와 왜 외 요 우 워 웨 위 유 으 의 이 )
|
28
|
+
latin = %w( a ae ya yae eo e yeo ye o wa wae oe yo u weo we wi yu eu yi i )
|
29
|
+
|
30
|
+
hangul.zip(latin).each do |(given, expect)|
|
31
|
+
assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
|
32
|
+
actual = HangulTools.romanize(given, :revised)
|
33
|
+
assert_equal expect, actual
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_revised_romanization_of_lead_consonants
|
39
|
+
hangul = %w( 가 까 나 다 따 라 마 바 빠 사 싸 아 자 짜 차 카 타 파 하 )
|
40
|
+
latin = %w( ga kka na da tta ra ma ba ppa sa ssa a ja jja cha ka ta pa ha )
|
41
|
+
|
42
|
+
hangul.zip(latin).each do |(given, expect)|
|
43
|
+
assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
|
44
|
+
actual = HangulTools.romanize(given, :revised)
|
45
|
+
assert_equal expect, actual
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_revised_romanization_of_tail_consonants
|
51
|
+
hangul = %w( 악 앆 앇 안 앉 않 앋 알 앍 앎 앏 앐 앑 앒 앓 암 압 앖 앗 았 앙 앚 앛 앜 앝 앞 앟 )
|
52
|
+
latin = %w( ak ak ak an ant an at al alk alm alp alt alt alp al am ap ap at at ang at at ak at ap at )
|
53
|
+
|
54
|
+
hangul.zip(latin).each do |(given, expect)|
|
55
|
+
assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
|
56
|
+
actual = HangulTools.romanize(given, :revised)
|
57
|
+
assert_equal expect, actual
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_revised_romanization_concatenation_of_consecutive_syllables
|
63
|
+
given = "안녕하십니까"
|
64
|
+
actual = HangulTools.romanize(given, :revised)
|
65
|
+
|
66
|
+
assert_equal "annyeonghasimnikka", actual
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_romanization_of_mixed_hangul_and_latin_romanizes_only_hangul
|
70
|
+
given = 'I said, "안녕하십니까," and she said "누구세요?"'
|
71
|
+
actual = HangulTools.romanize(given, :revised)
|
72
|
+
|
73
|
+
assert_equal 'I said, "annyeonghasimnikka," and she said "nuguseyo?"', actual
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_mccune_reischauer_romanization_of_vowels
|
77
|
+
hangul = %w( 아 애 야 얘 어 에 여 예 오 와 왜 외 요 우 워 웨 위 유 으 의 이 )
|
78
|
+
latin = %w( a ae ya yae ŏ e yŏ ye o wa wae oe yo u wŏ we wi yu ŭ ŭi i )
|
79
|
+
|
80
|
+
hangul.zip(latin).each do |(given, expect)|
|
81
|
+
assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
|
82
|
+
actual = HangulTools.romanize(given, :mccune_reischauer)
|
83
|
+
assert_equal expect, actual
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_mccune_reischauer_romanization_of_lead_consonants
|
89
|
+
hangul = %w( 가 까 나 다 따 라 마 바 빠 사 싸 아 자 짜 차 카 타 파 하 )
|
90
|
+
latin = %w( ka kka na ta tta ra ma pa ppa sa ssa a cha tcha ch'a k'a t'a p'a ha )
|
91
|
+
|
92
|
+
hangul.zip(latin).each do |(given, expect)|
|
93
|
+
assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
|
94
|
+
actual = HangulTools.romanize(given, :mccune_reischauer)
|
95
|
+
assert_equal expect, actual
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_mccune_reischauer_romanization_of_tail_consonants
|
101
|
+
hangul = %w( 악 앆 앇 안 앉 않 앋 알 앍 앎 앏 앐 앑 앒 앓 암 압 앖 앗 았 앙 앚 앛 앜 앝 앞 앟 )
|
102
|
+
latin = %w( ak akk ak an ant an at al alk alm alp alt alt alp' al am ap ap at at ang at at ak at ap at )
|
103
|
+
|
104
|
+
hangul.zip(latin).each do |(given, expect)|
|
105
|
+
assert_nothing_raised "given #{given.inspect} expect #{expect.inspect}" do
|
106
|
+
actual = HangulTools.romanize(given, :mccune_reischauer)
|
107
|
+
assert_equal expect, actual
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_mccune_reischauer_romanization_concatenation_of_consecutive_syllables
|
113
|
+
given = "안녕하십니까"
|
114
|
+
actual = HangulTools.romanize(given, :mccune_reischauer)
|
115
|
+
|
116
|
+
assert_equal "annyŏnghashimnikka", actual
|
117
|
+
end
|
118
|
+
end
|
metadata
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hangul_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jamis Buck
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubygems-tasks
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Convert Korean text to latin characters, using either the Revised system
|
56
|
+
or McCune-Reischauer.
|
57
|
+
email:
|
58
|
+
- jamis@jamisbuck.org
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- README.md
|
64
|
+
- Rakefile
|
65
|
+
- hangul_tools.gemspec
|
66
|
+
- lib/hangul_tools.rb
|
67
|
+
- lib/hangul_tools/version.rb
|
68
|
+
- test/romanization_test.rb
|
69
|
+
homepage: http://github.com/jamis/hangul-tools
|
70
|
+
licenses:
|
71
|
+
- MIT
|
72
|
+
metadata: {}
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options: []
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
requirements: []
|
88
|
+
rubyforge_project:
|
89
|
+
rubygems_version: 2.6.13
|
90
|
+
signing_key:
|
91
|
+
specification_version: 4
|
92
|
+
summary: Romanize Korean text
|
93
|
+
test_files:
|
94
|
+
- test/romanization_test.rb
|