gimchi 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,174 +1,174 @@
1
1
  ---
2
2
  structure:
3
- chosung: [ㄱ, ㄲ, ㄴ, ㄷ, ㄸ, ㄹ, ㅁ, ㅂ, ㅃ, ㅅ, ㅆ, ㅇ, ㅈ, ㅉ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
4
- jungsung: [ㅏ, ㅐ, ㅑ, ㅒ, ㅓ, ㅔ, ㅕ, ㅖ, ㅗ, ㅘ, ㅙ, ㅚ, ㅛ, ㅜ, ㅝ, ㅞ, ㅟ, ㅠ, ㅡ, ㅢ, ㅣ]
5
- jongsung: [ㄱ, ㄲ, ㄳ, ㄴ, ㄵ, ㄶ, ㄷ, ㄹ, ㄺ, ㄻ, ㄼ, ㄽ, ㄾ, ㄿ, ㅀ, ㅁ, ㅂ, ㅄ, ㅅ,
6
- ㅆ, ㅇ, ㅈ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
3
+ chosung: [ㄱ, ㄲ, ㄴ, ㄷ, ㄸ, ㄹ, ㅁ, ㅂ, ㅃ, ㅅ, ㅆ, ㅇ, ㅈ, ㅉ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
4
+ jungsung: [ㅏ, ㅐ, ㅑ, ㅒ, ㅓ, ㅔ, ㅕ, ㅖ, ㅗ, ㅘ, ㅙ, ㅚ, ㅛ, ㅜ, ㅝ, ㅞ, ㅟ, ㅠ, ㅡ, ㅢ, ㅣ]
5
+ jongsung: [ㄱ, ㄲ, ㄳ, ㄴ, ㄵ, ㄶ, ㄷ, ㄹ, ㄺ, ㄻ, ㄼ, ㄽ, ㄾ, ㄿ, ㅀ, ㅁ, ㅂ, ㅄ, ㅅ,
6
+ ㅆ, ㅇ, ㅈ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
7
7
 
8
- fortis map:
9
- ㄱ: ㄲ
10
- ㄷ: ㄸ
11
- ㅂ: ㅃ
12
- ㅅ: ㅆ
13
- ㅈ: ㅉ
8
+ fortis map:
9
+ ㄱ: ㄲ
10
+ ㄷ: ㄸ
11
+ ㅂ: ㅃ
12
+ ㅅ: ㅆ
13
+ ㅈ: ㅉ
14
14
 
15
- double consonant map:
16
- ㄳ: [ㄱ, ㅅ]
17
- ㄵ: [ㄴ, ㅈ]
18
- ㄶ: [ㄴ, ㅎ]
19
- ㄺ: [ㄹ, ㄱ]
20
- ㄻ: [ㄹ, ㅁ]
21
- ㄼ: [ㄹ, ㅂ]
22
- ㄽ: [ㄹ, ㅅ]
23
- ㄾ: [ㄹ, ㅌ]
24
- ㄿ: [ㄹ, ㅍ]
25
- ㅀ: [ㄹ, ㅎ]
26
- ㅄ: [ㅂ, ㅅ]
15
+ double consonant map:
16
+ ㄳ: [ㄱ, ㅅ]
17
+ ㄵ: [ㄴ, ㅈ]
18
+ ㄶ: [ㄴ, ㅎ]
19
+ ㄺ: [ㄹ, ㄱ]
20
+ ㄻ: [ㄹ, ㅁ]
21
+ ㄼ: [ㄹ, ㅂ]
22
+ ㄽ: [ㄹ, ㅅ]
23
+ ㄾ: [ㄹ, ㅌ]
24
+ ㄿ: [ㄹ, ㅍ]
25
+ ㅀ: [ㄹ, ㅎ]
26
+ ㅄ: [ㅂ, ㅅ]
27
27
 
28
28
  pronouncer:
29
- jongsung sound:
30
- ㄱ: ㄱ
31
- ㄲ: ㄱ
32
- ㄳ: ㄱ
33
- ㄴ: ㄴ
34
- ㄵ: ㄴ
35
- ㄶ: ㄴ
36
- ㄷ: ㄷ
37
- ㄹ: ㄹ
38
- ㄺ: ㄱ
39
- ㄻ: ㅁ
40
- ㄼ: ㄹ
41
- ㄽ: ㄹ
42
- ㄾ: ㅌ
43
- ㄿ: ㅂ
44
- ㅀ: ㄹ
45
- ㅁ: ㅁ
46
- ㅂ: ㅂ
47
- ㅄ: ㅂ
48
- ㅅ: ㄷ
49
- ㅆ: ㄷ
50
- ㅇ: ㅇ
51
- ㅈ: ㄷ
52
- ㅊ: ㄷ
53
- ㅋ: ㄱ
54
- ㅌ: ㄷ
55
- ㅍ: ㅂ
56
- ㅎ:
57
- transformation:
58
- # changing the order affects the quality of the transformation
59
- sequence for 1:
60
- - rule_5_1
61
- - rule_5_3
29
+ jongsung sound:
30
+ ㄱ: ㄱ
31
+ ㄲ: ㄱ
32
+ ㄳ: ㄱ
33
+ ㄴ: ㄴ
34
+ ㄵ: ㄴ
35
+ ㄶ: ㄴ
36
+ ㄷ: ㄷ
37
+ ㄹ: ㄹ
38
+ ㄺ: ㄱ
39
+ ㄻ: ㅁ
40
+ ㄼ: ㄹ
41
+ ㄽ: ㄹ
42
+ ㄾ: ㅌ
43
+ ㄿ: ㅂ
44
+ ㅀ: ㄹ
45
+ ㅁ: ㅁ
46
+ ㅂ: ㅂ
47
+ ㅄ: ㅂ
48
+ ㅅ: ㄷ
49
+ ㅆ: ㄷ
50
+ ㅇ: ㅇ
51
+ ㅈ: ㄷ
52
+ ㅊ: ㄷ
53
+ ㅋ: ㄱ
54
+ ㅌ: ㄷ
55
+ ㅍ: ㅂ
56
+ ㅎ:
57
+ transformation:
58
+ # changing the order affects the quality of the transformation
59
+ sequence for 1:
60
+ - rule_5_1
61
+ - rule_5_3
62
62
 
63
- sequence for 2:
64
- - rule_16
65
- - rule_17
66
- - rule_18
67
- - rule_19
68
- - rule_5_1
69
- - rule_5_3
70
- - rule_30
71
- - rule_23
72
- - rule_24
73
- - rule_25
74
- - rule_12
75
- - rule_20
76
- - rule_10
77
- - rule_27
78
- - rule_9
79
- - rule_11
80
- - rule_14
81
- - rule_13
82
- - rule_15
83
- blocking rule:
84
- rule_16: [rule_30]
63
+ sequence for 2:
64
+ - rule_16
65
+ - rule_17
66
+ - rule_18
67
+ - rule_19
68
+ - rule_5_1
69
+ - rule_5_3
70
+ - rule_30
71
+ - rule_23
72
+ - rule_24
73
+ - rule_25
74
+ - rule_12
75
+ - rule_20
76
+ - rule_10
77
+ - rule_27
78
+ - rule_9
79
+ - rule_11
80
+ - rule_14
81
+ - rule_13
82
+ - rule_15
83
+ blocking rule:
84
+ rule_16: [rule_30]
85
85
 
86
86
  number:
87
- positive: 플러스
88
- negative: 마이너스
89
- decimal point: 점
90
- units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수]
91
- digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구]
92
- post substitution:
93
- "^일만": 만
87
+ positive: 플러스
88
+ negative: 마이너스
89
+ decimal point: 점
90
+ units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수]
91
+ digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구]
92
+ post substitution:
93
+ "^일만": 만
94
94
 
95
- # 정수형일 때 또다른 표현법 (나이, 시간)
96
- alt notation:
97
- when suffix:
98
- 개:
99
- max:
100
- 명:
101
- max:
102
- 살:
103
- max:
104
- 시:
105
- max: 12
106
- tenfolds: [열, 스물, 서른, 마흔, 쉰, 예순, 일흔, 여든, 아흔, 백]
107
- digits: ["", 한, 두, 세, 네, 다섯, 여섯, 일곱, 여덟, 아홉]
108
- post substitution:
109
- "스물$": 스무
95
+ # 정수형일 때 또다른 표현법 (나이, 시간)
96
+ alt notation:
97
+ when suffix:
98
+ 개:
99
+ max:
100
+ 명:
101
+ max:
102
+ 살:
103
+ max:
104
+ 시:
105
+ max: 12
106
+ tenfolds: [열, 스물, 서른, 마흔, 쉰, 예순, 일흔, 여든, 아흔, 백]
107
+ digits: ["", 한, 두, 세, 네, 다섯, 여섯, 일곱, 여덟, 아홉]
108
+ post substitution:
109
+ "스물$": 스무
110
110
 
111
111
  romanization:
112
- chosung:
113
- ㄱ: g
114
- ㄲ: kk
115
- ㅋ: k
116
- ㄷ: d
117
- ㄸ: tt
118
- ㅌ: t
119
- ㅂ: b
120
- ㅃ: pp
121
- ㅍ: p
122
- ㅈ: j
123
- ㅉ: jj
124
- ㅊ: ch
125
- ㅅ: s
126
- ㅆ: ss
127
- ㅎ: h
128
- ㄴ: n
129
- ㅁ: m
130
- ㄹ: r
131
- ㅇ: "-"
132
- jungsung:
133
- ㅏ: a
134
- ㅓ: eo
135
- ㅗ: o
136
- ㅜ: u
137
- ㅡ: eu
138
- ㅣ: i
139
- ㅐ: ae
140
- ㅔ: e
141
- ㅚ: oe
142
- ㅟ: wi
143
- ㅑ: ya
144
- ㅕ: yeo
145
- ㅛ: yo
146
- ㅠ: yu
147
- ㅒ: yae
148
- ㅖ: ye
149
- ㅘ: wa
150
- ㅙ: wae
151
- ㅝ: wo
152
- ㅞ: we
153
- ㅢ: ui
154
- jongsung:
155
- ㄱ: k
156
- ㄴ: n-
157
- ㄷ: t
158
- ㄹ: l
159
- ㅁ: m
160
- ㅂ: p
161
- ㅇ: ng
162
- post substitution:
163
- # 제2항 [붙임 2]‘ㄹ’은 모음 앞에서는 ‘r’로, 자음 앞이나 어말에서는
164
- # ‘l’로 적는다. 단, ‘ㄹㄹ’은 ‘ll’로 적는다.
165
- lr: ll
166
- "-w": w
167
- "-y": y
168
- kkk: k-kk
169
- ttt: t-tt
170
- ppp: p-pp
171
- "--": "-"
172
- "n-([^gaeiou])": "n\\1"
173
- "-(\\s)": "\\1"
174
- "-$": ""
112
+ chosung:
113
+ ㄱ: g
114
+ ㄲ: kk
115
+ ㅋ: k
116
+ ㄷ: d
117
+ ㄸ: tt
118
+ ㅌ: t
119
+ ㅂ: b
120
+ ㅃ: pp
121
+ ㅍ: p
122
+ ㅈ: j
123
+ ㅉ: jj
124
+ ㅊ: ch
125
+ ㅅ: s
126
+ ㅆ: ss
127
+ ㅎ: h
128
+ ㄴ: n
129
+ ㅁ: m
130
+ ㄹ: r
131
+ ㅇ: "-"
132
+ jungsung:
133
+ ㅏ: a
134
+ ㅓ: eo
135
+ ㅗ: o
136
+ ㅜ: u
137
+ ㅡ: eu
138
+ ㅣ: i
139
+ ㅐ: ae
140
+ ㅔ: e
141
+ ㅚ: oe
142
+ ㅟ: wi
143
+ ㅑ: ya
144
+ ㅕ: yeo
145
+ ㅛ: yo
146
+ ㅠ: yu
147
+ ㅒ: yae
148
+ ㅖ: ye
149
+ ㅘ: wa
150
+ ㅙ: wae
151
+ ㅝ: wo
152
+ ㅞ: we
153
+ ㅢ: ui
154
+ jongsung:
155
+ ㄱ: k
156
+ ㄴ: n-
157
+ ㄷ: t
158
+ ㄹ: l
159
+ ㅁ: m
160
+ ㅂ: p
161
+ ㅇ: ng
162
+ post substitution:
163
+ # 제2항 [붙임 2]‘ㄹ’은 모음 앞에서는 ‘r’로, 자음 앞이나 어말에서는
164
+ # ‘l’로 적는다. 단, ‘ㄹㄹ’은 ‘ll’로 적는다.
165
+ lr: ll
166
+ "-w": w
167
+ "-y": y
168
+ kkk: k-kk
169
+ ttt: t-tt
170
+ ppp: p-pp
171
+ "--": "-"
172
+ "n-([^gaeiou])": "n\\1"
173
+ "-(\\s)": "\\1"
174
+ "-$": ""
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # Junegunn Choi (junegunn.c@gmail.com)
4
+ # 2011/04/02-
5
+
6
+ # A dirty little script to fetch test sets from http://www.korean.go.kr
7
+
8
+ require 'open-uri'
9
+ require 'yaml'
10
+
11
# Crawl romanization test set: pull every "th>...</td" span out of the page,
# split each one on the markup in between, and keep only the spans that
# yield exactly two parts (source text and its romanization).
roman_page  = open('http://www.korean.go.kr/09_new/dic/rule/rule_roman_0101.jsp').read
roman_cells = roman_page.scan(%r{th>(.*?)</td}m).flatten
roman_pairs = roman_cells.map { |cell| cell.split(%r{<.*>}m) }.
                          select { |parts| parts.length == 2 }

# Write the pairs as a flat YAML mapping of quoted strings.
File.open(File.dirname(__FILE__) + '/../test/romanization.yml', 'w') do |out|
  out.puts "---"

  roman_pairs.each do |pair|
    out.puts "\"#{pair.first}\": \"#{pair.last}\""
  end
end

# NOTE: the script stops here; everything below is currently unreachable.
exit

# Crawl pronunciation test set: collect "word[reading]" pairs from each page.
readings = {}
rule_pages = %w[
  http://www.korean.go.kr/09_new/dic/rule/rule02_0202.jsp
  http://www.korean.go.kr/09_new/dic/rule/rule02_0204.jsp
  http://www.korean.go.kr/09_new/dic/rule/rule02_0205.jsp
  http://www.korean.go.kr/09_new/dic/rule/rule02_0206.jsp
  http://www.korean.go.kr/09_new/dic/rule/rule02_0207.jsp
]
rule_pages.each do |page_url|
  body = open(page_url).read
  body.scan(/>([^0-9<>);?]+?)\[(.*?)\]</).each do |word, reading|
    puts [word, reading].join(' => ')
    readings[word] = reading
  end
end

# Write each word with its list of accepted readings.
File.open(File.dirname(__FILE__) + '/../test/pronunciation.yml', 'w') do |out|
  out.puts "---"
  readings.each do |word, reading|
    # Keep only the text after the last arrow and drop hyphens from the word.
    key = word.sub(/.*→/, '').gsub(/-/, '')
    # Drop parentheses and length marks, then split alternative readings.
    alternatives = reading.sub(/.*→/, '').gsub(/[\(:ː\)]/, '').split(%r{[/∼]})
    out.puts "\"#{key}\": [#{alternatives.join(', ')}]"
  end
end
49
+
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make the gem's own lib directory loadable while the spec is evaluated.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# Gem specification for gimchi.
Gem::Specification.new do |spec|
  spec.name          = 'gimchi'
  spec.version       = '0.2.0'
  spec.authors       = ['Junegunn Choi']
  spec.email         = ['junegunn.c@gmail.com']
  spec.description   = 'A Ruby gem for Korean characters'
  spec.summary       = 'A Ruby gem for Korean characters'
  spec.homepage      = 'https://github.com/junegunn/gimchi'

  # Package every git-tracked file except those under viz/.
  tracked_files      = `git ls-files`.split($/)
  spec.files         = tracked_files.reject { |path| path =~ %r[^viz/] }
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
  spec.license       = 'MIT'

  spec.add_development_dependency 'ansi'
end
@@ -2,10 +2,380 @@
2
2
  # encoding: UTF-8
3
3
  # Junegunn Choi (junegunn.c@gmail.com)
4
4
 
5
- require 'gimchi/korean'
5
+ require 'yaml'
6
+ require 'set'
6
7
  require 'gimchi/char'
7
8
  require 'gimchi/pronouncer'
8
9
 
9
- if RUBY_VERSION =~ /^1\.8\./
10
- require 'gimchi/patch_1.8'
11
- end
10
# Utilities for Korean text: decomposing and composing Hangul characters,
# reading numeric expressions in Korean, and producing the pronunciation and
# romanization of Korean strings.
#
# Every public instance method listed in the `class << self` section is also
# available as a class method that forwards to a shared default instance
# (created by Gimchi.setup).
class Gimchi
  class << self
    # Creates the shared default instance on first call.
    # @return [Gimchi] the default instance
    def setup
      @@default ||= Gimchi.new
    end

    # Shortcut for #kchar on the default instance.
    # @param [String] ch Korean character
    # @return [Gimchi::Char]
    def Char ch
      @@default.kchar ch
    end

    # Class-level counterparts of the instance methods; each one forwards its
    # arguments and block to the default instance.
    [
      :decompose,
      :compose,
      :korean_char?,
      :complete_korean_char?,
      :kchar,
      :kchar?,
      :chosung?,
      :jungsung?,
      :jongsung?,
      :read_number,
      :pronounce,
      :romanize
    ].each do |sym|
      define_method(sym) do |*arg, &b|
        @@default.send sym, *arg, &b
      end
    end
  end

  CONFIG_FILE_PATH = File.expand_path('../../config/default.yml', __FILE__)
  attr_reader :config, :chosungs, :jungsungs, :jongsungs

  # Initializes a Gimchi instance from the YAML file at CONFIG_FILE_PATH.
  #
  # Hash keys containing lowercase ASCII letters, digits or spaces are turned
  # into symbols (spaces become underscores); other keys (e.g. Korean
  # characters) are kept as they are. The keys of the three
  # "post substitution" tables are compiled into Regexp objects.
  def initialize
    symbolize_keys = lambda do |val|
      case val
      when Hash
        {}.tap do |h|
          val.each do |k, v|
            k = k.gsub(' ', '_').to_sym if k =~ /[a-z0-9 ]/
            h[k] = symbolize_keys.call v
          end
        end
      when Array
        val.map { |v| symbolize_keys.call v }
      else
        val
      end
    end
    @config = symbolize_keys.call YAML.load(File.read CONFIG_FILE_PATH)

    # Replace the pattern keys (strings or symbols) with compiled Regexps.
    [
      @config[:romanization][:post_substitution],
      @config[:number][:post_substitution],
      @config[:number][:alt_notation][:post_substitution]
    ].each do |r|
      r.keys.each do |k|
        r[Regexp.compile(k.to_s)] = r.delete(k)
      end
    end
    @config.freeze

    @pronouncer = Gimchi::Pronouncer.send :new, self

    @chosungs     = config[:structure][:chosung]
    @jungsungs    = config[:structure][:jungsung]
    @jongsungs    = config[:structure][:jongsung]
    @chosung_set  = Set[*@chosungs]
    @jungsung_set = Set[*@jungsungs]
    @jongsung_set = Set[*@jongsungs]
    @all          = @chosung_set + @jungsung_set + @jongsung_set
  end

  # Decompose a Korean character into 3 components
  # @param [String] ch Korean character
  # @return [Array]
  def decompose ch
    kchar(ch).to_a
  end

  # Compose 3 elements into a Korean character String.
  #
  # When both chosung and jungsung are given, the Unicode syllable is
  # computed from their positions in the configured lists (an element that is
  # not found counts as position 0). When only one of them is given, it is
  # returned as is; when neither is given, an empty String is returned.
  #
  # @param [String] chosung
  # @param [String] jungsung
  # @param [String] jongsung
  # @return [String]
  def compose chosung, jungsung = nil, jongsung = nil
    if chosung.nil? && jungsung.nil?
      ""
    elsif chosung && jungsung
      n1 = chosungs.index(chosung) || 0
      n2 = jungsungs.index(jungsung) || 0
      # Index 0 of the jongsung slot means "no jongsung".
      n3 = ([nil] + jongsungs).index(jongsung) || 0
      [ 0xAC00 + n1 * (21 * 28) + n2 * 28 + n3 ].pack('U')
    else
      chosung || jungsung
    end
  end

  # @param [String] ch
  # @return [Boolean] true if ch is in the configured chosung list
  def chosung? ch
    @chosung_set.include? ch
  end

  # @param [String] ch
  # @return [Boolean] true if ch is in the configured jungsung list
  def jungsung? ch
    @jungsung_set.include? ch
  end

  # @param [String] ch
  # @return [Boolean] true if ch is in the configured jongsung list
  def jongsung? ch
    @jongsung_set.include? ch
  end

  # Checks if the given character is a korean character.
  # @param [String] ch A string of size 1
  # @return [Boolean]
  # @raise [ArgumentError] if ch is longer than one character
  def korean_char? ch
    raise ArgumentError, 'Lengthy input' if str_length(ch) > 1

    complete_korean_char?(ch) || @all.include?(ch)
  end
  alias kchar? korean_char?

  # Checks if the given character is a "complete" korean character.
  # "Complete" Korean character must have chosung and jungsung, with optional jongsung.
  # @param [String] ch A string of size 1
  # @return [Boolean]
  # @raise [ArgumentError] if ch is longer than one character
  def complete_korean_char? ch
    raise ArgumentError, 'Lengthy input' if str_length(ch) > 1

    # Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
    ch.unpack('U').all? { | c | c >= 0xAC00 && c <= 0xD7A3 }
  end

  # Returns a Gimchi::Char object for the given Korean character.
  # @param [String] ch Korean character in String
  # @return [Gimchi::Char] Gimchi::Char instance
  def kchar ch
    Gimchi::Char.new(self, ch)
  end

  # Reads numeric expressions in Korean way.
  #
  # Each numeric expression (optional sign, digits with optional commas,
  # optional fraction and exponent) is replaced together with the single
  # character that follows it, which is used to pick the notation.
  #
  # @param [String, Number] str Numeric type or String containing numeric expressions
  # @return [String] Output string
  def read_number str
    str.to_s.gsub(/(([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?)(\s*.)?/) {
      read_number_sub($1, $5)
    }
  end

  # Returns the pronunciation of the given string containing Korean characters.
  # Takes optional options hash.
  #
  # @param [String] str Input string
  # @param [Hash] options Options
  # @option options [Boolean] each_char Each character of the string is pronounced respectively.
  # @option options [Boolean] slur Strings separated by whitespaces are processed again as if they were contiguous.
  # @option options [Boolean] number Numeric parts of the string are also pronounced in Korean.
  # @option options [Array] except Allows you to skip certain transformations.
  # @option options [Boolean] debug If true, the applied transformations are returned along with the result.
  # @return [String] Output string (or the result and the transformations when :debug is set)
  def pronounce str, options = {}
    options = {
      :each_char => false,
      :slur      => false,
      :number    => true,
      :except    => [],
      :debug     => false
    }.merge options

    str = read_number(str) if options[:number]

    result, transforms = @pronouncer.send :pronounce!, str, options

    if options[:debug]
      return result, transforms
    else
      return result
    end
  end

  # Returns the romanization (alphabetical notation) of the given Korean string.
  # http://en.wikipedia.org/wiki/Korean_romanization
  # @param [String] str Input Korean string
  # @param [Hash] options Options
  # @option options [Boolean] as_pronounced If true, #pronounce is internally called before romanize
  # @option options [Boolean] number Whether to read numeric expressions in the string
  # @option options [Boolean] slur Same as :slur in #pronounce
  # @return [String] Output string in Roman Alphabet
  # @see #pronounce
  def romanize str, options = {}
    options = {
      :as_pronounced => true,
      :number        => true,
      :slur          => false
    }.merge options

    rdata     = config[:romanization]
    post_subs = rdata[:post_substitution]
    # Indexed like Char#to_a: 0 = chosung, 1 = jungsung, 2 = jongsung.
    rdata     = [rdata[:chosung], rdata[:jungsung], rdata[:jongsung]]

    str = pronounce str,
      :each_char => !options[:as_pronounced],
      :number    => options[:number],
      :slur      => options[:slur],
      # Article 1, Addendum 1: 'ㅢ' is written as 'ui' even when it is
      # pronounced as 'ㅣ'.
      :except    => %w[rule_5_3]
    dash = rdata[0]["ㅇ"]
    romanization = ""

    # Appends the romanized form of a run of Korean characters to
    # `romanization` and returns the accumulated text with the post
    # substitutions applied.
    romanize_chunk = lambda do |chunk|
      chunk.each_char.map { |ch| kchar(ch) rescue ch }.each do |kc|
        kc.to_a.each_with_index do |comp, idx|
          next if comp.nil?
          comp = rdata[idx][comp] || comp
          # Drop the leading dash at the very beginning or right after whitespace.
          comp = comp[1..-1] if comp[0, 1] == dash &&
              (romanization.empty? || romanization[-1, 1] =~ /\s/)
          romanization += comp
        end
      end

      return post_subs.keys.inject(romanization) { | output, pattern |
        output.gsub(pattern, post_subs[pattern])
      }
    end

    # Non-Korean characters are copied through; runs of Korean characters are
    # romanized as a chunk.
    k_chunk = ""
    str.each_char do | c |
      if korean_char? c
        k_chunk += c
      else
        unless k_chunk.empty?
          romanization = romanize_chunk.call k_chunk
          k_chunk = ""
        end
        romanization += c
      end
    end
    romanization = romanize_chunk.call k_chunk unless k_chunk.empty?
    romanization
  end

private
  # Length of the string in characters.
  def str_length str
    str.length
  end

  # Reads a single numeric expression.
  #
  # @param [String] num Numeric expression (may contain commas, a sign,
  #   a fraction and an exponent)
  # @param [String, nil] next_char The text captured right after the number
  #   (optional whitespace plus one character); it selects the alternative
  #   notation and is appended to the output
  # @return [String] Korean reading followed by next_char
  # @raise [RangeError] if the number needs a unit beyond the configured ones
  def read_number_sub num, next_char
    nconfig = config[:number]

    # The caller replaces the number AND the following character with our
    # return value, so the following character has to be kept for zero too.
    if num == '0'
      return nconfig[:digits].first + next_char.to_s
    end

    num = num.gsub(',', '')
    next_char = next_char.to_s
    is_float = num.match(/[\.e]/) != nil

    # Alternative notation for integers with proper suffix
    alt = false
    if !is_float &&
        nconfig[:alt_notation][:when_suffix].key?(next_char.strip)
      max = nconfig[:alt_notation][:when_suffix][next_char.strip][:max]

      if max.nil? || num.to_i <= max
        alt = true
      end
    end

    # Sign
    sign = []
    negative = false
    if num =~ /^-/
      num = num.sub(/^-\s*/, '')
      sign << nconfig[:negative]
      negative = true
    elsif num =~ /^\+/
      num = num.sub(/^\+\s*/, '')
      sign << nconfig[:positive]
    end

    if is_float
      below = nconfig[:decimal_point]
      below = nconfig[:digits][0] + below if num.to_f < 1

      # Expand exponent notation into a plain decimal string first.
      if md = num.match(/(.*)e(.*)/)
        dp = md[1].index('.')
        num = md[1].tr '.', ''
        exp = md[2].to_i

        dp += exp
        if dp > num.length
          num = num.ljust(dp, '0')
          num = num.sub(/^0+([1-9])/, "\\1")

          below = ""
        elsif dp < 0
          num = '0.' + '0' * (-dp) + num
        else
          num[dp, 1] = '.' + num[dp, 1]
        end
      end
      # Digits after the decimal point are read one by one.
      num.sub(/.*\./, '').each_char do | char |
        below += nconfig[:digits][char.to_i]
      end if num.include? '.'
      num = num.sub(/\..*/, '')
    else
      below = ""
    end

    # Read the integer part in groups of four digits, lowest group first.
    tokens = []
    unit_idx = -1
    num = num.to_i
    while num > 0
      v = num % 10000

      unit_idx += 1
      if v > 0
        if alt == false || unit_idx >= 1
          str = ""
          # Cannot use hash as they're unordered in 1.8
          [[1000, '천'],
           [100,  '백'],
           [10,   '십']].each do | arr |
            u, sub_unit = arr
            str += (nconfig[:digits][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
            v %= u
          end
          str += nconfig[:digits][v] if v > 0

          raise RangeError, "number too large" unless nconfig[:units][unit_idx]
          tokens << str.sub(/ $/, '') + nconfig[:units][unit_idx]
        else
          # Alternative notation applies to the lowest group only.
          str = ""
          tenfolds = nconfig[:alt_notation][:tenfolds]
          digits = nconfig[:alt_notation][:digits]
          alt_post_subs = nconfig[:alt_notation][:post_substitution]

          # Likewise.
          [[1000, '천'],
           [100,  '백']].each do |u, sub_unit|
            str += (nconfig[:digits][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
            v %= u
          end

          str += tenfolds[(v / 10) - 1] if v / 10 > 0
          v %= 10
          str += digits[v] if v > 0

          alt_post_subs.each do |p, s|
            str.gsub!(p, s)
          end if alt
          tokens << str.sub(/ $/, '') + nconfig[:units][unit_idx]
        end
      end
      num /= 10000
    end

    # Tokens are collected lowest group first, so the sign goes last before
    # the list is reversed.
    tokens += sign unless sign.empty?
    ret = tokens.reverse.join(' ') + below + next_char
    nconfig[:post_substitution].each do |p, s|
      ret.gsub!(p, s)
    end
    ret
  end
end#Gimchi
378
+
379
+ require 'gimchi/patch_1.8'
380
+
381
+ Gimchi.setup