gimchi 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,174 +1,174 @@
1
1
  ---
2
2
  structure:
3
- chosung: [ㄱ, ㄲ, ㄴ, ㄷ, ㄸ, ㄹ, ㅁ, ㅂ, ㅃ, ㅅ, ㅆ, ㅇ, ㅈ, ㅉ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
4
- jungsung: [ㅏ, ㅐ, ㅑ, ㅒ, ㅓ, ㅔ, ㅕ, ㅖ, ㅗ, ㅘ, ㅙ, ㅚ, ㅛ, ㅜ, ㅝ, ㅞ, ㅟ, ㅠ, ㅡ, ㅢ, ㅣ]
5
- jongsung: [ㄱ, ㄲ, ㄳ, ㄴ, ㄵ, ㄶ, ㄷ, ㄹ, ㄺ, ㄻ, ㄼ, ㄽ, ㄾ, ㄿ, ㅀ, ㅁ, ㅂ, ㅄ, ㅅ,
6
- ㅆ, ㅇ, ㅈ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
3
+ chosung: [ㄱ, ㄲ, ㄴ, ㄷ, ㄸ, ㄹ, ㅁ, ㅂ, ㅃ, ㅅ, ㅆ, ㅇ, ㅈ, ㅉ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
4
+ jungsung: [ㅏ, ㅐ, ㅑ, ㅒ, ㅓ, ㅔ, ㅕ, ㅖ, ㅗ, ㅘ, ㅙ, ㅚ, ㅛ, ㅜ, ㅝ, ㅞ, ㅟ, ㅠ, ㅡ, ㅢ, ㅣ]
5
+ jongsung: [ㄱ, ㄲ, ㄳ, ㄴ, ㄵ, ㄶ, ㄷ, ㄹ, ㄺ, ㄻ, ㄼ, ㄽ, ㄾ, ㄿ, ㅀ, ㅁ, ㅂ, ㅄ, ㅅ,
6
+ ㅆ, ㅇ, ㅈ, ㅊ, ㅋ, ㅌ, ㅍ, ㅎ]
7
7
 
8
- fortis map:
9
- ㄱ: ㄲ
10
- ㄷ: ㄸ
11
- ㅂ: ㅃ
12
- ㅅ: ㅆ
13
- ㅈ: ㅉ
8
+ fortis map:
9
+ ㄱ: ㄲ
10
+ ㄷ: ㄸ
11
+ ㅂ: ㅃ
12
+ ㅅ: ㅆ
13
+ ㅈ: ㅉ
14
14
 
15
- double consonant map:
16
- ㄳ: [ㄱ, ㅅ]
17
- ㄵ: [ㄴ, ㅈ]
18
- ㄶ: [ㄴ, ㅎ]
19
- ㄺ: [ㄹ, ㄱ]
20
- ㄻ: [ㄹ, ㅁ]
21
- ㄼ: [ㄹ, ㅂ]
22
- ㄽ: [ㄹ, ㅅ]
23
- ㄾ: [ㄹ, ㅌ]
24
- ㄿ: [ㄹ, ㅍ]
25
- ㅀ: [ㄹ, ㅎ]
26
- ㅄ: [ㅂ, ㅅ]
15
+ double consonant map:
16
+ ㄳ: [ㄱ, ㅅ]
17
+ ㄵ: [ㄴ, ㅈ]
18
+ ㄶ: [ㄴ, ㅎ]
19
+ ㄺ: [ㄹ, ㄱ]
20
+ ㄻ: [ㄹ, ㅁ]
21
+ ㄼ: [ㄹ, ㅂ]
22
+ ㄽ: [ㄹ, ㅅ]
23
+ ㄾ: [ㄹ, ㅌ]
24
+ ㄿ: [ㄹ, ㅍ]
25
+ ㅀ: [ㄹ, ㅎ]
26
+ ㅄ: [ㅂ, ㅅ]
27
27
 
28
28
  pronouncer:
29
- jongsung sound:
30
- ㄱ: ㄱ
31
- ㄲ: ㄱ
32
- ㄳ: ㄱ
33
- ㄴ: ㄴ
34
- ㄵ: ㄴ
35
- ㄶ: ㄴ
36
- ㄷ: ㄷ
37
- ㄹ: ㄹ
38
- ㄺ: ㄱ
39
- ㄻ: ㅁ
40
- ㄼ: ㄹ
41
- ㄽ: ㄹ
42
- ㄾ: ㅌ
43
- ㄿ: ㅂ
44
- ㅀ: ㄹ
45
- ㅁ: ㅁ
46
- ㅂ: ㅂ
47
- ㅄ: ㅂ
48
- ㅅ: ㄷ
49
- ㅆ: ㄷ
50
- ㅇ: ㅇ
51
- ㅈ: ㄷ
52
- ㅊ: ㄷ
53
- ㅋ: ㄱ
54
- ㅌ: ㄷ
55
- ㅍ: ㅂ
56
- ㅎ:
57
- transformation:
58
- # changing the order affects the quality of the transformation
59
- sequence for 1:
60
- - rule_5_1
61
- - rule_5_3
29
+ jongsung sound:
30
+ ㄱ: ㄱ
31
+ ㄲ: ㄱ
32
+ ㄳ: ㄱ
33
+ ㄴ: ㄴ
34
+ ㄵ: ㄴ
35
+ ㄶ: ㄴ
36
+ ㄷ: ㄷ
37
+ ㄹ: ㄹ
38
+ ㄺ: ㄱ
39
+ ㄻ: ㅁ
40
+ ㄼ: ㄹ
41
+ ㄽ: ㄹ
42
+ ㄾ: ㅌ
43
+ ㄿ: ㅂ
44
+ ㅀ: ㄹ
45
+ ㅁ: ㅁ
46
+ ㅂ: ㅂ
47
+ ㅄ: ㅂ
48
+ ㅅ: ㄷ
49
+ ㅆ: ㄷ
50
+ ㅇ: ㅇ
51
+ ㅈ: ㄷ
52
+ ㅊ: ㄷ
53
+ ㅋ: ㄱ
54
+ ㅌ: ㄷ
55
+ ㅍ: ㅂ
56
+ ㅎ:
57
+ transformation:
58
+ # changing the order affects the quality of the transformation
59
+ sequence for 1:
60
+ - rule_5_1
61
+ - rule_5_3
62
62
 
63
- sequence for 2:
64
- - rule_16
65
- - rule_17
66
- - rule_18
67
- - rule_19
68
- - rule_5_1
69
- - rule_5_3
70
- - rule_30
71
- - rule_23
72
- - rule_24
73
- - rule_25
74
- - rule_12
75
- - rule_20
76
- - rule_10
77
- - rule_27
78
- - rule_9
79
- - rule_11
80
- - rule_14
81
- - rule_13
82
- - rule_15
83
- blocking rule:
84
- rule_16: [rule_30]
63
+ sequence for 2:
64
+ - rule_16
65
+ - rule_17
66
+ - rule_18
67
+ - rule_19
68
+ - rule_5_1
69
+ - rule_5_3
70
+ - rule_30
71
+ - rule_23
72
+ - rule_24
73
+ - rule_25
74
+ - rule_12
75
+ - rule_20
76
+ - rule_10
77
+ - rule_27
78
+ - rule_9
79
+ - rule_11
80
+ - rule_14
81
+ - rule_13
82
+ - rule_15
83
+ blocking rule:
84
+ rule_16: [rule_30]
85
85
 
86
86
  number:
87
- positive: 플러스
88
- negative: 마이너스
89
- decimal point: 점
90
- units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수]
91
- digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구]
92
- post substitution:
93
- "^일만": 만
87
+ positive: 플러스
88
+ negative: 마이너스
89
+ decimal point: 점
90
+ units: ["", 만, 억, 조, 경, 해, 자, 양, 구, 간, 정, 재, 극, 항하사, 아승기, 나유타, 불가사의, 무량대수]
91
+ digits: [영, 일, 이, 삼, 사, 오, 육, 칠, 팔, 구]
92
+ post substitution:
93
+ "^일만": 만
94
94
 
95
- # 정수형일 때 또다른 표현법 (나이, 시간)
96
- alt notation:
97
- when suffix:
98
- 개:
99
- max:
100
- 명:
101
- max:
102
- 살:
103
- max:
104
- 시:
105
- max: 12
106
- tenfolds: [열, 스물, 서른, 마흔, 쉰, 예순, 일흔, 여든, 아흔, 백]
107
- digits: ["", 한, 두, 세, 네, 다섯, 여섯, 일곱, 여덟, 아홉]
108
- post substitution:
109
- "스물$": 스무
95
+ # 정수형일 때 또다른 표현법 (나이, 시간)
96
+ alt notation:
97
+ when suffix:
98
+ 개:
99
+ max:
100
+ 명:
101
+ max:
102
+ 살:
103
+ max:
104
+ 시:
105
+ max: 12
106
+ tenfolds: [열, 스물, 서른, 마흔, 쉰, 예순, 일흔, 여든, 아흔, 백]
107
+ digits: ["", 한, 두, 세, 네, 다섯, 여섯, 일곱, 여덟, 아홉]
108
+ post substitution:
109
+ "스물$": 스무
110
110
 
111
111
  romanization:
112
- chosung:
113
- ㄱ: g
114
- ㄲ: kk
115
- ㅋ: k
116
- ㄷ: d
117
- ㄸ: tt
118
- ㅌ: t
119
- ㅂ: b
120
- ㅃ: pp
121
- ㅍ: p
122
- ㅈ: j
123
- ㅉ: jj
124
- ㅊ: ch
125
- ㅅ: s
126
- ㅆ: ss
127
- ㅎ: h
128
- ㄴ: n
129
- ㅁ: m
130
- ㄹ: r
131
- ㅇ: "-"
132
- jungsung:
133
- ㅏ: a
134
- ㅓ: eo
135
- ㅗ: o
136
- ㅜ: u
137
- ㅡ: eu
138
- ㅣ: i
139
- ㅐ: ae
140
- ㅔ: e
141
- ㅚ: oe
142
- ㅟ: wi
143
- ㅑ: ya
144
- ㅕ: yeo
145
- ㅛ: yo
146
- ㅠ: yu
147
- ㅒ: yae
148
- ㅖ: ye
149
- ㅘ: wa
150
- ㅙ: wae
151
- ㅝ: wo
152
- ㅞ: we
153
- ㅢ: ui
154
- jongsung:
155
- ㄱ: k
156
- ㄴ: n-
157
- ㄷ: t
158
- ㄹ: l
159
- ㅁ: m
160
- ㅂ: p
161
- ㅇ: ng
162
- post substitution:
163
- # 제2항 [붙임 2]‘ㄹ’은 모음 앞에서는 ‘r’로, 자음 앞이나 어말에서는
164
- # ‘l’로 적는다. 단, ‘ㄹㄹ’은 ‘ll’로 적는다.
165
- lr: ll
166
- "-w": w
167
- "-y": y
168
- kkk: k-kk
169
- ttt: t-tt
170
- ppp: p-pp
171
- "--": "-"
172
- "n-([^gaeiou])": "n\\1"
173
- "-(\\s)": "\\1"
174
- "-$": ""
112
+ chosung:
113
+ ㄱ: g
114
+ ㄲ: kk
115
+ ㅋ: k
116
+ ㄷ: d
117
+ ㄸ: tt
118
+ ㅌ: t
119
+ ㅂ: b
120
+ ㅃ: pp
121
+ ㅍ: p
122
+ ㅈ: j
123
+ ㅉ: jj
124
+ ㅊ: ch
125
+ ㅅ: s
126
+ ㅆ: ss
127
+ ㅎ: h
128
+ ㄴ: n
129
+ ㅁ: m
130
+ ㄹ: r
131
+ ㅇ: "-"
132
+ jungsung:
133
+ ㅏ: a
134
+ ㅓ: eo
135
+ ㅗ: o
136
+ ㅜ: u
137
+ ㅡ: eu
138
+ ㅣ: i
139
+ ㅐ: ae
140
+ ㅔ: e
141
+ ㅚ: oe
142
+ ㅟ: wi
143
+ ㅑ: ya
144
+ ㅕ: yeo
145
+ ㅛ: yo
146
+ ㅠ: yu
147
+ ㅒ: yae
148
+ ㅖ: ye
149
+ ㅘ: wa
150
+ ㅙ: wae
151
+ ㅝ: wo
152
+ ㅞ: we
153
+ ㅢ: ui
154
+ jongsung:
155
+ ㄱ: k
156
+ ㄴ: n-
157
+ ㄷ: t
158
+ ㄹ: l
159
+ ㅁ: m
160
+ ㅂ: p
161
+ ㅇ: ng
162
+ post substitution:
163
+ # 제2항 [붙임 2]‘ㄹ’은 모음 앞에서는 ‘r’로, 자음 앞이나 어말에서는
164
+ # ‘l’로 적는다. 단, ‘ㄹㄹ’은 ‘ll’로 적는다.
165
+ lr: ll
166
+ "-w": w
167
+ "-y": y
168
+ kkk: k-kk
169
+ ttt: t-tt
170
+ ppp: p-pp
171
+ "--": "-"
172
+ "n-([^gaeiou])": "n\\1"
173
+ "-(\\s)": "\\1"
174
+ "-$": ""
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # Junegunn Choi (junegunn.c@gmail.com)
4
+ # 2011/04/02-
5
+
6
+ # A dirty little script to fetch test sets from http://www.korean.go.kr
7
+
8
+ require 'open-uri'
9
+ require 'yaml'
10
+
11
+ # Crawl romanization test set
12
+ rdata = open('http://www.korean.go.kr/09_new/dic/rule/rule_roman_0101.jsp').read.
13
+ scan(%r{th>(.*?)</td}m).flatten.map { |e| e.split %r{<.*>}m }.
14
+ select { |e| e.length == 2 }
15
+
16
+ File.open(File.dirname(__FILE__) + '/../test/romanization.yml', 'w') do | f |
17
+ f.puts "---"
18
+
19
+ rdata.each do | arr |
20
+ f.puts "\"#{arr.first}\": \"#{arr.last}\""
21
+ end
22
+ end
23
+
24
+ exit
25
+
26
+ # Crawl pronunciation test set
27
+ m = {}
28
+ %w[
29
+ http://www.korean.go.kr/09_new/dic/rule/rule02_0202.jsp
30
+ http://www.korean.go.kr/09_new/dic/rule/rule02_0204.jsp
31
+ http://www.korean.go.kr/09_new/dic/rule/rule02_0205.jsp
32
+ http://www.korean.go.kr/09_new/dic/rule/rule02_0206.jsp
33
+ http://www.korean.go.kr/09_new/dic/rule/rule02_0207.jsp
34
+ ].each do | url |
35
+ open(url).read.scan(/>([^0-9<>);?]+?)\[(.*?)\]</).each do | match |
36
+ puts match[0, 2].join(' => ')
37
+ m[match[0]] = match[1]
38
+ end
39
+ end
40
+
41
+ File.open(File.dirname(__FILE__) + '/../test/pronunciation.yml', 'w') do | f |
42
+ f.puts "---"
43
+ m.each do | k, v |
44
+ k = k.sub(/.*→/, '').gsub(/-/, '')
45
+ v = v.sub(/.*→/, '').gsub(/[\(:ː\)]/, '').split(%r{[/∼]})
46
+ f.puts "\"#{k}\": [#{v.join(', ')}]"
47
+ end
48
+ end
49
+
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ Gem::Specification.new do |gem|
6
+ gem.name = %q{gimchi}
7
+ gem.version = "0.2.0"
8
+ gem.authors = ["Junegunn Choi"]
9
+ gem.email = ["junegunn.c@gmail.com"]
10
+ gem.description = %q{A Ruby gem for Korean characters}
11
+ gem.summary = %q{A Ruby gem for Korean characters}
12
+ gem.homepage = "https://github.com/junegunn/gimchi"
13
+
14
+ gem.files = `git ls-files`.split($/).reject { |f| f =~ %r[^viz/] }
15
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
+ gem.require_paths = ["lib"]
18
+ gem.license = "MIT"
19
+
20
+ gem.add_development_dependency 'ansi'
21
+ end
@@ -2,10 +2,380 @@
2
2
  # encoding: UTF-8
3
3
  # Junegunn Choi (junegunn.c@gmail.com)
4
4
 
5
- require 'gimchi/korean'
5
+ require 'yaml'
6
+ require 'set'
6
7
  require 'gimchi/char'
7
8
  require 'gimchi/pronouncer'
8
9
 
9
- if RUBY_VERSION =~ /^1\.8\./
10
- require 'gimchi/patch_1.8'
11
- end
10
+ class Gimchi
11
+ class << self
12
+ def setup
13
+ @@default ||= Gimchi.new
14
+ end
15
+
16
+ def Char ch
17
+ @@default.kchar ch
18
+ end
19
+
20
+ [
21
+ :decompose,
22
+ :compose,
23
+ :korean_char?,
24
+ :complete_korean_char?,
25
+ :kchar,
26
+ :kchar?,
27
+ :chosung?,
28
+ :jungsung?,
29
+ :jongsung?,
30
+ :read_number,
31
+ :pronounce,
32
+ :romanize
33
+ ].each do |sym|
34
+ define_method(sym) do |*arg, &b|
35
+ @@default.send sym, *arg, &b
36
+ end
37
+ end
38
+ end
39
+
40
+ CONFIG_FILE_PATH = File.expand_path('../../config/default.yml', __FILE__)
41
+ attr_reader :config, :chosungs, :jungsungs, :jongsungs
42
+
43
+ # Initialize Gimchi.
44
+ def initialize
45
+ symbolize_keys = lambda do |val|
46
+ case val
47
+ when Hash
48
+ {}.tap do |h|
49
+ val.each do |k, v|
50
+ k = k.gsub(' ', '_').to_sym if k =~ /[a-z0-9 ]/
51
+ h[k] = symbolize_keys.call v
52
+ end
53
+ end
54
+ when Array
55
+ val.map { |v| symbolize_keys.call v }
56
+ else
57
+ val
58
+ end
59
+ end
60
+ @config = symbolize_keys.call YAML.load(File.read CONFIG_FILE_PATH)
61
+
62
+ [
63
+ @config[:romanization][:post_substitution],
64
+ @config[:number][:post_substitution],
65
+ @config[:number][:alt_notation][:post_substitution]
66
+ ].each do |r|
67
+ r.keys.each do |k|
68
+ r[Regexp.compile k.to_s] = r.delete k
69
+ end
70
+ end
71
+ @config.freeze
72
+
73
+ @pronouncer = Gimchi::Pronouncer.send :new, self
74
+
75
+ @chosungs = config[:structure][:chosung]
76
+ @jungsungs = config[:structure][:jungsung]
77
+ @jongsungs = config[:structure][:jongsung]
78
+ @chosung_set = Set[*@chosungs]
79
+ @jungsung_set = Set[*@jungsungs]
80
+ @jongsung_set = Set[*@jongsungs]
81
+ @all = @chosung_set + @jungsung_set + @jongsung_set
82
+ end
83
+
84
+ # Decompose a Korean character into 3 components
85
+ # @param [String] ch Korean character
86
+ # @return [Array]
87
+ def decompose ch
88
+ kchar(ch).to_a
89
+ end
90
+
91
# Composes chosung, jungsung and jongsung into a single Korean character.
#
# When both chosung and jungsung are given, the syllable code point is
# 0xAC00 + (chosung index * 21 * 28) + (jungsung index * 28) + jongsung index,
# where jongsung index 0 stands for "no jongsung". A component that is not
# found in its list falls back to index 0.
#
# @param [String] chosung Initial consonant
# @param [String] jungsung Vowel
# @param [String] jongsung Final consonant
# @return [String] The composed character; an empty string when both chosung
#   and jungsung are nil; the single given component when only one of
#   chosung / jungsung is present
def compose(chosung, jungsung = nil, jongsung = nil)
  return "" if chosung.nil? && jungsung.nil?
  return chosung || jungsung unless chosung && jungsung

  n1 = chosungs.index(chosung) || 0
  n2 = jungsungs.index(jungsung) || 0
  # Slot 0 is reserved for "no jongsung", so real jongsungs are shifted by one.
  jong_idx = jongsungs.index(jongsung)
  n3 = jong_idx ? jong_idx + 1 : 0

  [0xAC00 + n1 * (21 * 28) + n2 * 28 + n3].pack('U')
end
108
+ end
109
+
110
+ # @param [String] ch
111
+ # @return [Boolean]
112
+ def chosung? ch
113
+ @chosung_set.include? ch
114
+ end
115
+
116
+ # @param [String] ch
117
+ # @return [Boolean]
118
+ def jungsung? ch
119
+ @jungsung_set.include? ch
120
+ end
121
+
122
+ # @param [String] ch
123
+ # @return [Boolean]
124
+ def jongsung? ch
125
+ @jongsung_set.include? ch
126
+ end
127
+
128
+ # Checks if the given character is a korean character.
129
+ # @param [String] ch A string of size 1
130
+ def korean_char? ch
131
+ raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
132
+
133
+ complete_korean_char?(ch) || @all.include?(ch)
134
+ end
135
+ alias kchar? korean_char?
136
+
137
# Checks if the given character is a "complete" Korean character.
# A "complete" Korean character must have chosung and jungsung, with an
# optional jongsung. An empty string is not a complete character.
#
# @param [String] ch A string of size 1
# @return [Boolean]
# @raise [ArgumentError] if the input is longer than one character
def complete_korean_char?(ch)
  raise ArgumentError.new('Lengthy input') if str_length(ch) > 1

  # Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
  # An empty input yields no code point and must not pass vacuously.
  code = ch.unpack('U').first
  !code.nil? && code >= 0xAC00 && code <= 0xD7A3
end
146
+
147
+ # Returns a Gimchi::Char object for the given Korean character.
148
+ # @param [String] ch Korean character in String
149
+ # @return [Gimchi::Char] Gimchi::Char instance
150
+ def kchar ch
151
+ Gimchi::Char.new(self, ch)
152
+ end
153
+
154
+ # Reads numeric expressions in Korean way.
155
+ # @param [String, Number] str Numeric type or String containing numeric expressions
156
+ # @return [String] Output string
157
+ def read_number str
158
+ str.to_s.gsub(/(([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?)(\s*.)?/) {
159
+ read_number_sub($1, $5)
160
+ }
161
+ end
162
+
163
+ # Returns the pronunciation of the given string containing Korean characters.
164
+ # Takes optional options hash.
165
+ #
166
+ # @param [String] str Input string
167
+ # @param [Hash] options Options
168
+ # @option options [Boolean] each_char Each character of the string is pronounced respectively.
169
+ # @option options [Boolean] slur Strings separated by whitespaces are processed again as if they were contiguous.
170
+ # @option options [Boolean] number Numeric parts of the string are also pronounced in Korean.
171
+ # @option options [Array] except Allows you to skip certain transformations.
172
+ # @return [String] Output string
173
+ def pronounce str, options = {}
174
+ options = {
175
+ :each_char => false,
176
+ :slur => false,
177
+ :number => true,
178
+ :except => [],
179
+ :debug => false
180
+ }.merge options
181
+
182
+ str = read_number(str) if options[:number]
183
+
184
+ result, transforms = @pronouncer.send :pronounce!, str, options
185
+
186
+ if options[:debug]
187
+ return result, transforms
188
+ else
189
+ return result
190
+ end
191
+ end
192
+
193
+ # Returns the romanization (alphabetical notation) of the given Korean string.
194
+ # http://en.wikipedia.org/wiki/Korean_romanization
195
+ # @param [String] str Input Korean string
196
+ # @param [Hash] options Options
197
+ # @option options [Boolean] as_pronounced If true, #pronounce is internally called before romanize
198
+ # @option options [Boolean] number Whether to read numeric expressions in the string
199
+ # @option options [Boolean] slur Same as :slur in #pronounce
200
+ # @return [String] Output string in Roman Alphabet
201
+ # @see Gimchi#pronounce
202
+ def romanize str, options = {}
203
+ options = {
204
+ :as_pronounced => true,
205
+ :number => true,
206
+ :slur => false
207
+ }.merge options
208
+
209
+ rdata = config[:romanization]
210
+ post_subs = rdata[:post_substitution]
211
+ rdata = [rdata[:chosung], rdata[:jungsung], rdata[:jongsung]]
212
+
213
+ str = pronounce str,
214
+ :each_char => !options[:as_pronounced],
215
+ :number => options[:number],
216
+ :slur => options[:slur],
217
+ # 제1항 [붙임 1] ‘ㅢ’는 ‘ㅣ’로 소리 나더라도 ‘ui’로 적는다.
218
+ :except => %w[rule_5_3]
219
+ dash = rdata[0]["ㅇ"]
220
+ romanization = ""
221
+
222
+ romanize_chunk = lambda do |chunk|
223
+ chunk.each_char.map { |ch| kchar(ch) rescue ch }.each do |kc|
224
+ kc.to_a.each_with_index do |comp, idx|
225
+ next if comp.nil?
226
+ comp = rdata[idx][comp] || comp
227
+ comp = comp[1..-1] if comp[0, 1] == dash &&
228
+ (romanization.empty? || romanization[-1, 1] =~ /\s/)
229
+ romanization += comp
230
+ end
231
+ end
232
+
233
+ return post_subs.keys.inject(romanization) { | output, pattern |
234
+ output.gsub(pattern, post_subs[pattern])
235
+ }
236
+ end
237
+
238
+ k_chunk = ""
239
+ str.each_char do | c |
240
+ if korean_char? c
241
+ k_chunk += c
242
+ else
243
+ unless k_chunk.empty?
244
+ romanization = romanize_chunk.call k_chunk
245
+ k_chunk = ""
246
+ end
247
+ romanization += c
248
+ end
249
+ end
250
+ romanization = romanize_chunk.call k_chunk unless k_chunk.empty?
251
+ romanization
252
+ end
253
+
254
+ private
255
+ def str_length str
256
+ str.length
257
+ end
258
+
259
# Reads a single numeric expression in Korean.
#
# @param [String] num Numeric expression; may carry a leading sign,
#   thousands separators (","), a fractional part and an exponent
# @param [String, nil] next_char The (possibly space-prefixed) character that
#   followed the number in the input; it is appended to the output and also
#   selects the alternative notation when it is a configured suffix
# @return [String] The number read in Korean, followed by next_char
# @raise [RangeError] if the number needs a unit beyond the configured ones
def read_number_sub(num, next_char)
  nconfig = config[:number]
  next_char = next_char.to_s

  # The caller's substitution has already consumed next_char, so it must be
  # given back here as well; otherwise "0개" would lose its suffix.
  return nconfig[:digits].first + next_char if num == '0'

  num = num.gsub(',', '')
  is_float = !num.match(/[\.e]/).nil?

  # Alternative notation for integers with proper suffix
  alt = false
  suffixes = nconfig[:alt_notation][:when_suffix]
  if !is_float && suffixes.key?(next_char.strip)
    max = suffixes[next_char.strip][:max]
    alt = true if max.nil? || num.to_i <= max
  end

  # Sign
  sign = []
  if num =~ /^-/
    num = num.sub(/^-\s*/, '')
    sign << nconfig[:negative]
  elsif num =~ /^\+/
    num = num.sub(/^\+\s*/, '')
    sign << nconfig[:positive]
  end

  if is_float
    below = nconfig[:decimal_point]
    below = nconfig[:digits][0] + below if num.to_f < 1

    if (md = num.match(/(.*)e(.*)/))
      # Expand the exponent by moving the decimal point within the digits.
      dp = md[1].index('.')
      num = md[1].tr '.', ''
      exp = md[2].to_i

      dp += exp
      if dp >= num.length
        # The point moves to or past the last digit: the value is an
        # integer, so no fractional part (and no decimal point) is read.
        num = num.ljust(dp, '0')
        num = num.sub(/^0+([1-9])/, "\\1")

        below = ""
      elsif dp < 0
        num = '0.' + '0' * (-dp) + num
      else
        num[dp, 1] = '.' + num[dp, 1]
      end
    end
    # Digits after the decimal point are read one by one.
    if num.include? '.'
      num.sub(/.*\./, '').each_char do |char|
        below += nconfig[:digits][char.to_i]
      end
    end
    num = num.sub(/\..*/, '')
  else
    below = ""
  end

  tokens = []
  unit_idx = -1
  num = num.to_i
  # Process the integer part in groups of four digits, lowest group first.
  while num > 0
    v = num % 10000

    unit_idx += 1
    if v > 0
      if alt == false || unit_idx >= 1
        str = ""
        [[1000, '천'],
         [100, '백'],
         [10, '십']].each do |u, sub_unit|
          # A leading "one" is omitted: 100 is read "백", not "일백".
          str += (nconfig[:digits][v / u] if v / u != 1).to_s + sub_unit + ' ' if v / u > 0
          v %= u
        end
        str += nconfig[:digits][v] if v > 0

        raise RangeError, "number too large" unless nconfig[:units][unit_idx]
        tokens << str.sub(/ $/, '') + nconfig[:units][unit_idx]
      else
        # Alternative notation applies only to the lowest group, and only
        # to its tens and ones.
        str = ""
        tenfolds = nconfig[:alt_notation][:tenfolds]
        digits = nconfig[:alt_notation][:digits]
        alt_post_subs = nconfig[:alt_notation][:post_substitution]

        [[1000, '천'],
         [100, '백']].each do |u, sub_unit|
          str += (nconfig[:digits][v / u] if v / u != 1).to_s + sub_unit + ' ' if v / u > 0
          v %= u
        end

        str += tenfolds[(v / 10) - 1] if v / 10 > 0
        v %= 10
        str += digits[v] if v > 0

        alt_post_subs.each do |p, s|
          str.gsub!(p, s)
        end
        tokens << str.sub(/ $/, '') + nconfig[:units][unit_idx]
      end
    end
    num /= 10000
  end

  # Tokens were collected lowest group first; the sign goes last so that it
  # ends up in front after reversing.
  tokens += sign unless sign.empty?
  ret = tokens.reverse.join(' ') + below + next_char
  nconfig[:post_substitution].each do |p, s|
    ret.gsub!(p, s)
  end
  ret
end
377
+ end#Gimchi
378
+
379
+ require 'gimchi/patch_1.8'
380
+
381
+ Gimchi.setup