gimchi 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +17 -0
- data/CHANGELOG.rdoc +42 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +3 -1
- data/{README.ko.markdown → README.ko.md} +68 -66
- data/README.md +162 -0
- data/Rakefile +7 -0
- data/config/default.yml +162 -162
- data/crawler/crawler.rb +49 -0
- data/gimchi.gemspec +21 -0
- data/lib/gimchi.rb +374 -4
- data/lib/gimchi/char.rb +26 -38
- data/lib/gimchi/patch_1.8.rb +9 -9
- data/lib/gimchi/pronouncer.rb +26 -27
- data/test/helper.rb +1 -0
- data/test/test_gimchi.rb +114 -86
- metadata +23 -51
- data/README.markdown +0 -155
- data/lib/gimchi/korean.rb +0 -323
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimchi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,33 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-03-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement:
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 1.0.0
|
22
|
-
type: :development
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: *2158357040
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: jeweler
|
27
|
-
requirement: &2158358840 !ruby/object:Gem::Requirement
|
28
|
-
none: false
|
29
|
-
requirements:
|
30
|
-
- - ~>
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 1.5.2
|
33
|
-
type: :development
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: *2158358840
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: rcov
|
38
|
-
requirement: &2158315180 !ruby/object:Gem::Requirement
|
15
|
+
name: ansi
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
39
17
|
none: false
|
40
18
|
requirements:
|
41
19
|
- - ! '>='
|
@@ -43,42 +21,39 @@ dependencies:
|
|
43
21
|
version: '0'
|
44
22
|
type: :development
|
45
23
|
prerelease: false
|
46
|
-
version_requirements:
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: ansi
|
49
|
-
requirement: &2154666280 !ruby/object:Gem::Requirement
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
25
|
none: false
|
51
26
|
requirements:
|
52
27
|
- - ! '>='
|
53
28
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
description: Gimchi knows how to pronounce Korean strings and how to write them in
|
59
|
-
roman alphabet.
|
60
|
-
email: junegunn.c@gmail.com
|
29
|
+
version: '0'
|
30
|
+
description: A Ruby gem for Korean characters
|
31
|
+
email:
|
32
|
+
- junegunn.c@gmail.com
|
61
33
|
executables: []
|
62
34
|
extensions: []
|
63
|
-
extra_rdoc_files:
|
64
|
-
- LICENSE.txt
|
65
|
-
- README.ko.markdown
|
66
|
-
- README.markdown
|
35
|
+
extra_rdoc_files: []
|
67
36
|
files:
|
37
|
+
- .document
|
38
|
+
- .gitignore
|
39
|
+
- CHANGELOG.rdoc
|
40
|
+
- Gemfile
|
41
|
+
- LICENSE.txt
|
42
|
+
- README.ko.md
|
43
|
+
- README.md
|
44
|
+
- Rakefile
|
68
45
|
- config/default.yml
|
46
|
+
- crawler/crawler.rb
|
47
|
+
- gimchi.gemspec
|
69
48
|
- lib/gimchi.rb
|
70
49
|
- lib/gimchi/char.rb
|
71
|
-
- lib/gimchi/korean.rb
|
72
50
|
- lib/gimchi/patch_1.8.rb
|
73
51
|
- lib/gimchi/pronouncer.rb
|
74
|
-
- LICENSE.txt
|
75
|
-
- README.ko.markdown
|
76
|
-
- README.markdown
|
77
52
|
- test/helper.rb
|
78
53
|
- test/pronunciation.yml
|
79
54
|
- test/romanization.yml
|
80
55
|
- test/test_gimchi.rb
|
81
|
-
homepage:
|
56
|
+
homepage: https://github.com/junegunn/gimchi
|
82
57
|
licenses:
|
83
58
|
- MIT
|
84
59
|
post_install_message:
|
@@ -91,9 +66,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
66
|
- - ! '>='
|
92
67
|
- !ruby/object:Gem::Version
|
93
68
|
version: '0'
|
94
|
-
segments:
|
95
|
-
- 0
|
96
|
-
hash: -4061568131035276090
|
97
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
70
|
none: false
|
99
71
|
requirements:
|
@@ -102,10 +74,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
74
|
version: '0'
|
103
75
|
requirements: []
|
104
76
|
rubyforge_project:
|
105
|
-
rubygems_version: 1.8.
|
77
|
+
rubygems_version: 1.8.25
|
106
78
|
signing_key:
|
107
79
|
specification_version: 3
|
108
|
-
summary:
|
80
|
+
summary: A Ruby gem for Korean characters
|
109
81
|
test_files:
|
110
82
|
- test/helper.rb
|
111
83
|
- test/pronunciation.yml
|
data/README.markdown
DELETED
@@ -1,155 +0,0 @@
|
|
1
|
-
# gimchi
|
2
|
-
|
3
|
-
Gimchi is a simple Ruby gem which knows how to handle Korean strings. It knows
|
4
|
-
how to dissect Korean characters into their 3 components, namely chosung,
|
5
|
-
jungsung and optional jongsung. It knows how Korean sentences are pronounced
|
6
|
-
and how they're written in the Roman alphabet.
|
7
|
-
|
8
|
-
Gimchi (only partially) implements the following rules dictated by
|
9
|
-
The National Institute of The Korean Language (http://www.korean.go.kr)
|
10
|
-
* Korean Standard Pronunciation
|
11
|
-
* Korean Romanization
|
12
|
-
|
13
|
-
## Installation
|
14
|
-
```
|
15
|
-
gem install gimchi
|
16
|
-
```
|
17
|
-
|
18
|
-
## Usage
|
19
|
-
|
20
|
-
### Creating Gimchi::Korean instance
|
21
|
-
```ruby
|
22
|
-
require 'gimchi'
|
23
|
-
|
24
|
-
ko = Gimchi::Korean.new
|
25
|
-
```
|
26
|
-
|
27
|
-
### Checks if the given character is in Korean alphabet
|
28
|
-
```ruby
|
29
|
-
ko.korean_char? 'ㄱ' # true
|
30
|
-
ko.complete_korean_char? 'ㄱ' # false
|
31
|
-
|
32
|
-
ko.korean_char? 'ㅏ' # true
|
33
|
-
ko.complete_korean_char? 'ㅏ' # false
|
34
|
-
|
35
|
-
ko.korean_char? '가' # true
|
36
|
-
ko.complete_korean_char? '가' # true
|
37
|
-
|
38
|
-
# Shorthand of korean_char?
|
39
|
-
ko.kchar? '가' # true
|
40
|
-
```
|
41
|
-
|
42
|
-
### Usage of Gimchi::Korean::Char
|
43
|
-
```ruby
|
44
|
-
kc = ko.kchar "한"
|
45
|
-
kc.class # Gimchi::Korean::Char
|
46
|
-
|
47
|
-
kc.chosung # "ㅎ"
|
48
|
-
kc.jungsung # "ㅏ"
|
49
|
-
kc.jongsung # "ㄴ"
|
50
|
-
kc.to_a # ["ㅎ", "ㅏ", "ㄴ"]
|
51
|
-
kc.to_s # "한"
|
52
|
-
|
53
|
-
kc.complete? # true
|
54
|
-
kc.partial? # false
|
55
|
-
ko.kchar("ㅏ").partial? # true
|
56
|
-
|
57
|
-
# Modifying its elements
|
58
|
-
kc.chosung = 'ㄷ'
|
59
|
-
kc.jongsung = 'ㄹ'
|
60
|
-
kc.to_s # "달"
|
61
|
-
kc.complete? # true
|
62
|
-
kc.partial? # false
|
63
|
-
|
64
|
-
kc.chosung = nil
|
65
|
-
kc.jongsung = nil
|
66
|
-
kc.complete? # false
|
67
|
-
kc.partial? # true
|
68
|
-
|
69
|
-
# Alias of kchar
|
70
|
-
kc = ko.korean_char "한"
|
71
|
-
|
72
|
-
# Array of Gimchi::Korean::Char's
|
73
|
-
arr = ko.convert '이것은 한글입니다.'
|
74
|
-
# [이, 것, 은, " ", 한, 글, 입, 니, 다, "."]
|
75
|
-
|
76
|
-
arr[0].class # Gimchi::Korean::Char
|
77
|
-
|
78
|
-
# Dissects given String
|
79
|
-
arr = ko.dissect '이것은 한글입니다.'
|
80
|
-
# ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ",
|
81
|
-
# "ㅎ", "ㅏ", "ㄴ", "ㄱ", "ㅡ", "ㄹ", " ", "ㅇ", "ㅣ", "ㅂ",
|
82
|
-
# "ㄴ", "ㅣ", "ㄷ", "ㅏ", "."]
|
83
|
-
```
|
84
|
-
|
85
|
-
### Reading numbers in Korean
|
86
|
-
```ruby
|
87
|
-
ko.read_number(1999) # "천 구백 구십 구"
|
88
|
-
ko.read_number(- 100.123) # "마이너스 백점일이삼"
|
89
|
-
ko.read_number("153,191,100,678.3214")
|
90
|
-
# "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사"
|
91
|
-
|
92
|
-
# Age, Time ( -살, -시 )
|
93
|
-
ko.read_number("20살") # "스무살"
|
94
|
-
ko.read_number("13 살") # "열세 살"
|
95
|
-
ko.read_number("7시 30분") # "일곱시 삼십분"
|
96
|
-
```
|
97
|
-
|
98
|
-
### Standard pronunciation (partially implemented)
|
99
|
-
```ruby
|
100
|
-
str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
|
101
|
-
ko.pronounce str
|
102
|
-
# "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 모라너코"
|
103
|
-
|
104
|
-
ko.pronounce str, :slur => true
|
105
|
-
# "돼써 돼써 이제 그런 가르치믄 돼써 매이 라치 밀곱 씨 삼십 뿐까지 우릴 조그만 교실로 모라너코"
|
106
|
-
|
107
|
-
ko.pronounce str, :pronounce_each_char => true
|
108
|
-
# "됃어 됃어 이제 그런 가르침은 됃어 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 몰아너고"
|
109
|
-
|
110
|
-
ko.pronounce str, :number => false
|
111
|
-
# "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 7 시 30 분까지 우릴 조그만 교실로 모라너코"
|
112
|
-
```
|
113
|
-
|
114
|
-
### Romanization (partially implemented)
|
115
|
-
```ruby
|
116
|
-
str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
|
117
|
-
|
118
|
-
ko.romanize str
|
119
|
-
# "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo moraneoko"
|
120
|
-
ko.romanize str, :slur => true
|
121
|
-
# "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-i rachi milgop ssi samsip ppunkkaji uril jogeuman gyosillo moraneoko"
|
122
|
-
ko.romanize str, :as_pronounced => false
|
123
|
-
# "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo"
|
124
|
-
ko.romanize str, :number => false
|
125
|
-
# "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim 7 si 30 bunkkaji uril jogeuman gyosillo moraneoko"
|
126
|
-
```
|
127
|
-
|
128
|
-
## Limitation of the implementation
|
129
|
-
|
130
|
-
Unfortunately in order to implement the complete specification of Korean
|
131
|
-
pronunciation and romanization, we need NLP, huge Korean dictionaries and even
|
132
|
-
semantic analysis of the given string. And even with all that complex
|
133
|
-
processing, we cannot guarantee 100% accuracy of the output. So yes, that is
|
134
|
-
definitely not what this gem tries to achieve. Gimchi tries to achieve "some"
|
135
|
-
level of accuracy with relatively simple code.
|
136
|
-
|
137
|
-
Currently, Gimchi code contains a lot of ad-hoc (possibly invalid) patches
|
138
|
-
that try to improve the quality of the output, which should better be
|
139
|
-
refactored anytime soon.
|
140
|
-
|
141
|
-
## Contributing to gimchi
|
142
|
-
|
143
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
144
|
-
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
145
|
-
* Fork the project
|
146
|
-
* Start a feature/bugfix branch
|
147
|
-
* Commit and push until you are happy with your contribution
|
148
|
-
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
149
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
150
|
-
|
151
|
-
## Copyright
|
152
|
-
|
153
|
-
Copyright (c) 2011 Junegunn Choi. See LICENSE.txt for
|
154
|
-
further details.
|
155
|
-
|
data/lib/gimchi/korean.rb
DELETED
@@ -1,323 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
module Gimchi
|
4
|
-
class Korean
|
5
|
-
DEFAULT_CONFIG_FILE_PATH =
|
6
|
-
File.dirname(__FILE__) + '/../../config/default.yml'
|
7
|
-
|
8
|
-
# Returns the YAML configuration used by this Korean instance.
|
9
|
-
# @return [String]
|
10
|
-
attr_reader :config
|
11
|
-
|
12
|
-
# Initialize Gimchi::Korean.
|
13
|
-
# @param [String] config_file You can override many parts of the implementation by customizing config file
|
14
|
-
def initialize config_file = DEFAULT_CONFIG_FILE_PATH
|
15
|
-
require 'yaml'
|
16
|
-
@config = YAML.load(File.read config_file)
|
17
|
-
|
18
|
-
[
|
19
|
-
@config['romanization']['post substitution'],
|
20
|
-
@config['number']['post substitution'],
|
21
|
-
@config['number']['alt notation']['post substitution']
|
22
|
-
].each do |r|
|
23
|
-
r.keys.each do |k|
|
24
|
-
r[Regexp.compile k] = r.delete k
|
25
|
-
end
|
26
|
-
end
|
27
|
-
@config.freeze
|
28
|
-
|
29
|
-
@pronouncer = Korean::Pronouncer.send :new, self
|
30
|
-
end
|
31
|
-
|
32
|
-
# Array of chosung's.
|
33
|
-
#
|
34
|
-
# @return [Array] Array of chosung strings
|
35
|
-
def chosungs
|
36
|
-
config['structure']['chosung']
|
37
|
-
end
|
38
|
-
|
39
|
-
# Array of jungsung's.
|
40
|
-
# @return [Array] Array of jungsung strings
|
41
|
-
def jungsungs
|
42
|
-
config['structure']['jungsung']
|
43
|
-
end
|
44
|
-
|
45
|
-
# Array of jongsung's.
|
46
|
-
# @return [Array] Array of jongsung strings
|
47
|
-
def jongsungs
|
48
|
-
config['structure']['jongsung']
|
49
|
-
end
|
50
|
-
|
51
|
-
# Checks if the given character is a korean character.
|
52
|
-
# @param [String] ch A string of size 1
|
53
|
-
def korean_char? ch
|
54
|
-
raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
|
55
|
-
|
56
|
-
complete_korean_char?(ch) ||
|
57
|
-
(chosungs + jungsungs + jongsungs).include?(ch)
|
58
|
-
end
|
59
|
-
alias kchar? korean_char?
|
60
|
-
|
61
|
-
# Checks if the given character is a "complete" korean character.
|
62
|
-
# "Complete" Korean character must have chosung and jungsung, with optional jongsung.
|
63
|
-
# @param [String] ch A string of size 1
|
64
|
-
def complete_korean_char? ch
|
65
|
-
raise ArgumentError.new('Lengthy input') if str_length(ch) > 1
|
66
|
-
|
67
|
-
# Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
|
68
|
-
ch.unpack('U').all? { | c | c >= 0xAC00 && c <= 0xD7A3 }
|
69
|
-
end
|
70
|
-
|
71
|
-
# Splits the given string into an array of Korean::Char's and Strings of length 1.
|
72
|
-
# @param [String] str Input string.
|
73
|
-
# @return [Array] Mixed array of Korean::Char instances and Strings of length 1 (for non-korean characters)
|
74
|
-
def convert str
|
75
|
-
str.each_char.map { |c|
|
76
|
-
korean_char?(c) ? kchar(c) : c
|
77
|
-
}
|
78
|
-
end
|
79
|
-
|
80
|
-
# Splits the given string into an array of Korean character components
|
81
|
-
# @param [String] str Input string.
|
82
|
-
# @return [Array] Array of Korean character components
|
83
|
-
def dissect str
|
84
|
-
str.each_char.inject([]) { |arr, c|
|
85
|
-
arr += korean_char?(c) ? kchar(c).to_a.compact : [c]
|
86
|
-
}
|
87
|
-
end
|
88
|
-
|
89
|
-
# Returns a Korean::Char object for the given Korean character.
|
90
|
-
# @param [String] ch Korean character in String
|
91
|
-
# @return [Korean::Char] Korean::Char instance
|
92
|
-
def kchar ch
|
93
|
-
Korean::Char.new(self, ch)
|
94
|
-
end
|
95
|
-
alias korean_char kchar
|
96
|
-
|
97
|
-
# Reads numeric expressions in Korean way.
|
98
|
-
# @param [String, Number] str Numeric type or String containing numeric expressions
|
99
|
-
# @return [String] Output string
|
100
|
-
def read_number str
|
101
|
-
nconfig = config['number']
|
102
|
-
|
103
|
-
str.to_s.gsub(/(([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?)(\s*.)?/) {
|
104
|
-
read_number_sub($1, $5)
|
105
|
-
}
|
106
|
-
end
|
107
|
-
|
108
|
-
# Returns the pronunciation of the given string containing Korean characters.
|
109
|
-
# Takes optional options hash.
|
110
|
-
#
|
111
|
-
# @param [String] Input string
|
112
|
-
# @param [Boolean] options[:pronounce_each_char] Each character of the string is pronounced respectively.
|
113
|
-
# @param [Boolean] options[:slur] Strings separated by whitespaces are processed again as if they were contiguous.
|
114
|
-
# @param [Boolean] options[:number] Numeric parts of the string are also pronounced in Korean.
|
115
|
-
# @param [Array] options[:except] Allows you to skip certain transformations.
|
116
|
-
# @return [String] Output string
|
117
|
-
def pronounce str, options = {}
|
118
|
-
options = {
|
119
|
-
:pronounce_each_char => false,
|
120
|
-
:slur => false,
|
121
|
-
:number => true,
|
122
|
-
:except => [],
|
123
|
-
:debug => false
|
124
|
-
}.merge options
|
125
|
-
|
126
|
-
str = read_number(str) if options[:number]
|
127
|
-
|
128
|
-
result, transforms = @pronouncer.send :pronounce!, str, options
|
129
|
-
|
130
|
-
if options[:debug]
|
131
|
-
return result, transforms
|
132
|
-
else
|
133
|
-
return result
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
# Returns the romanization (alphabetical notation) of the given Korean string.
|
138
|
-
# http://en.wikipedia.org/wiki/Korean_romanization
|
139
|
-
# @param [String] str Input Korean string
|
140
|
-
# @param [Boolean] options[:as_pronounced] If true, #pronounce is internally called before romanize
|
141
|
-
# @param [Boolean] options[:number] Whether to read numeric expressions in the string
|
142
|
-
# @param [Boolean] options[:slur] Same as :slur in #pronounce
|
143
|
-
# @return [String] Output string in Roman Alphabet
|
144
|
-
# @see Korean#pronounce
|
145
|
-
def romanize str, options = {}
|
146
|
-
options = {
|
147
|
-
:as_pronounced => true,
|
148
|
-
:number => true,
|
149
|
-
:slur => false
|
150
|
-
}.merge options
|
151
|
-
|
152
|
-
require 'yaml'
|
153
|
-
rdata = config['romanization']
|
154
|
-
post_subs = rdata["post substitution"]
|
155
|
-
rdata = [rdata["chosung"], rdata["jungsung"], rdata["jongsung"]]
|
156
|
-
|
157
|
-
str = pronounce str,
|
158
|
-
:pronounce_each_char => !options[:as_pronounced],
|
159
|
-
:number => options[:number],
|
160
|
-
:slur => options[:slur],
|
161
|
-
# 제1항 [붙임 1] ‘ㅢ’는 ‘ㅣ’로 소리 나더라도 ‘ui’로 적는다.
|
162
|
-
:except => %w[rule_5_3]
|
163
|
-
dash = rdata[0]["ㅇ"]
|
164
|
-
romanization = ""
|
165
|
-
|
166
|
-
romanize_chunk = lambda do | chunk |
|
167
|
-
convert(chunk).each do | kc |
|
168
|
-
kc.to_a.each_with_index do | comp, idx |
|
169
|
-
next if comp.nil?
|
170
|
-
comp = rdata[idx][comp] || comp
|
171
|
-
comp = comp[1..-1] if comp[0, 1] == dash &&
|
172
|
-
(romanization.empty? || romanization[-1, 1] =~ /\s/)
|
173
|
-
romanization += comp
|
174
|
-
end
|
175
|
-
end
|
176
|
-
|
177
|
-
return post_subs.keys.inject(romanization) { | output, pattern |
|
178
|
-
output.gsub(pattern, post_subs[pattern])
|
179
|
-
}
|
180
|
-
end
|
181
|
-
|
182
|
-
k_chunk = ""
|
183
|
-
str.each_char do | c |
|
184
|
-
if korean_char? c
|
185
|
-
k_chunk += c
|
186
|
-
else
|
187
|
-
unless k_chunk.empty?
|
188
|
-
romanization = romanize_chunk.call k_chunk
|
189
|
-
k_chunk = ""
|
190
|
-
end
|
191
|
-
romanization += c
|
192
|
-
end
|
193
|
-
end
|
194
|
-
romanization = romanize_chunk.call k_chunk unless k_chunk.empty?
|
195
|
-
romanization
|
196
|
-
end
|
197
|
-
|
198
|
-
private
|
199
|
-
def str_length str
|
200
|
-
str.length
|
201
|
-
end
|
202
|
-
|
203
|
-
def read_number_sub num, next_char
|
204
|
-
nconfig = config['number']
|
205
|
-
|
206
|
-
if num == '0'
|
207
|
-
return nconfig['digits'].first
|
208
|
-
end
|
209
|
-
|
210
|
-
num = num.gsub(',', '')
|
211
|
-
next_char = next_char.to_s
|
212
|
-
is_float = num.match(/[\.e]/) != nil
|
213
|
-
|
214
|
-
# Alternative notation for integers with proper suffix
|
215
|
-
alt = false
|
216
|
-
if is_float == false &&
|
217
|
-
nconfig['alt notation']['when suffix'].keys.include?(next_char.strip)
|
218
|
-
max = nconfig['alt notation']['when suffix'][next_char.strip]['max']
|
219
|
-
|
220
|
-
if max.nil? || num.to_i <= max
|
221
|
-
alt = true
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
# Sign
|
226
|
-
sign = []
|
227
|
-
negative = false
|
228
|
-
if num =~ /^-/
|
229
|
-
num = num.sub(/^-\s*/, '')
|
230
|
-
sign << nconfig['negative']
|
231
|
-
negative = true
|
232
|
-
elsif num =~ /^\+/
|
233
|
-
num = num.sub(/^\+\s*/, '')
|
234
|
-
sign << nconfig['positive']
|
235
|
-
end
|
236
|
-
|
237
|
-
if is_float
|
238
|
-
below = nconfig['decimal point']
|
239
|
-
below = nconfig['digits'][0] + below if num.to_f < 1
|
240
|
-
|
241
|
-
if md = num.match(/(.*)e(.*)/)
|
242
|
-
dp = md[1].index('.')
|
243
|
-
num = md[1].tr '.', ''
|
244
|
-
exp = md[2].to_i
|
245
|
-
|
246
|
-
dp += exp
|
247
|
-
if dp > num.length
|
248
|
-
num = num.ljust(dp, '0')
|
249
|
-
num = num.sub(/^0+([1-9])/, "\\1")
|
250
|
-
|
251
|
-
below = ""
|
252
|
-
elsif dp < 0
|
253
|
-
num = '0.' + '0' * (-dp) + num
|
254
|
-
else
|
255
|
-
num[dp, 1] = '.' + num[dp, 1]
|
256
|
-
end
|
257
|
-
end
|
258
|
-
num.sub(/.*\./, '').each_char do | char |
|
259
|
-
below += nconfig['digits'][char.to_i]
|
260
|
-
end if num.include? '.'
|
261
|
-
num = num.sub(/\..*/, '')
|
262
|
-
else
|
263
|
-
below = ""
|
264
|
-
end
|
265
|
-
|
266
|
-
tokens = []
|
267
|
-
unit_idx = -1
|
268
|
-
num = num.to_i
|
269
|
-
while num > 0
|
270
|
-
v = num % 10000
|
271
|
-
|
272
|
-
unit_idx += 1
|
273
|
-
if v > 0
|
274
|
-
if alt == false || unit_idx >= 1
|
275
|
-
str = ""
|
276
|
-
# Cannot use hash as they're unordered in 1.8
|
277
|
-
[[1000, '천'],
|
278
|
-
[100, '백'],
|
279
|
-
[10, '십']].each do | arr |
|
280
|
-
u, sub_unit = arr
|
281
|
-
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
282
|
-
v %= u
|
283
|
-
end
|
284
|
-
str += nconfig['digits'][v] if v > 0
|
285
|
-
|
286
|
-
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx]
|
287
|
-
else
|
288
|
-
str = ""
|
289
|
-
tenfolds = nconfig['alt notation']['tenfolds']
|
290
|
-
digits = nconfig['alt notation']['digits']
|
291
|
-
alt_post_subs = nconfig['alt notation']['post substitution']
|
292
|
-
|
293
|
-
# Likewise.
|
294
|
-
[[1000, '천'],
|
295
|
-
[100, '백']].each do | u, sub_unit |
|
296
|
-
str += (nconfig['digits'][v/u] if v/u != 1).to_s + sub_unit + ' ' if v / u > 0
|
297
|
-
v %= u
|
298
|
-
end
|
299
|
-
|
300
|
-
str += tenfolds[(v / 10) - 1] if v / 10 > 0
|
301
|
-
v %= 10
|
302
|
-
str += digits[v] if v > 0
|
303
|
-
|
304
|
-
alt_post_subs.each do | k, v |
|
305
|
-
str.gsub!(k, v)
|
306
|
-
end if alt
|
307
|
-
tokens << str.sub(/ $/, '') + nconfig['units'][unit_idx]
|
308
|
-
end
|
309
|
-
end
|
310
|
-
num /= 10000
|
311
|
-
end
|
312
|
-
|
313
|
-
tokens += sign unless sign.empty?
|
314
|
-
ret = tokens.reverse.join(' ') + below + next_char
|
315
|
-
nconfig['post substitution'].each do | k, v |
|
316
|
-
ret.gsub!(k, v)
|
317
|
-
end
|
318
|
-
ret
|
319
|
-
end
|
320
|
-
end#Korean
|
321
|
-
end#Gimchi
|
322
|
-
|
323
|
-
|