gimchi 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +134 -0
- data/config/default.yml +5 -10
- data/lib/gimchi/char.rb +104 -104
- data/lib/gimchi/korean.rb +291 -277
- data/lib/gimchi/patch_1.8.rb +18 -18
- data/lib/gimchi/pronouncer.rb +488 -488
- data/test/test_gimchi.rb +4 -0
- metadata +14 -14
- data/README.rdoc +0 -120
data/test/test_gimchi.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
$LOAD_PATH.unshift File.dirname(__FILE__)
|
3
4
|
require 'helper'
|
4
5
|
|
5
6
|
class TestGimchi < Test::Unit::TestCase
|
@@ -75,6 +76,9 @@ class TestGimchi < Test::Unit::TestCase
|
|
75
76
|
|
76
77
|
def test_read_number
|
77
78
|
ko = Gimchi::Korean.new
|
79
|
+
assert_equal "영", ko.read_number(0)
|
80
|
+
assert_equal "일", ko.read_number(1)
|
81
|
+
assert_equal "구", ko.read_number(9)
|
78
82
|
assert_equal "천 구백 구십 구", ko.read_number(1999)
|
79
83
|
assert_equal "마이너스 백점일이삼", ko.read_number(- 100.123)
|
80
84
|
assert_equal "오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사오육칠",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimchi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-10-17 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &2151822000 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2151822000
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: jeweler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2151820760 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2151820760
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rcov
|
38
|
-
requirement: &
|
38
|
+
requirement: &2151819420 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2151819420
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ansi
|
49
|
-
requirement: &
|
49
|
+
requirement: &2151818040 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2151818040
|
58
58
|
description: Gimchi knows how to pronounce Korean strings and how to write them in
|
59
59
|
roman alphabet.
|
60
60
|
email: junegunn.c@gmail.com
|
@@ -63,7 +63,7 @@ extensions: []
|
|
63
63
|
extra_rdoc_files:
|
64
64
|
- LICENSE.txt
|
65
65
|
- README.ko.rdoc
|
66
|
-
- README.rdoc
|
66
|
+
- README.markdown
|
67
67
|
files:
|
68
68
|
- config/default.yml
|
69
69
|
- lib/gimchi.rb
|
@@ -73,7 +73,7 @@ files:
|
|
73
73
|
- lib/gimchi/pronouncer.rb
|
74
74
|
- LICENSE.txt
|
75
75
|
- README.ko.rdoc
|
76
|
-
- README.rdoc
|
76
|
+
- README.markdown
|
77
77
|
- test/helper.rb
|
78
78
|
- test/pronunciation.yml
|
79
79
|
- test/romanization.yml
|
@@ -93,7 +93,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
segments:
|
95
95
|
- 0
|
96
|
-
hash:
|
96
|
+
hash: -2449462105389622479
|
97
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
98
|
none: false
|
99
99
|
requirements:
|
@@ -102,7 +102,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
102
|
version: '0'
|
103
103
|
requirements: []
|
104
104
|
rubyforge_project:
|
105
|
-
rubygems_version: 1.
|
105
|
+
rubygems_version: 1.8.6
|
106
106
|
signing_key:
|
107
107
|
specification_version: 3
|
108
108
|
summary: Gimchi reads Korean.
|
data/README.rdoc
DELETED
@@ -1,120 +0,0 @@
|
|
1
|
-
= gimchi
|
2
|
-
|
3
|
-
Gimchi is a simple Ruby gem which knows how to handle Korean strings. It knows
|
4
|
-
how to dissect Korean characters into their 3 components, namely chosung,
|
5
|
-
jungsung and optional jongsung. It knows how Korean sentences are pronounced
|
6
|
-
and how they're written in the Roman alphabet.
|
7
|
-
|
8
|
-
Gimchi (only partially) implements the following rules dictated by
|
9
|
-
The National Institute of The Korean Language (http://www.korean.go.kr)
|
10
|
-
* Korean Standard Pronunciation
|
11
|
-
* Korean Romanization
|
12
|
-
|
13
|
-
== Installation
|
14
|
-
gem install gimchi
|
15
|
-
|
16
|
-
== Usage
|
17
|
-
|
18
|
-
=== Creating Gimchi::Korean instance
|
19
|
-
require 'gimchi'
|
20
|
-
|
21
|
-
ko = Gimchi::Korean.new
|
22
|
-
|
23
|
-
=== Checks if the given character is in Korean alphabet
|
24
|
-
ko.korean_char? 'ㄱ' # true
|
25
|
-
ko.complete_korean_char? 'ㄱ' # false
|
26
|
-
|
27
|
-
ko.korean_char? 'ㅏ' # true
|
28
|
-
ko.complete_korean_char? 'ㅏ' # false
|
29
|
-
|
30
|
-
ko.korean_char? '가' # true
|
31
|
-
ko.complete_korean_char? '가' # true
|
32
|
-
|
33
|
-
=== Usage of Gimchi::Korean::Char
|
34
|
-
arr = ko.dissect '이것은 한글입니다.'
|
35
|
-
# [이, 것, 은, " ", 한, 글, 입, 니, 다, "."]
|
36
|
-
|
37
|
-
arr[4].class # Gimchi::Korean::Char
|
38
|
-
|
39
|
-
arr[4].chosung # "ㅎ"
|
40
|
-
arr[4].jungsung # "ㅏ"
|
41
|
-
arr[4].jongsung # "ㄴ"
|
42
|
-
arr[4].to_a # ["ㅎ", "ㅏ", "ㄴ"]
|
43
|
-
arr[4].to_s # "한"
|
44
|
-
|
45
|
-
arr[4].chosung = 'ㄷ'
|
46
|
-
arr[4].jongsung = 'ㄹ'
|
47
|
-
arr[4].to_s # "달"
|
48
|
-
arr[4].complete? # true
|
49
|
-
arr[4].partial? # false
|
50
|
-
|
51
|
-
arr[4].chosung = nil
|
52
|
-
arr[4].jongsung = nil
|
53
|
-
arr[4].complete? # false
|
54
|
-
arr[4].partial? # true
|
55
|
-
|
56
|
-
=== Reading numbers in Korean
|
57
|
-
ko.read_number(1999) # "천 구백 구십 구"
|
58
|
-
ko.read_number(- 100.123) # "마이너스 백점일이삼"
|
59
|
-
ko.read_number("153,191,100,678.3214")
|
60
|
-
# "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사"
|
61
|
-
|
62
|
-
# Age, Time ( -살, -시 )
|
63
|
-
ko.read_number("20살") # "스무살"
|
64
|
-
ko.read_number("13 살") # "열세 살"
|
65
|
-
ko.read_number("7시 30분") # "일곱시 삼십분"
|
66
|
-
|
67
|
-
=== Standard pronunciation (partially implemented)
|
68
|
-
str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
|
69
|
-
ko.pronounce str
|
70
|
-
# "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 모라너코"
|
71
|
-
|
72
|
-
ko.pronounce str, :slur => true
|
73
|
-
# "돼써 돼써 이제 그런 가르치믄 돼써 매이 라치 밀곱 씨 삼십 뿐까지 우릴 조그만 교실로 모라너코"
|
74
|
-
|
75
|
-
ko.pronounce str, :pronounce_each_char => true
|
76
|
-
# "됃어 됃어 이제 그런 가르침은 됃어 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 몰아너고"
|
77
|
-
|
78
|
-
ko.pronounce str, :number => false
|
79
|
-
# "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 7 시 30 분까지 우릴 조그만 교실로 모라너코"
|
80
|
-
|
81
|
-
=== Romanization (partially implemented)
|
82
|
-
str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
|
83
|
-
|
84
|
-
ko.romanize str
|
85
|
-
# "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo moraneoko"
|
86
|
-
ko.romanize str, :slur => true
|
87
|
-
# "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-i rachi milgop ssi samsip ppunkkaji uril jogeuman gyosillo moraneoko"
|
88
|
-
ko.romanize str, :as_pronounced => false
|
89
|
-
# "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo"
|
90
|
-
ko.romanize str, :number => false
|
91
|
-
# "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim 7 si 30 bunkkaji uril jogeuman gyosillo moraneoko"
|
92
|
-
|
93
|
-
== Limitation of the implementation
|
94
|
-
|
95
|
-
Unfortunately in order to implement the complete specification of Korean
|
96
|
-
pronunciation and romanization, we need NLP, huge Korean dictionaries and even
|
97
|
-
semantic analysis of the given string. And even with all that complex
|
98
|
-
processing, we cannot guarantee 100% accuracy of the output. So yes, that is
|
99
|
-
definitely not what this gem tries to achieve. Gimchi tries to achieve "some"
|
100
|
-
level of accuracy with relatively simple code.
|
101
|
-
|
102
|
-
Currently, Gimchi code contains a lot of ad-hoc (possibly invalid) patches
|
103
|
-
that try to improve the quality of the output, and which should be
|
104
|
-
refactored soon.
|
105
|
-
|
106
|
-
== Contributing to gimchi
|
107
|
-
|
108
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
109
|
-
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
110
|
-
* Fork the project
|
111
|
-
* Start a feature/bugfix branch
|
112
|
-
* Commit and push until you are happy with your contribution
|
113
|
-
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
114
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
115
|
-
|
116
|
-
== Copyright
|
117
|
-
|
118
|
-
Copyright (c) 2011 Junegunn Choi. See LICENSE.txt for
|
119
|
-
further details.
|
120
|
-
|