gimchi 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,42 @@
1
+ === 0.2.0 / 2013/03/28
2
+ * Completely backward-incompatible release :p
3
+
4
+ === 0.1.9 / 2012/02/20
5
+ * Bug fix: Failing test on 1.8
6
+ * `Gimchi::Korean#dissect` renamed to `Gimchi::Korean#convert`
7
+ * `Gimchi::Korean#dissect` completely dissects the given String into an Array of Korean character components as Strings
8
+
9
+ === 0.1.8 / 2011/12/02
10
+ * Added `Gimchi::Korean#kchar`
11
+
12
+ === 0.1.7 / 2011/10/17
13
+ * Bug fix: Fixed reading 0
14
+ * Bug fix: Failing test on 1.8
15
+ * Bug fix: Fixed 'incompatible encoding regexp' problem on 1.9
16
+
17
+ === 0.1.6 / 2011/04/13
18
+ * More post substitution for read_number
19
+
20
+ === 0.1.5 / 2011/04/12
21
+ * Removed possible loss of precision during read_number
22
+ * read_number extended to read exponential notation properly.
23
+
24
+ === 0.1.4 / 2011/04/08
25
+ * Minor improvement in romanization output. -y => y
26
+
27
+ === 0.1.3 / 2011/04/08
28
+ * Now compatible with Ruby 1.8
29
+
30
+ === 0.1.2 / 2011/04/08
31
+ * Bug fix in pronouncer.rb. It was undetectable on Ruby 1.9, but not on 1.8
32
+
33
+ === 0.1.1 / 2011/04/07
34
+ * Removed Gimchi::Korean::Char#org
35
+ * Code refactoring
36
+ * `Gimchi::Korean#romanize` no longer capitalizes the output string
37
+ * `Gimchi::Korean#romanize` does not affect non-Korean characters
38
+ * yard documentation
39
+
40
+ === 0.1.0 / 2011/04/05
41
+ * Prototype release.
42
+
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
@@ -1,4 +1,6 @@
1
- Copyright (c) 2011 Junegunn Choi
1
+ Copyright (c) 2013 Junegunn Choi
2
+
3
+ MIT License
2
4
 
3
5
  Permission is hereby granted, free of charge, to any person obtaining
4
6
  a copy of this software and associated documentation files (the
@@ -18,97 +18,98 @@ gem install gimchi
18
18
 
19
19
  ## 사용법
20
20
 
21
- ### Gimchi::Korean 인스턴스의 생성
21
+ ### 초/중/종성 분해/합체
22
+
22
23
  ```ruby
23
- require 'gimchi'
24
+ chosung, jungsung, jongsung = Gimchi.decompose "한"
24
25
 
25
- ko = Gimchi::Korean.new
26
+ Gimchi.compose chosung, jungsung, jongsung # 한
27
+ Gimchi.compose chosung, "ㅗ", jongsung # 혼
26
28
  ```
27
29
 
28
30
  ### 한글 캐릭터 여부 판단
29
31
  ```ruby
30
- ko.korean_char? 'ㄱ' # true
31
- ko.complete_korean_char? 'ㄱ' # false
32
+ Gimchi.korean_char? 'ㄱ' # true
33
+ Gimchi.complete_korean_char? 'ㄱ' # false
32
34
 
33
- ko.korean_char? 'ㅏ' # true
34
- ko.complete_korean_char? 'ㅏ' # false
35
+ Gimchi.korean_char? 'ㅏ' # true
36
+ Gimchi.complete_korean_char? 'ㅏ' # false
35
37
 
36
- ko.korean_char? '가' # true
37
- ko.complete_korean_char? '가' # true
38
+ Gimchi.korean_char? '가' # true
39
+ Gimchi.complete_korean_char? '가' # true
38
40
 
39
41
  # Alias of korean_char?
40
- ko.kchar? '가' # true
42
+ Gimchi.kchar? '가' # true
43
+
44
+ Gimchi.chosung? 'ㄱ' # true
45
+ Gimchi.jungsung? 'ㄱ' # false
46
+ Gimchi.jongsung? 'ㄱ' # true
47
+
48
+ Gimchi.chosung? 'ㅏ' # false
49
+ Gimchi.jungsung? 'ㅏ' # true
50
+ Gimchi.jongsung? 'ㅏ' # false
51
+
52
+ Gimchi.chosung? 'ㄺ' # false
53
+ Gimchi.jungsung? 'ㄺ' # false
54
+ Gimchi.jongsung? 'ㄺ' # true
41
55
  ```
42
56
 
43
- ### Gimchi::Korean::Char
57
+ ### Gimchi::Char 의 사용
58
+
44
59
  ```ruby
45
- kc = ko.kchar "한"
46
- kc.class # Gimchi::Korean::Char
60
+ kc = Gimchi::Char("한")
61
+ kc.class # Gimchi::Char
62
+
63
+ kc.chosung # "ㅎ"
64
+ kc.jungsung # "ㅏ"
65
+ kc.jongsung # "ㄴ"
66
+ kc.to_a # ["ㅎ", "ㅏ", "ㄴ"]
67
+ kc.to_s # "한"
47
68
 
48
- kc.chosung # "ㅎ"
49
- kc.jungsung # "ㅏ"
50
- kc.jongsung # "ㄴ"
51
- kc.to_a # ["ㅎ", "ㅏ", "ㄴ"]
52
- kc.to_s # "한"
69
+ kc.complete? # true
70
+ kc.partial? # false
53
71
 
54
- kc.complete? # true
55
- kc.partial? # false
56
- ko.kchar("ㅏ").partial? # true
72
+ Gimchi::Char("ㅏ").partial? # true
57
73
 
58
74
  # Modifying its elements
59
75
  kc.chosung = 'ㄷ'
60
76
  kc.jongsung = 'ㄹ'
61
- kc.to_s # "달"
62
- kc.complete? # true
63
- kc.partial? # false
77
+ kc.to_s # "달"
78
+ kc.complete? # true
79
+ kc.partial? # false
64
80
 
65
81
  kc.chosung = nil
66
82
  kc.jongsung = nil
67
- kc.complete? # false
68
- kc.partial? # true
69
-
70
- # Alias of kchar
71
- kc = ko.korean_char "한"
72
-
73
- # Array of Gimchi::Korean::Char's
74
- arr = ko.convert '이것은 한글입니다.'
75
- # [이, 것, 은, " ", 한, 글, 입, 니, 다, "."]
76
-
77
- arr[0].class # Gimchi::Korean::Char
78
-
79
- # Dissects given String
80
- arr = ko.dissect '이것은 한글입니다.'
81
- # ["ㅇ", "ㅣ", "ㄱ", "ㅓ", "ㅅ", "ㅇ", "ㅡ", "ㄴ", " ",
82
- # "H", "a", "n", "g", "u", "l", " ", "ㅇ", "ㅣ", "ㅂ",
83
- # "ㄴ", "ㅣ", "ㄷ", "ㅏ", "."]
83
+ kc.complete? # false
84
+ kc.partial? # true
84
85
  ```
85
86
 
86
87
  ### 숫자 읽기
87
88
  ```ruby
88
- ko.read_number(1999) # "천 구백 구십 구"
89
- ko.read_number(- 100.123) # "마이너스 백점일이삼"
90
- ko.read_number("153,191,100,678.3214")
89
+ Gimchi.read_number(1999) # "천 구백 구십 구"
90
+ Gimchi.read_number(- 100.123) # "마이너스 백점일이삼"
91
+ Gimchi.read_number("153,191,100,678.3214")
91
92
  # "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사"
92
93
 
93
94
  # 나이, 시간 ( -살, -시 )
94
- ko.read_number("20살") # "스무살"
95
- ko.read_number("13 살") # "열세 살"
96
- ko.read_number("7시 30분") # "일곱시 삼십분"
95
+ Gimchi.read_number("20살") # "스무살"
96
+ Gimchi.read_number("13 살") # "열세 살"
97
+ Gimchi.read_number("7시 30분") # "일곱시 삼십분"
97
98
  ```
98
99
 
99
100
  ### 표준 발음 (부분 구현)
100
101
  ```ruby
101
102
  str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
102
- ko.pronounce str
103
+ Gimchi.pronounce str
103
104
  # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 모라너코"
104
105
 
105
- ko.pronounce str, :slur => true
106
+ Gimchi.pronounce str, :slur => true
106
107
  # "돼써 돼써 이제 그런 가르치믄 돼써 매이 라치 밀곱 씨 삼십 뿐까지 우릴 조그만 교실로 모라너코"
107
108
 
108
- ko.pronounce str, :pronounce_each_char => true
109
+ Gimchi.pronounce str, :each_char => true
109
110
  # "됃어 됃어 이제 그런 가르침은 됃어 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 몰아너고"
110
111
 
111
- ko.pronounce str, :number => false
112
+ Gimchi.pronounce str, :number => false
112
113
  # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 7 시 30 분까지 우릴 조그만 교실로 모라너코"
113
114
  ```
114
115
 
@@ -116,14 +117,17 @@ ko.pronounce str, :number => false
116
117
  ```ruby
117
118
  str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
118
119
 
119
- ko.romanize str
120
+ Gimchi.romanize str
120
121
  # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo moraneoko"
121
- ko.romanize str, :slur => true
122
+
123
+ Gimchi.romanize str, :slur => true
122
124
  # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-i rachi milgop ssi samsip ppunkkaji uril jogeuman gyosillo moraneoko"
123
- ko.romanize str, :as_pronounced => false
124
- # "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo"
125
- ko.romanize str, :number => false
125
+
126
+ Gimchi.romanize str, :number => false
126
127
  # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim 7 si 30 bunkkaji uril jogeuman gyosillo moraneoko"
128
+
129
+ Gimchi.romanize str, :as_pronounced => false
130
+ # "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo"
127
131
  ```
128
132
 
129
133
  ## 구현의 한계
@@ -136,18 +140,16 @@ ko.romanize str, :number => false
136
140
  위해 Ad-hoc한 patch 등이 코드에 상당량 포함된 상태인데 이를 정제하고 체계화하는
137
141
  노력이 필요합니다.
138
142
 
139
- ## Contributing to gimchi
140
-
141
- * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
142
- * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
143
- * Fork the project
144
- * Start a feature/bugfix branch
145
- * Commit and push until you are happy with your contribution
146
- * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
147
- * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
143
+ ## Contributing
144
+
145
+ 1. Fork it
146
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
147
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
148
+ 4. Push to the branch (`git push origin my-new-feature`)
149
+ 5. Create new Pull Request
148
150
 
149
151
  ## Copyright
150
152
 
151
- Copyright (c) 2011 Junegunn Choi. See LICENSE.txt for
153
+ Copyright (c) 2013 Junegunn Choi. See LICENSE.txt for
152
154
  further details.
153
155
 
@@ -0,0 +1,162 @@
1
+ # gimchi
2
+
3
+ Gimchi is a simple Ruby gem for handling Korean characters.
4
+
5
+ Features:
6
+ - Decompose a Korean character into its 3 components, namely chosung, jungsung and optional jongsung
7
+ - Compose elements back into the Korean character
8
+ - Read numbers in Korean
9
+ - Pronounce Korean characters
10
+ - Romanize Korean characters
11
+
12
+ Gimchi (partially) implements the following rules dictated by
13
+ The National Institute of The Korean Language (http://www.korean.go.kr)
14
+ - Korean Standard Pronunciation
15
+ - Korean Romanization
16
+
17
+ ## Installation
18
+
19
+ ```
20
+ gem install gimchi
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Composing and decomposing Korean characters
26
+
27
+ ```ruby
28
+ chosung, jungsung, jongsung = Gimchi.decompose "한"
29
+
30
+ Gimchi.compose chosung, jungsung, jongsung # 한
31
+ Gimchi.compose chosung, "ㅗ", jongsung # 혼
32
+ ```
33
+
34
+ ### Inspecting Korean characters
35
+ ```ruby
36
+ Gimchi.korean_char? 'ㄱ' # true
37
+ Gimchi.complete_korean_char? 'ㄱ' # false
38
+
39
+ Gimchi.korean_char? 'ㅏ' # true
40
+ Gimchi.complete_korean_char? 'ㅏ' # false
41
+
42
+ Gimchi.korean_char? '가' # true
43
+ Gimchi.complete_korean_char? '가' # true
44
+
45
+ # Alias of korean_char?
46
+ Gimchi.kchar? '가' # true
47
+
48
+ Gimchi.chosung? 'ㄱ' # true
49
+ Gimchi.jungsung? 'ㄱ' # false
50
+ Gimchi.jongsung? 'ㄱ' # true
51
+
52
+ Gimchi.chosung? 'ㅏ' # false
53
+ Gimchi.jungsung? 'ㅏ' # true
54
+ Gimchi.jongsung? 'ㅏ' # false
55
+
56
+ Gimchi.chosung? 'ㄺ' # false
57
+ Gimchi.jungsung? 'ㄺ' # false
58
+ Gimchi.jongsung? 'ㄺ' # true
59
+ ```
60
+
61
+ ### Using Gimchi::Char
62
+
63
+ ```ruby
64
+ kc = Gimchi::Char("한")
65
+ kc.class # Gimchi::Char
66
+
67
+ kc.chosung # "ㅎ"
68
+ kc.jungsung # "ㅏ"
69
+ kc.jongsung # "ㄴ"
70
+ kc.to_a # ["ㅎ", "ㅏ", "ㄴ"]
71
+ kc.to_s # "한"
72
+
73
+ kc.complete? # true
74
+ kc.partial? # false
75
+
76
+ Gimchi::Char("ㅏ").partial? # true
77
+
78
+ # Modifying its elements
79
+ kc.chosung = 'ㄷ'
80
+ kc.jongsung = 'ㄹ'
81
+ kc.to_s # "달"
82
+ kc.complete? # true
83
+ kc.partial? # false
84
+
85
+ kc.chosung = nil
86
+ kc.jongsung = nil
87
+ kc.complete? # false
88
+ kc.partial? # true
89
+ ```
90
+
91
+ ### Reading numbers in Korean
92
+ ```ruby
93
+ Gimchi.read_number(1999) # "천 구백 구십 구"
94
+ Gimchi.read_number(- 100.123) # "마이너스 백점일이삼"
95
+ Gimchi.read_number("153,191,100,678.3214")
96
+ # "천 오백 삼십 일억 구천 백 십만 육백 칠십 팔점삼이일사"
97
+
98
+ # Age, Time ( -살, -시 )
99
+ Gimchi.read_number("20살") # "스무살"
100
+ Gimchi.read_number("13 살") # "열세 살"
101
+ Gimchi.read_number("7시 30분") # "일곱시 삼십분"
102
+ ```
103
+
104
+ ### Standard pronunciation (partially implemented)
105
+ ```ruby
106
+ str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
107
+ Gimchi.pronounce str
108
+ # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 모라너코"
109
+
110
+ Gimchi.pronounce str, :slur => true
111
+ # "돼써 돼써 이제 그런 가르치믄 돼써 매이 라치 밀곱 씨 삼십 뿐까지 우릴 조그만 교실로 모라너코"
112
+
113
+ Gimchi.pronounce str, :number => false
114
+ # "돼써 돼써 이제 그런 가르치믄 돼써 매일 아침 7 시 30 분까지 우릴 조그만 교실로 모라너코"
115
+
116
+ Gimchi.pronounce str, :each_char => true
117
+ # "됃어 됃어 이제 그런 가르침은 됃어 매일 아침 일곱 시 삼십 분까지 우릴 조그만 교실로 몰아너고"
118
+ ```
119
+
120
+ ### Romanization (partially implemented)
121
+ ```ruby
122
+ str = "됐어 됐어 이제 그런 가르침은 됐어 매일 아침 7 시 30 분까지 우릴 조그만 교실로 몰아넣고"
123
+
124
+ Gimchi.romanize str
125
+ # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo moraneoko"
126
+
127
+ Gimchi.romanize str, :slur => true
128
+ # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-i rachi milgop ssi samsip ppunkkaji uril jogeuman gyosillo moraneoko"
129
+
130
+ Gimchi.romanize str, :number => false
131
+ # "Dwaesseo dwaesseo ije geureon gareuchimeun dwaesseo mae-il achim 7 si 30 bunkkaji uril jogeuman gyosillo moraneoko"
132
+
133
+ Gimchi.romanize str, :as_pronounced => false
134
+ # "Dwaet-eo dwaet-eo ije geureon gareuchim-eun dwaet-eo mae-il achim ilgop si samsip bunkkaji uril jogeuman gyosillo mol-aneogo"
135
+ ```
136
+
137
+ ## Limitation of the implementation
138
+
139
+ Unfortunately, in order to implement the complete specification of Korean
140
+ pronunciation and romanization, we need NLP, huge Korean dictionaries and even
141
+ semantic analysis of the given string. And even with all that complex
142
+ processing, we cannot guarantee 100% accuracy of the output. So yes, that is
143
+ definitely not what this gem tries to achieve. Gimchi tries to achieve "some"
144
+ level of accuracy with relatively simple code.
145
+
146
+ Currently, Gimchi code contains a lot of ad-hoc (possibly invalid) patches
147
+ that try to improve the quality of the output, which should be
148
+ refactored sometime soon.
149
+
150
+ ## Contributing
151
+
152
+ 1. Fork it
153
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
154
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
155
+ 4. Push to the branch (`git push origin my-new-feature`)
156
+ 5. Create new Pull Request
157
+
158
+ ## Copyright
159
+
160
+ Copyright (c) 2013 Junegunn Choi. See LICENSE.txt for
161
+ further details.
162
+
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+ Rake::TestTask.new(:test) do |test|
4
+ test.libs << 'lib' << 'test'
5
+ test.pattern = 'test/**/test_*.rb'
6
+ test.verbose = true
7
+ end