korean-string 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/korean-string.rb +112 -0
- data/test/helper.rb +9 -0
- data/test/test_korean-string.rb +38 -0
- metadata +77 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Ben Humphreys
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
= korean-string
|
2
|
+
|
3
|
+
Split Korean characters into their individual components, and join components back together to create characters.
|
4
|
+
|
5
|
+
You could use it to make some weird conjugation rules. Go wild.
|
6
|
+
|
7
|
+
== Methods
|
8
|
+
|
9
|
+
=== String.split_ko
|
10
|
+
|
11
|
+
Return an array of arrays of Korean character components
|
12
|
+
|
13
|
+
require 'korean-string'
|
14
|
+
'읽어싶'.split_ko
|
15
|
+
=> [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]]
|
16
|
+
|
17
|
+
=== Array.join_ko
|
18
|
+
|
19
|
+
Accepts an array of character pieces
|
20
|
+
|
21
|
+
require 'korean-string'
|
22
|
+
[["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]].join_ko
|
23
|
+
=> '읽어싶'
|
24
|
+
|
25
|
+
|
26
|
+
== Todo
|
27
|
+
|
28
|
+
Come up with other useful methods?
|
29
|
+
|
30
|
+
|
31
|
+
== Copyright
|
32
|
+
|
33
|
+
Copyright (c) 2010 Ben Humphreys. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Packaging/release tasks are provided by Jeweler. They are optional: when
# Jeweler is not installed we only print a hint so the remaining tasks
# (test, rdoc) stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "korean-string"
    gem.summary = %Q{Korean string join and split}
    gem.description = %Q{Split Korean characters to individual compontents, join components together to create characters}
    gem.email = "benhumphreys@gmail.com"
    gem.homepage = "http://github.com/bhumphreys/korean-string"
    gem.authors = ["Ben Humphreys"]
    #gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; falls back to a stub that explains how to install rcov.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is defined by Jeweler. Only hook it in when it exists,
# otherwise `rake test` would abort with "Don't know how to build task".
task(:test => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# 'rake/rdoctask' was removed from Rake; prefer RDoc's own task and fall back
# to the legacy location for very old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "korean-string #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Originally transliterate-hacked from Perl from
|
2
|
+
# http://blog.naver.com/PostView.nhn?blogId=mokomoji&logNo=130013133481
|
3
|
+
#
|
4
|
+
# For the theory of why this works, check out the W3C spec on Korean encoding
|
5
|
+
# http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
|
6
|
+
# (Thanks to @ntrolls for this)
|
7
|
+
|
8
|
+
# $KCODE only has an effect on Ruby 1.8; on 1.9+ assigning it merely prints a warning.
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'

# Code points of the Hangul compatibility jamo, indexed by their position in
# the Unicode Hangul syllable composition formula:
#
#   syllable = 0xAC00 + (lead * 21 + vowel) * 28 + tail
#
# Leading consonants (19 entries).
#          ㄱ      ㄲ      ㄴ      ㄷ      ㄸ      ㄹ      ㅁ      ㅂ
CHOSUNG = [0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
#          ㅃ      ㅅ      ㅆ      ㅇ      ㅈ      ㅉ      ㅊ      ㅋ
           0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
#          ㅌ      ㅍ      ㅎ
           0x314c, 0x314d, 0x314e].freeze

# Vowels (21 entries).
#            ㅏ      ㅐ      ㅑ      ㅒ      ㅓ      ㅔ      ㅕ      ㅖ
JWUNGSUNG = [0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156,
#            ㅗ      ㅘ      ㅙ      ㅚ      ㅛ      ㅜ      ㅝ      ㅞ
             0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c, 0x315d, 0x315e,
#            ㅟ      ㅠ      ㅡ      ㅢ      ㅣ
             0x315f, 0x3160, 0x3161, 0x3162, 0x3163].freeze

# Trailing consonants (28 entries). Index 0 means "no trailing consonant",
# which is why the first slot holds 0 instead of a jamo.
#           (none)  ㄱ      ㄲ      ㄳ      ㄴ      ㄵ      ㄶ      ㄷ
JONGSUNG = [0,      0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137,
#           ㄹ      ㄺ      ㄻ      ㄼ      ㄽ      ㄾ      ㄿ      ㅀ
            0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140,
#           ㅁ      ㅂ      ㅄ      ㅅ      ㅆ      ㅇ      ㅈ      ㅊ
            0x3141, 0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314a,
#           ㅋ      ㅌ      ㅍ      ㅎ
            0x314b, 0x314c, 0x314d, 0x314e].freeze

# Not wrapping this in a module... not sure if that's a terrible idea

class String
  # Decompose every character of the string into its jamo.
  #
  # Returns an array with one entry per character. A precomposed Hangul
  # syllable (U+AC00..U+D7A3) becomes an array of two or three one-character
  # strings (lead, vowel, optional tail); any other character becomes a
  # one-element array holding that character unchanged.
  #
  #   '읽어'.split_ko  # => [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"]]
  def split_ko
    vowel_block = JONGSUNG.size                   # 28 code points per lead+vowel pair
    lead_block  = JWUNGSUNG.size * JONGSUNG.size  # 588 code points per lead

    unpack('U*').map do |codepoint|
      pieces =
        if codepoint >= 0xAC00 && codepoint <= 0xD7A3
          # Pure integer arithmetic: peel off lead, vowel and tail indexes.
          lead, remainder = (codepoint - 0xAC00).divmod(lead_block)
          vowel, tail     = remainder.divmod(vowel_block)

          jamo = [CHOSUNG[lead], JWUNGSUNG[vowel]]
          jamo << JONGSUNG[tail] unless tail.zero?
          jamo
        else
          [codepoint]
        end

      # Pack each code point on its own so the result does not depend on how
      # String#split('') treats multibyte characters.
      pieces.map { |piece| [piece].pack('U') }
    end
  end
end


class Array
  # Compose jamo back into Hangul text.
  #
  # Accepts either
  # * a flat array of two or three jamo strings (lead, vowel, optional tail),
  #   which is combined into a single syllable; elements past the third are
  #   ignored, or
  # * a nested array as produced by String#split_ko, in which case every inner
  #   array is composed in turn and the results are concatenated.
  #
  # A single-element array is returned as its only element (these are the
  # non-Hangul characters split_ko passes through) and an empty array yields
  # an empty string, so <tt>str.split_ko.join_ko == str</tt>.
  #
  # Raises RuntimeError when a piece is not a valid jamo for its position.
  #
  # See http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
  def join_ko
    return '' if empty?

    if any? { |piece| piece.is_a?(Array) }
      return map { |piece| piece.is_a?(Array) ? piece.join_ko : piece.to_s }.join
    end

    return first.to_s if size == 1

    # Leading, middle, following (optional)
    lead  = self[0].unpack('U*').first
    vowel = self[1].unpack('U*').first
    tail  = self[2] ? self[2].unpack('U*').first : nil

    lead_index = CHOSUNG.index(lead)
    if lead_index.nil?
      raise "join_ko: #{self[0].inspect} is not a leading consonant (chosung)"
    end

    vowel_index = JWUNGSUNG.index(vowel)
    if vowel_index.nil?
      raise "join_ko: #{self[1].inspect} is not a vowel (jwungsung)"
    end

    # No trailing consonant maps to offset 0.
    tail_index = tail ? JONGSUNG.index(tail) : 0
    if tail_index.nil?
      raise "join_ko: #{self[2].inspect} is not a trailing consonant (jongsung)"
    end

    syllable = 0xAC00 +
               lead_index * (JWUNGSUNG.size * JONGSUNG.size) +
               vowel_index * JONGSUNG.size +
               tail_index

    [syllable].pack('U')
  end
end
|
112
|
+
|
data/test/helper.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Exercises String#split_ko and Array#join_ko against known words.
class TestKoreanString < Test::Unit::TestCase

  # Each word must decompose into one array of jamo per syllable.
  def test_split
    cases = [
      ['안녕하세요',
       [["ㅇ", "ㅏ", "ㄴ"],
        ["ㄴ", "ㅕ", "ㅇ"],
        ["ㅎ", "ㅏ"],
        ["ㅅ", "ㅔ"],
        ["ㅇ", "ㅛ"]]],
      ['읽어싶',
       [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]]],
      ['괜찮아',
       [["ㄱ", "ㅙ", "ㄴ"], ["ㅊ", "ㅏ", "ㄶ"], ["ㅇ", "ㅏ"]]]
    ]

    cases.each do |word, expected_jamo|
      assert_equal(expected_jamo, word.split_ko)
    end
  end

  # A flat list of two or three jamo must compose into a single syllable.
  def test_join
    cases = [
      ["아", %w(ㅇ ㅏ)],
      ["일", %w(ㅇ ㅣ ㄹ)]
    ]

    cases.each do |expected_syllable, jamo|
      assert_equal(expected_syllable, jamo.join_ko)
    end
  end
end
|
38
|
+
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: korean-string
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Ben Humphreys
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-03 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Split Korean characters to individual compontents, join components together to create characters
|
23
|
+
email: benhumphreys@gmail.com
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files:
|
29
|
+
- LICENSE
|
30
|
+
- README.rdoc
|
31
|
+
files:
|
32
|
+
- .document
|
33
|
+
- .gitignore
|
34
|
+
- LICENSE
|
35
|
+
- README.rdoc
|
36
|
+
- Rakefile
|
37
|
+
- VERSION
|
38
|
+
- lib/korean-string.rb
|
39
|
+
- test/helper.rb
|
40
|
+
- test/test_korean-string.rb
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://github.com/bhumphreys/korean-string
|
43
|
+
licenses: []
|
44
|
+
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options:
|
47
|
+
- --charset=UTF-8
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 3
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
hash: 3
|
65
|
+
segments:
|
66
|
+
- 0
|
67
|
+
version: "0"
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.3.7
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: Korean string join and split
|
75
|
+
test_files:
|
76
|
+
- test/helper.rb
|
77
|
+
- test/test_korean-string.rb
|