korean-string 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Ben Humphreys
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ = korean-string
2
+
3
+ Split Korean characters into their individual components, and join components together to create characters.
4
+
5
+ You could use it to make some weird conjugation rules. Go wild.
6
+
7
+ == Methods
8
+
9
+ === String.split_ko
10
+
11
+ Return an array of arrays of Korean character components
12
+
13
+ require 'korean-string'
14
+ '읽어싶'.split_ko
15
+ => [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]]
16
+
17
+ === Array.join_ko
18
+
19
+ Accepts an array of character pieces
20
+
21
+ require 'korean-string'
22
+ ["ㅇ", "ㅣ", "ㄺ"].join_ko
23
+ => '읽'
24
+
25
+
26
+ == Todo
27
+
28
+ Come up with other useful methods?
29
+
30
+
31
+ == Copyright
32
+
33
+ Copyright (c) 2010 Ben Humphreys. See LICENSE for details.
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'rake'

# Packaging tasks (gemspec generation, build, release) are provided by
# Jeweler. They are optional: without Jeweler the test and rdoc tasks below
# still work.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "korean-string"
    gem.summary = %Q{Korean string join and split}
    gem.description = %Q{Split Korean characters to individual components, join components together to create characters}
    gem.email = "benhumphreys@gmail.com"
    gem.homepage = "http://github.com/bhumphreys/korean-string"
    gem.authors = ["Ben Humphreys"]
    #gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage report via RCov when installed; otherwise define a stub task that
# explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is expected to come from the Jeweler tasks above. Only
# hook it in when it actually exists, so `rake test` keeps working when
# Jeweler is missing or does not define that task.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Newer toolchains ship the task as RDoc::Task in
# 'rdoc/task'; 'rake/rdoctask' is the legacy location and is tried second.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not reach the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "korean-string #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,112 @@
1
+ # Originally transliterate-hacked from Perl from
2
+ # http://blog.naver.com/PostView.nhn?blogId=mokomoji&logNo=130013133481
3
+ #
4
+ # For the theory of why this works, check out the W3C spec on Korean encoding
5
+ # http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
6
+ # (Thanks to @ntrolls for this)
7
+
8
# Ruby 1.8 needs $KCODE to treat strings as UTF-8. Rubies whose String knows
# its own encoding (1.9 and later) no longer use $KCODE, so the assignment is
# skipped there.
$KCODE = 'UTF8' unless String.method_defined?(:encoding)

# The three tables below hold Hangul Compatibility Jamo codepoints. A jamo's
# position in its table is the index used by the syllable formula
#   0xAC00 + (lead * 21 + vowel) * 28 + tail
# so the ORDER of the entries matters and must not be changed.

# Leading consonants (19 entries).
CHOSUNG = [
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, # ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, # ㅃ ㅅ ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ
  0x314c, 0x314d, 0x314e                                          # ㅌ ㅍ ㅎ
].freeze

# Vowels (21 entries).
JWUNGSUNG = [
  0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, # ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ
  0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c, 0x315d, 0x315e, # ㅗ ㅘ ㅙ ㅚ ㅛ ㅜ ㅝ ㅞ
  0x315f, 0x3160, 0x3161, 0x3162, 0x3163                          # ㅟ ㅠ ㅡ ㅢ ㅣ
].freeze

# Final consonants (28 entries). Index 0 is the placeholder for "no final
# consonant", which is why the table starts with a literal 0.
JONGSUNG = [
  0,                                                              # (none)
  0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137,         # ㄱ ㄲ ㄳ ㄴ ㄵ ㄶ ㄷ
  0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140, # ㄹ ㄺ ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ
  0x3141, 0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314a, # ㅁ ㅂ ㅄ ㅅ ㅆ ㅇ ㅈ ㅊ
  0x314b, 0x314c, 0x314d, 0x314e                                  # ㅋ ㅌ ㅍ ㅎ
].freeze
32
+
33
+
34
+ # Not wrapping this in a module... not sure if that's a terrible idea
35
+
36
+ class String
37
+ def split_ko
38
+
39
+ raw_chars = self.unpack("U*")
40
+
41
+ final_result = Array.new
42
+
43
+ raw_chars.each do |char|
44
+ result = Array.new
45
+ if (char >= 0xAC00 && char <= 0xD7A3)
46
+ # Move it down in the range
47
+ c = char - 0xAC00;
48
+
49
+ # Here be dragons
50
+ a = c.to_f / (21 * 28)
51
+ c = c % (21 * 28)
52
+ b = c.to_f / 28
53
+ c = c % 28
54
+
55
+ a = a.to_i
56
+ b = b.to_i
57
+ c = c.to_i
58
+
59
+ result.push( CHOSUNG[a], JWUNGSUNG[b] )
60
+
61
+ if c != 0
62
+ result.push( JONGSUNG[c] )
63
+ end
64
+ else
65
+ result.push(char)
66
+ end
67
+
68
+ final_result.push(result.pack("U*").split(''))
69
+ end
70
+
71
+ return final_result
72
+
73
+ end
74
+ end
75
+
76
+
77
+ class Array
78
+ # We've got our sploded array of korean bits
79
+ # need to put them back into Real Words
80
+ def join_ko
81
+ # http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
82
+ # Leading, middle, following (optional)
83
+ a = self[0].unpack("U*").first
84
+ b = self[1].unpack("U*").first
85
+ c = self[2].unpack("U*").first if self[2]
86
+
87
+ offset_a = CHOSUNG.index(a)
88
+ if offset_a.nil?
89
+ raise
90
+ end
91
+ offset_b = JWUNGSUNG.index(b)
92
+ if offset_b.nil?
93
+ raise
94
+ end
95
+
96
+ offset_c = 0
97
+ if c
98
+ offset_c = JONGSUNG.index(c)
99
+ if offset_c.nil?
100
+ raise
101
+ end
102
+ end
103
+
104
+ raw = 0xAC00 +
105
+ offset_a * (21 * 28) +
106
+ offset_b * 28 +
107
+ offset_c
108
+
109
+ [ raw ].pack("U*")
110
+ end
111
+ end
112
+
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
+ require 'korean-string'
7
+
8
+ class Test::Unit::TestCase
9
+ end
@@ -0,0 +1,38 @@
1
+ require 'helper'
2
+
3
# Exercises String#split_ko and Array#join_ko.
class TestKoreanString < Test::Unit::TestCase

  # Syllables with and without a final consonant, including compound finals
  # and compound vowels.
  def test_split
    assert_equal(
      [["ㅇ", "ㅏ", "ㄴ"],
       ["ㄴ", "ㅕ", "ㅇ"],
       ["ㅎ", "ㅏ"],
       ["ㅅ", "ㅔ"],
       ["ㅇ", "ㅛ"]],
      '안녕하세요'.split_ko
    )

    assert_equal(
      [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]],
      '읽어싶'.split_ko
    )

    assert_equal(
      [["ㄱ", "ㅙ", "ㄴ"], ["ㅊ", "ㅏ", "ㄶ"], ["ㅇ", "ㅏ"]],
      '괜찮아'.split_ko
    )
  end

  # Characters outside the Hangul syllable range come back untouched, one
  # single-element array per character.
  def test_split_passes_through_non_hangul
    assert_equal([["a"], [" "], ["ㅇ", "ㅏ"]], 'a 아'.split_ko)
    assert_equal([], ''.split_ko)
  end

  def test_join
    assert_equal(
      "아",
      %w(ㅇ ㅏ).join_ko
    )

    assert_equal(
      "일",
      %w(ㅇ ㅣ ㄹ).join_ko
    )
  end

  # A piece that is not a valid leading consonant must be rejected.
  def test_join_rejects_unknown_pieces
    assert_raise(RuntimeError) { %w(a ㅏ).join_ko }
  end

  # Splitting a word and joining each syllable again restores the word.
  def test_round_trip
    word = '괜찮아요'
    assert_equal(word, word.split_ko.map { |pieces| pieces.join_ko }.join)
  end
end
38
+
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: korean-string
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Ben Humphreys
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-03 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Split Korean characters to individual compontents, join components together to create characters
23
+ email: benhumphreys@gmail.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ - README.rdoc
31
+ files:
32
+ - .document
33
+ - .gitignore
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - VERSION
38
+ - lib/korean-string.rb
39
+ - test/helper.rb
40
+ - test/test_korean-string.rb
41
+ has_rdoc: true
42
+ homepage: http://github.com/bhumphreys/korean-string
43
+ licenses: []
44
+
45
+ post_install_message:
46
+ rdoc_options:
47
+ - --charset=UTF-8
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ hash: 3
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.7
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Korean string join and split
75
+ test_files:
76
+ - test/helper.rb
77
+ - test/test_korean-string.rb