korean-string 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Ben Humphreys
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ = korean-string
2
+
3
+ Split Korean characters into individual components, join components together to create characters.
4
+
5
+ You could use it to make some weird conjugation rules. Go wild.
6
+
7
+ == Methods
8
+
9
+ === String.split_ko
10
+
11
+ Return an array of arrays of Korean character components
12
+
13
+ require 'korean-string'
14
+ '읽어싶'.split_ko
15
+ => [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"], ["ㅅ", "ㅣ", "ㅍ"]]
16
+
17
+ === Array.join_ko
18
+
19
+ Accepts an array of character pieces
20
+
21
+ require 'korean-string'
22
+ ["ㅇ", "ㅣ", "ㄺ"].join_ko
23
+ => '읽'
24
+
25
+
26
+ == Todo
27
+
28
+ Come up with other useful methods?
29
+
30
+
31
+ == Copyright
32
+
33
+ Copyright (c) 2010 Ben Humphreys. See LICENSE for details.
@@ -0,0 +1,53 @@
1
# Rakefile for the korean-string gem.
#
# Defines packaging tasks (via Jeweler), the unit-test task, a coverage task
# (via RCov) and RDoc generation. Jeweler and RCov are optional: when they
# cannot be loaded the remaining tasks stay usable.
require 'rubygems'
require 'rake'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "korean-string"
    gem.summary = %Q{Korean string join and split}
    gem.description = %Q{Split Korean characters to individual components, join components together to create characters}
    gem.email = "benhumphreys@gmail.com"
    gem.homepage = "http://github.com/bhumphreys/korean-string"
    gem.authors = ["Ben Humphreys"]
    #gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file (it is expected to come from
# Jeweler). Only hook it in when it exists, otherwise `rake test` would abort
# with "Don't know how to build task" whenever Jeweler failed to load.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Newer Rake releases no longer ship rake/rdoctask, so prefer the task class
# provided by RDoc itself and fall back to the legacy one on old setups.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "korean-string #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,112 @@
1
# encoding: utf-8
#
# Originally transliterate-hacked from Perl from
# http://blog.naver.com/PostView.nhn?blogId=mokomoji&logNo=130013133481
#
# For the theory of why this works, check out the W3C spec on Korean encoding
# http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
# (Thanks to @ntrolls for this)

# $KCODE only has an effect before Ruby 1.9; later interpreters ignore it and
# print a warning, so it is set only where it still matters.
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'

# Initial consonants as Hangul Compatibility Jamo code points. The position in
# the array is the initial-consonant index used by the syllable arithmetic
# below (19 entries).
CHOSUNG = [
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, # ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, # ㅃ ㅅ ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ
  0x314c, 0x314d, 0x314e                                          # ㅌ ㅍ ㅎ
].freeze

# Vowels as Hangul Compatibility Jamo code points; the position in the array
# is the vowel index (21 entries).
JWUNGSUNG = [
  0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, # ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ
  0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c, 0x315d, 0x315e, # ㅗ ㅘ ㅙ ㅚ ㅛ ㅜ ㅝ ㅞ
  0x315f, 0x3160, 0x3161, 0x3162, 0x3163                          # ㅟ ㅠ ㅡ ㅢ ㅣ
].freeze

# Final consonants as Hangul Compatibility Jamo code points; the position in
# the array is the final-consonant index (28 entries). Index 0 holds the
# placeholder 0 and stands for "no final consonant".
JONGSUNG = [
  0,      0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, # -  ㄱ ㄲ ㄳ ㄴ ㄵ ㄶ ㄷ
  0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140, # ㄹ ㄺ ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ
  0x3141, 0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314a, # ㅁ ㅂ ㅄ ㅅ ㅆ ㅇ ㅈ ㅊ
  0x314b, 0x314c, 0x314d, 0x314e                                  # ㅋ ㅌ ㅍ ㅎ
].freeze


# Not wrapping this in a module... not sure if that's a terrible idea

class String
  # Splits every character of the string into its Korean components.
  #
  # A precomposed Hangul syllable (U+AC00..U+D7A3) becomes
  # [initial, vowel] or [initial, vowel, final]. Any other character is
  # returned unchanged as a one-element array.
  #
  #   '읽어'.split_ko  # => [["ㅇ", "ㅣ", "ㄺ"], ["ㅇ", "ㅓ"]]
  #
  # @return [Array<Array<String>>] one array of components per character
  def split_ko
    unpack('U*').map do |code|
      parts = [code]

      if code.between?(0xAC00, 0xD7A3)
        # A syllable is encoded as 0xAC00 + (initial * 21 + vowel) * 28 + final,
        # so two integer divisions recover the three table indexes.
        initial, remainder = (code - 0xAC00).divmod(21 * 28)
        vowel, final = remainder.divmod(28)

        parts = [CHOSUNG[initial], JWUNGSUNG[vowel]]
        # Final index 0 means the syllable has no final consonant.
        parts << JONGSUNG[final] unless final.zero?
      end

      parts.map { |part| [part].pack('U') }
    end
  end
end


class Array
  # We've got our sploded array of korean bits
  # need to put them back into Real Words
  #
  # Accepts either
  # * a flat array with the pieces of ONE syllable:
  #   initial, vowel and an optional final, e.g. ["ㅇ", "ㅣ", "ㄹ"], or
  # * an array of such arrays, i.e. the output of String#split_ko, which is
  #   joined group by group into one string.
  #
  # A one-element group is returned as-is so that the non-syllable characters
  # emitted by String#split_ko survive a round trip. An empty array yields "".
  #
  #   %w(ㅇ ㅣ ㄹ).join_ko                       # => "일"
  #   [["ㅇ", "ㅏ"], ["a"], ["ㅇ", "ㅛ"]].join_ko  # => "아a요"
  #
  # @return [String] the composed text
  # @raise [RuntimeError] if a piece is not found in the matching jamo table
  def join_ko
    # Nested input (also true for the empty array): compose each group.
    return map { |group| group.join_ko }.join if all? { |piece| piece.is_a?(Array) }

    # A lone piece cannot form a syllable; hand it back unchanged.
    return first.to_s if size == 1

    # http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
    # Leading, middle, following (optional)
    lead_code  = self[0].unpack('U*').first
    vowel_code = self[1].unpack('U*').first
    tail_code  = self[2].unpack('U*').first if self[2]

    lead_index = CHOSUNG.index(lead_code)
    raise "join_ko: #{self[0].inspect} is not an initial consonant" if lead_index.nil?

    vowel_index = JWUNGSUNG.index(vowel_code)
    raise "join_ko: #{self[1].inspect} is not a vowel" if vowel_index.nil?

    tail_index = 0
    if tail_code
      tail_index = JONGSUNG.index(tail_code)
      raise "join_ko: #{self[2].inspect} is not a final consonant" if tail_index.nil?
    end

    # Inverse of the arithmetic used by String#split_ko.
    [0xAC00 + lead_index * (21 * 28) + vowel_index * 28 + tail_index].pack('U*')
  end
end
112
+
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
+ require 'korean-string'
7
+
8
+ class Test::Unit::TestCase
9
+ end
@@ -0,0 +1,38 @@
1
+ require 'helper'
2
+
3
# Unit tests for String#split_ko and Array#join_ko.
class TestKoreanString < Test::Unit::TestCase

  # Each syllable must come back as its own array of components, with the
  # final consonant present only when the syllable has one.
  def test_split
    greeting = [
      %w(ㅇ ㅏ ㄴ),
      %w(ㄴ ㅕ ㅇ),
      %w(ㅎ ㅏ),
      %w(ㅅ ㅔ),
      %w(ㅇ ㅛ)
    ]
    assert_equal(greeting, '안녕하세요'.split_ko)

    double_final = [%w(ㅇ ㅣ ㄺ), %w(ㅇ ㅓ), %w(ㅅ ㅣ ㅍ)]
    assert_equal(double_final, '읽어싶'.split_ko)

    compound_vowel = [%w(ㄱ ㅙ ㄴ), %w(ㅊ ㅏ ㄶ), %w(ㅇ ㅏ)]
    assert_equal(compound_vowel, '괜찮아'.split_ko)
  end

  # A flat list of pieces must compose into a single syllable, with and
  # without a final consonant.
  def test_join
    without_final = ["ㅇ", "ㅏ"]
    assert_equal("아", without_final.join_ko)

    with_final = ["ㅇ", "ㅣ", "ㄹ"]
    assert_equal("일", with_final.join_ko)
  end
end
38
+
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: korean-string
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Ben Humphreys
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-03 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Split Korean characters to individual components, join components together to create characters
23
+ email: benhumphreys@gmail.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ - README.rdoc
31
+ files:
32
+ - .document
33
+ - .gitignore
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - VERSION
38
+ - lib/korean-string.rb
39
+ - test/helper.rb
40
+ - test/test_korean-string.rb
41
+ has_rdoc: true
42
+ homepage: http://github.com/bhumphreys/korean-string
43
+ licenses: []
44
+
45
+ post_install_message:
46
+ rdoc_options:
47
+ - --charset=UTF-8
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ hash: 3
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.7
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Korean string join and split
75
+ test_files:
76
+ - test/helper.rb
77
+ - test/test_korean-string.rb