to_sep 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bd72fa4deaeb39617bf973d5f7e1bdaa79885ae3
4
+ data.tar.gz: b9dc779594c86bda26582eb3dbc61597ffa95ee8
5
+ SHA512:
6
+ metadata.gz: 0e56f0a9ab86992ba384d7de60fd3c8c0b36a144322c3d56866565980ce99c3a437ebed6be8bf58a9ae7ddae981485f68ae1deca136913292a72548e45f58433
7
+ data.tar.gz: 1f2c99522ba2d5e0e57141a17bbb2b69482f5bd104ff20fa0b53c06289a190a75354bd2549c32f21a70c01159d98bd162c5f35c94ba13ece7d88e781c40d7853
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in to_sep.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Kim,SeongJun
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # to_sep
2
+
3
+ Ruby character separator.
4
+ When you implement search for a website or document, Korean characters often need to be separated into their component letters (jamo). to_sep helps you separate Korean characters.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'to_sep'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install to_sep
19
+
20
+ ## Usage
21
+
22
+ "안녕하세요".to_sep # => "ㅇㅏㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ"
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it ( http://github.com/<my-github-username>/to_sep/fork )
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/lib/to_sep.rb ADDED
@@ -0,0 +1,35 @@
1
+ require "to_sep/version"
2
+ require "to_sep/korean_separator"
3
+
4
# String helpers for working with Korean (Hangul) text.
#
# The methods operate on +self+ and are written to be mixed into String.
module ToSep
  # Returns the Unicode code point of the first character of the string.
  #
  # @return [Integer, nil] the code point, or nil for an empty string
  def to_unicode
    unpack("U*").first
  end

  # Tells whether the string consists of precomposed Hangul syllables rather
  # than standalone jamo.
  #
  # The syllable test uses the full Hangul Syllables block, "가" (U+AC00)
  # through "힣" (U+D7A3). The previous upper bound "힇" (U+D787) silently
  # excluded every syllable from "히" onwards (e.g. "힘"), and the previous
  # lower bound "ㄱ" (U+3131) let CJK ideographs through as "Korean".
  #
  # @return [Boolean] true when the string contains at least one Hangul
  #   syllable and no standalone consonant (ㄱ-ㅎ) or vowel (ㅏ-ㅣ) jamo
  def korean?
    return false unless self =~ /[가-힣]/

    # A lone jamo is already separated, so it does not count as a syllable.
    (self =~ /[ㄱ-ㅎㅏ-ㅣ]/).nil?
  end

  # Decomposes every Hangul syllable in the string into its jamo.
  #
  # @return [String] the result of KoreanSeparator.split for this string
  def to_sep
    KoreanSeparator.split(self)
  end
end
25
+
26
# Adds a code-point-to-character helper to integers.
#
# Integer is reopened instead of Fixnum: Fixnum was merged into Integer in
# Ruby 2.4 and removed in Ruby 3.2, where `class Fixnum` would define a new,
# unrelated class and leave integers without this method. On older Rubies
# Fixnum inherits from Integer, so the method is still available there.
class Integer
  # Converts the integer, taken as a Unicode code point, to a character.
  #
  # @return [String] a one-character UTF-8 string
  def unicode_chr
    chr(Encoding::UTF_8)
  end
end
31
+
32
# Mix the ToSep helpers (to_unicode, korean?, to_sep) into every String.
String.send(:include, ToSep)
35
+
@@ -0,0 +1,61 @@
1
# Decomposes precomposed Hangul syllables into their jamo (initial consonant,
# vowel, optional final consonant) and composes jamo back into a syllable.
#
# The class is self-contained: it works on code points directly and does not
# rely on helper methods being patched into String or Integer.
class KoreanSeparator
  # Code point of "가", the first precomposed Hangul syllable.
  SYLLABLE_BASE = 0xAC00
  # Code point of "힣", the last precomposed Hangul syllable.
  SYLLABLE_LAST = 0xD7A3

  # Initial consonants, in syllable-table order.
  INITIAL_CONSONANTS = %w[ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ ㅃ ㅅ ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ ㅌ ㅍ ㅎ].each(&:freeze).freeze

  # Vowels, in syllable-table order.
  VOWELS = %w[ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ ㅗ ㅘ ㅙ ㅚ ㅛ ㅜ ㅝ ㅞ ㅟ ㅠ ㅡ ㅢ ㅣ].each(&:freeze).freeze

  # Final consonants, in syllable-table order. The leading " " stands for
  # "no final consonant" and is stripped from decomposed output.
  FINAL_CONSONANTS = ([" "] + %w[ㄱ ㄲ ㄳ ㄴ ㄵ ㄶ ㄷ ㄹ ㄺ ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ ㅁ ㅂ ㅄ ㅅ
                                 ㅆ ㅇ ㅈ ㅊ ㅋ ㅌ ㅍ ㅎ]).each(&:freeze).freeze

  # Convenience wrapper: decomposes +string+ in one call.
  #
  # @param string [String] text to decompose
  # @return [String] +string+ with every Hangul syllable replaced by its jamo
  def self.split(string)
    new(string).split
  end

  # @param string [String] text this separator will decompose
  def initialize(string)
    @string = string
  end

  # Decomposes every Hangul syllable of the wrapped string; all other
  # characters (ASCII, punctuation, standalone jamo, CJK, ...) pass through.
  #
  # @return [String] the decomposed text
  def split
    @string.each_char.inject("".dup) { |out, ch| out << sep_by(ch) }
  end

  # Decomposes a single Hangul syllable into its jamo.
  #
  # @param ch [String] a character; only its first code point is examined
  # @return [String] the jamo of the syllable, or +ch+ unchanged when it is
  #   not a precomposed Hangul syllable
  def sep_by(ch)
    code = ch.unpack("U*").first
    return ch unless syllable_code?(code)

    # Syllables are laid out as ((initial * 21) + vowel) * 28 + final.
    initial_index, rest = (code - SYLLABLE_BASE).divmod(VOWELS.length * FINAL_CONSONANTS.length)
    vowel_index, final_index = rest.divmod(FINAL_CONSONANTS.length)

    # strip drops the " " placeholder used for "no final consonant".
    [INITIAL_CONSONANTS[initial_index], VOWELS[vowel_index], FINAL_CONSONANTS[final_index]].join.strip
  end

  # Composes a syllable from the code points of its jamo.
  #
  # @param u_initial_consonant [Integer] code point of the initial consonant
  # @param u_vowel [Integer] code point of the vowel
  # @param u_final_consonant [Integer, nil] code point of the final consonant;
  #   nil or the code point of " " means "no final consonant"
  # @return [String] the composed syllable
  # @raise [ArgumentError] when a code point is not a jamo of the expected kind
  def unicode_composite(u_initial_consonant, u_vowel, u_final_consonant = nil)
    i = jamo_index(INITIAL_CONSONANTS, u_initial_consonant, "initial consonant")
    v = jamo_index(VOWELS, u_vowel, "vowel")
    f = u_final_consonant.nil? ? 0 : jamo_index(FINAL_CONSONANTS, u_final_consonant, "final consonant")

    code = SYLLABLE_BASE + (i * VOWELS.length * FINAL_CONSONANTS.length) + (v * FINAL_CONSONANTS.length) + f
    [code].pack("U")
  end

  # @return [Integer] code point of "가", the base of the syllable table
  def offset
    SYLLABLE_BASE
  end

  # Initial consonants.
  def initial_consonant
    INITIAL_CONSONANTS
  end

  # Vowels.
  def vowel
    VOWELS
  end

  # Final consonants (index 0 is the " " placeholder).
  def final_consonant
    FINAL_CONSONANTS
  end

  private

  # True when +code+ is the code point of a precomposed Hangul syllable.
  def syllable_code?(code)
    !code.nil? && code >= SYLLABLE_BASE && code <= SYLLABLE_LAST
  end

  # Position of the jamo with code point +code_point+ inside +table+.
  def jamo_index(table, code_point, role)
    index = table.index { |jamo| jamo.ord == code_point }
    return index if index

    raise ArgumentError, "#{code_point.inspect} is not the code point of a Hangul #{role}"
  end
end
@@ -0,0 +1,3 @@
1
module ToSep
  # Version string of the to_sep gem; the gemspec reads it as ToSep::VERSION.
  VERSION = '0.0.1'
end
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../../lib/to_sep', __FILE__)
2
+ require 'rspec'
@@ -0,0 +1,18 @@
1
+ require File.expand_path('../../spec_helper', __FILE__)
2
+
3
describe KoreanSeparator do
  # The instance-level examples call helpers with explicit arguments, so the
  # text wrapped by the separator is only a placeholder.
  let(:korean_separator) { KoreanSeparator.new("Something") }

  it ".split should return separated charactors" do
    KoreanSeparator.split("안녕하세요.").should eq("ㅇㅏㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ.")
    KoreanSeparator.split("반갑습니다.").should eq("ㅂㅏㄴㄱㅏㅂㅅㅡㅂㄴㅣㄷㅏ.")
  end

  it "#sep_by should return separated charactors for just a charactor" do
    korean_separator.sep_by("안").should eq("ㅇㅏㄴ")
  end

  it "#unicode_composite should return a charactor that composition of [u_initial_consonant, u_vowel, u_final_consonant]" do
    # Arguments are the code points of the three jamo that make up "안".
    jamo_codes = ["ㅇ", "ㅏ", "ㄴ"].map(&:to_unicode)
    korean_separator.unicode_composite(*jamo_codes).should eq("안")
  end
end
@@ -0,0 +1,8 @@
1
+ require File.expand_path('../spec_helper', __FILE__)
2
+
3
describe String do
  # Exercise the String#to_sep mixin method itself; calling
  # KoreanSeparator.split here would leave the mixin untested.
  it "#to_sep should return separated charactors" do
    "안녕하세요.".to_sep.should == "ㅇㅏㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ."
    "반갑습니다.".to_sep.should == "ㅂㅏㄴㄱㅏㅂㅅㅡㅂㄴㅣㄷㅏ."
  end
end
data/to_sep.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'to_sep/version'
5
+
6
# Gem specification for to_sep: identity, packaged files and
# development-only dependencies.
Gem::Specification.new do |s|
  # Identity and descriptive metadata.
  s.name        = "to_sep"
  s.version     = ToSep::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Kim,SeongJun"]
  s.email       = ["me@kimseongjun.co.kr"]
  s.summary     = "Ruby charactor separator."
  s.description = "When you make search in website or document, Korean charactor should be separated. " \
                  "to_sep help you separate a Korean charactor."
  s.homepage    = "https://github.com/victorkim/to_sep"
  s.license     = "MIT"

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  tracked_files   = `git ls-files`.split($/)
  s.files         = tracked_files
  s.executables   = s.files.grep(%r{^bin/}) { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  # Development-only dependencies.
  s.add_development_dependency("bundler", "~> 1.5")
  s.add_development_dependency("rake", "~> 0")
  s.add_development_dependency("rspec", "~> 2.4")
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: to_sep
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kim,SeongJun
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.4'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.4'
55
+ description: When you make search in website or document, Korean charactor should
56
+ be separated. to_sep help you separate a Korean charactor.
57
+ email:
58
+ - me@kimseongjun.co.kr
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - lib/to_sep.rb
69
+ - lib/to_sep/korean_separator.rb
70
+ - lib/to_sep/version.rb
71
+ - spec/spec_helper.rb
72
+ - spec/to_sep/korean_separator_spec.rb
73
+ - spec/to_sep_spec.rb
74
+ - to_sep.gemspec
75
+ homepage: https://github.com/victorkim/to_sep
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.2.0
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: Ruby charactor separator.
99
+ test_files:
100
+ - spec/spec_helper.rb
101
+ - spec/to_sep/korean_separator_spec.rb
102
+ - spec/to_sep_spec.rb
103
+ has_rdoc: