twkorean 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7c036bc9d6da0eb2bfa259b0eafa83b54a182eee
4
- data.tar.gz: 0a8824c1b459e991791523d8b390aff58bcfb09b
3
+ metadata.gz: 9fa5c2b3f783010b5b67c23db599ffd3ec75ef61
4
+ data.tar.gz: 2117abd053395906001c822082aef413c91441c2
5
5
  SHA512:
6
- metadata.gz: 76796b2e4b794b76270c3e46d39fe11d847aebf8e7bf890665e34c7748f2f5a8e68b2a7179b701bad4c2e5184935fd85a16ab49142e4e4f54f301f114ed77491
7
- data.tar.gz: c83bdfdfc02020422a1f2a6afdbce104cd03640b7722ad700a81f2a3cd9bc4c70b0e17e26366a47a8c4adbadae525ebf1d44fb6e1fe4f6d515dd8f2b823fc091
6
+ metadata.gz: 7ed15b2886e42e367c0652e1a86aad07b98c09279b37e0cd02b1e733499076e69c9481573e91c51d01ae70ed4e3feec4d0610e8dd0bed059d5a536c9193ce776
7
+ data.tar.gz: 854b177257cf8252e1a81916a02ea721e01ef94ea3e3d4a4d02b02e65a9872bb30934d46c0b7c24a3cb8a8ae5fbca06dceb688496807a4d0f24a6c65ece08198
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # Twkorean
2
2
 
3
- TODO: Write a gem description
3
+ ## Compatibility
4
+
5
+ Currently wraps [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0). / 현재 이 프로젝트는 [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0)을 사용 중입니다.
4
6
 
5
7
  ## Installation
6
8
 
@@ -18,7 +20,8 @@ Or install it yourself as:
18
20
 
19
21
  ## Required
20
22
 
21
- $ export JAVA_HOME=/etc/alternatives/java_sdk_{Your version}
23
+ $ export JAVA_HOME={Your Path}
24
+ $ gem install 'rjb'
22
25
 
23
26
  ## Test
24
27
 
@@ -26,8 +29,40 @@ Or install it yourself as:
26
29
 
27
30
  ## Usage
28
31
 
29
- TODO: Write usage instructions here
32
+ describe "Twkorean" do
33
+ TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
34
+ before do
35
+
36
+ end
37
+
38
+ it "Normalize" do
39
+ twkorean = Twkorean::TwitterKoreanText.new
40
+ p "Normlize"
41
+ p twkorean.normalize(TEXT)
42
+ # 한국어를 처리하는 예시입니다ㅋㅋ #한국어
43
+ end
44
+
45
+ it "Tokenize" do
46
+ twkorean = Twkorean::TwitterKoreanText.new(true, false)
47
+ p "#Tokenize"
48
+ p twkorean.tokenize(TEXT)
49
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
50
+ end
51
+
52
+ it "Stemming" do
53
+ twkorean = Twkorean::TwitterKoreanText.new
54
+ p "#Stemming"
55
+ p twkorean.tokenize(TEXT)
56
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
57
+ end
30
58
 
59
+ it "Phrase extraction" do
60
+ twkorean = Twkorean::TwitterKoreanText.new
61
+ p "Phrase extraction"
62
+ p twkorean.extract_phrases(TEXT)
63
+ # ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
64
+ end
65
+ end
31
66
  ## Contributing
32
67
 
33
68
  1. Fork it ( https://github.com/[my-github-username]/twkorean/fork )
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  module Twkorean
@@ -43,5 +43,9 @@ module Twkorean
43
43
  phrases.toArray.map{|x| x.toString}
44
44
  end
45
45
 
46
+ def parser(text)
47
+ text.match(/(.*)\(([a-zA-Z]*): ([0-9]+), ([0-9]+)\)/).to_a
48
+ end
49
+
46
50
  end
47
51
  end
@@ -1,8 +1,8 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  module Twkorean
7
- VERSION = "0.0.2"
7
+ VERSION = "0.0.3"
8
8
  end
data/lib/twkorean.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  require "twkorean/version"
data/test/test_helper.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  require 'minitest/autorun'
data/test/twkorean.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  require_relative 'test_helper'
@@ -9,31 +9,42 @@ require 'twkorean'
9
9
  describe "Twkorean" do
10
10
  TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
11
11
  before do
12
-
12
+
13
13
  end
14
14
 
15
15
  it "Normalize" do
16
16
  twkorean = Twkorean::TwitterKoreanText.new
17
17
  p "Normlize"
18
18
  p twkorean.normalize(TEXT)
19
+ # 한국어를 처리하는 예시입니다ㅋㅋ #한국어
19
20
  end
20
21
 
21
22
  it "Tokenize" do
22
23
  twkorean = Twkorean::TwitterKoreanText.new(true, false)
23
24
  p "#Tokenize"
24
25
  p twkorean.tokenize(TEXT)
26
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
25
27
  end
26
28
 
27
29
  it "Stemming" do
28
30
  twkorean = Twkorean::TwitterKoreanText.new
29
31
  p "#Stemming"
30
32
  p twkorean.tokenize(TEXT)
33
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
31
34
  end
32
35
 
33
36
  it "Phrase extraction" do
34
37
  twkorean = Twkorean::TwitterKoreanText.new
35
38
  p "Phrase extraction"
36
39
  p twkorean.extract_phrases(TEXT)
40
+ # ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
41
+ end
42
+
43
+ it "Parser" do
44
+ twkorean = Twkorean::TwitterKoreanText.new(true, false)
45
+ p "#Tokenize Parser"
46
+ p twkorean.tokenize(TEXT).map{|x| twkorean.parser(x) }
47
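+ # [["한국어(Noun: 0, 3)", "한국어", "Noun", "0", "3"], ["를(Josa: 3, 1)", "를", "Josa", "3", "1"], ["처리(Noun: 5, 2)", "처리", "Noun", "5", "2"], ["하는(Verb: 7, 2)", "하는", "Verb", "7", "2"], ["예시(Noun: 10, 2)", "예시", "Noun", "10", "2"], ["입니(Adjective: 12, 2)", "입니", "Adjective", "12", "2"], ["다(Eomi: 14, 1)", "다", "Eomi", "14", "1"], ["ㅋㅋ(KoreanParticle: 15, 2)", "ㅋㅋ", "KoreanParticle", "15", "2"], ["#한국어(Hashtag: 18, 4)", "#한국어", "Hashtag", "18", "4"]]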
37
48
  end
38
49
 
39
50
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twkorean
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - JunSangPil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-21 00:00:00.000000000 Z
11
+ date: 2015-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler