twkorean 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7c036bc9d6da0eb2bfa259b0eafa83b54a182eee
4
- data.tar.gz: 0a8824c1b459e991791523d8b390aff58bcfb09b
3
+ metadata.gz: 9fa5c2b3f783010b5b67c23db599ffd3ec75ef61
4
+ data.tar.gz: 2117abd053395906001c822082aef413c91441c2
5
5
  SHA512:
6
- metadata.gz: 76796b2e4b794b76270c3e46d39fe11d847aebf8e7bf890665e34c7748f2f5a8e68b2a7179b701bad4c2e5184935fd85a16ab49142e4e4f54f301f114ed77491
7
- data.tar.gz: c83bdfdfc02020422a1f2a6afdbce104cd03640b7722ad700a81f2a3cd9bc4c70b0e17e26366a47a8c4adbadae525ebf1d44fb6e1fe4f6d515dd8f2b823fc091
6
+ metadata.gz: 7ed15b2886e42e367c0652e1a86aad07b98c09279b37e0cd02b1e733499076e69c9481573e91c51d01ae70ed4e3feec4d0610e8dd0bed059d5a536c9193ce776
7
+ data.tar.gz: 854b177257cf8252e1a81916a02ea721e01ef94ea3e3d4a4d02b02e65a9872bb30934d46c0b7c24a3cb8a8ae5fbca06dceb688496807a4d0f24a6c65ece08198
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # Twkorean
2
2
 
3
- TODO: Write a gem description
3
+ ## Compatibility
4
+
5
+ Currently wraps [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0) / 현재 이 프로젝트는 [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0)을 사용중입니다.
4
6
 
5
7
  ## Installation
6
8
 
@@ -18,7 +20,8 @@ Or install it yourself as:
18
20
 
19
21
  ## Required
20
22
 
21
- $ export JAVA_HOME=/etc/alternatives/java_sdk_{Your version}
23
+ $ export JAVA_HOME={Your Path}
24
+ $ gem install 'rjb'
22
25
 
23
26
  ## Test
24
27
 
@@ -26,8 +29,40 @@ Or install it yourself as:
26
29
 
27
30
  ## Usage
28
31
 
29
- TODO: Write usage instructions here
32
+ describe "Twkorean" do
33
+ TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
34
+ before do
35
+
36
+ end
37
+
38
+ it "Normalize" do
39
+ twkorean = Twkorean::TwitterKoreanText.new
40
+ p "Normlize"
41
+ p twkorean.normalize(TEXT)
42
+ # 한국어를 처리하는 예시입니다ㅋㅋ #한국어
43
+ end
44
+
45
+ it "Tokenize" do
46
+ twkorean = Twkorean::TwitterKoreanText.new(true, false)
47
+ p "#Tokenize"
48
+ p twkorean.tokenize(TEXT)
49
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
50
+ end
51
+
52
+ it "Stemming" do
53
+ twkorean = Twkorean::TwitterKoreanText.new
54
+ p "#Stemming"
55
+ p twkorean.tokenize(TEXT)
56
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
57
+ end
30
58
 
59
+ it "Phrase extraction" do
60
+ twkorean = Twkorean::TwitterKoreanText.new
61
+ p "Phrase extraction"
62
+ p twkorean.extract_phrases(TEXT)
63
+ # ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
64
+ end
65
+ end
31
66
  ## Contributing
32
67
 
33
68
  1. Fork it ( https://github.com/[my-github-username]/twkorean/fork )
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  module Twkorean
@@ -43,5 +43,9 @@ module Twkorean
43
43
  phrases.toArray.map{|x| x.toString}
44
44
  end
45
45
 
46
+ def parser(text)
47
+ text.match(/(.*)\(([a-zA-Z]*): ([0-9]+), ([0-9]+)\)/).to_a
48
+ end
49
+
46
50
  end
47
51
  end
@@ -1,8 +1,8 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  module Twkorean
7
- VERSION = "0.0.2"
7
+ VERSION = "0.0.3"
8
8
  end
data/lib/twkorean.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  require "twkorean/version"
data/test/test_helper.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  require 'minitest/autorun'
data/test/twkorean.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # @name twkorean-ruby
2
2
  # @author JunSangPil
3
- # @version 0.0.2
3
+ # @version 0.0.3
4
4
  # @url https://github.com/jun85664396/twkorean-ruby
5
5
  # @license Apache License 2.0
6
6
  require_relative 'test_helper'
@@ -9,31 +9,42 @@ require 'twkorean'
9
9
  describe "Twkorean" do
10
10
  TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
11
11
  before do
12
-
12
+
13
13
  end
14
14
 
15
15
  it "Normalize" do
16
16
  twkorean = Twkorean::TwitterKoreanText.new
17
17
  p "Normlize"
18
18
  p twkorean.normalize(TEXT)
19
+ # 한국어를 처리하는 예시입니다ㅋㅋ #한국어
19
20
  end
20
21
 
21
22
  it "Tokenize" do
22
23
  twkorean = Twkorean::TwitterKoreanText.new(true, false)
23
24
  p "#Tokenize"
24
25
  p twkorean.tokenize(TEXT)
26
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
25
27
  end
26
28
 
27
29
  it "Stemming" do
28
30
  twkorean = Twkorean::TwitterKoreanText.new
29
31
  p "#Stemming"
30
32
  p twkorean.tokenize(TEXT)
33
+ # ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
31
34
  end
32
35
 
33
36
  it "Phrase extraction" do
34
37
  twkorean = Twkorean::TwitterKoreanText.new
35
38
  p "Phrase extraction"
36
39
  p twkorean.extract_phrases(TEXT)
40
+ # ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
41
+ end
42
+
43
+ it "Parser" do
44
+ twkorean = Twkorean::TwitterKoreanText.new(true, false)
45
+ p "#Tokenize Parser"
46
+ p twkorean.tokenize(TEXT).map{|x| twkorean.parser(x) }
47
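+ # [["한국어(Noun: 0, 3)", "한국어", "Noun", "0", "3"], ["를(Josa: 3, 1)", "를", "Josa", "3", "1"], ["처리(Noun: 5, 2)", "처리", "Noun", "5", "2"], ["하는(Verb: 7, 2)", "하는", "Verb", "7", "2"], ["예시(Noun: 10, 2)", "예시", "Noun", "10", "2"], ["입니(Adjective: 12, 2)", "입니", "Adjective", "12", "2"], ["다(Eomi: 14, 1)", "다", "Eomi", "14", "1"], ["ㅋㅋ(KoreanParticle: 15, 2)", "ㅋㅋ", "KoreanParticle", "15", "2"], ["#한국어(Hashtag: 18, 4)", "#한국어", "Hashtag", "18", "4"]]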
37
48
  end
38
49
 
39
50
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twkorean
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - JunSangPil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-21 00:00:00.000000000 Z
11
+ date: 2015-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler