twkorean 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +38 -3
- data/lib/twkorean/twitter_korean_text.rb +5 -1
- data/lib/twkorean/version.rb +2 -2
- data/lib/twkorean.rb +1 -1
- data/test/test_helper.rb +1 -1
- data/test/twkorean.rb +13 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9fa5c2b3f783010b5b67c23db599ffd3ec75ef61
|
4
|
+
data.tar.gz: 2117abd053395906001c822082aef413c91441c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ed15b2886e42e367c0652e1a86aad07b98c09279b37e0cd02b1e733499076e69c9481573e91c51d01ae70ed4e3feec4d0610e8dd0bed059d5a536c9193ce776
|
7
|
+
data.tar.gz: 854b177257cf8252e1a81916a02ea721e01ef94ea3e3d4a4d02b02e65a9872bb30934d46c0b7c24a3cb8a8ae5fbca06dceb688496807a4d0f24a6c65ece08198
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Twkorean
|
2
2
|
|
3
|
-
|
3
|
+
## Compatibility
|
4
|
+
|
5
|
+
Currently wraps [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0) / 현재 이 프로젝트는 [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0)을 사용중입니다.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -18,7 +20,8 @@ Or install it yourself as:
|
|
18
20
|
|
19
21
|
## Required
|
20
22
|
|
21
|
-
$ export JAVA_HOME
|
23
|
+
$ export JAVA_HOME={Your Path}
|
24
|
+
$ gem install 'rjb'
|
22
25
|
|
23
26
|
## Test
|
24
27
|
|
@@ -26,8 +29,40 @@ Or install it yourself as:
|
|
26
29
|
|
27
30
|
## Usage
|
28
31
|
|
29
|
-
|
32
|
+
describe "Twkorean" do
|
33
|
+
TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
|
34
|
+
before do
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
it "Normalize" do
|
39
|
+
twkorean = Twkorean::TwitterKoreanText.new
|
40
|
+
p "Normlize"
|
41
|
+
p twkorean.normalize(TEXT)
|
42
|
+
# 한국어를 처리하는 예시입니다ㅋㅋ #한국어
|
43
|
+
end
|
44
|
+
|
45
|
+
it "Tokenize" do
|
46
|
+
twkorean = Twkorean::TwitterKoreanText.new(true, false)
|
47
|
+
p "#Tokenize"
|
48
|
+
p twkorean.tokenize(TEXT)
|
49
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
50
|
+
end
|
51
|
+
|
52
|
+
it "Stemming" do
|
53
|
+
twkorean = Twkorean::TwitterKoreanText.new
|
54
|
+
p "#Stemming"
|
55
|
+
p twkorean.tokenize(TEXT)
|
56
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
57
|
+
end
|
30
58
|
|
59
|
+
it "Phrase extraction" do
|
60
|
+
twkorean = Twkorean::TwitterKoreanText.new
|
61
|
+
p "Phrase extraction"
|
62
|
+
p twkorean.extract_phrases(TEXT)
|
63
|
+
# ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
|
64
|
+
end
|
65
|
+
end
|
31
66
|
## Contributing
|
32
67
|
|
33
68
|
1. Fork it ( https://github.com/[my-github-username]/twkorean/fork )
|
data/lib/twkorean/twitter_korean_text.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# @name twkorean-ruby
|
2
2
|
# @author JunSangPil
|
3
|
-
# @version 0.0.2
|
3
|
+
# @version 0.0.3
|
4
4
|
# @url https://github.com/jun85664396/twkorean-ruby
|
5
5
|
# @license Apache License 2.0
|
6
6
|
module Twkorean
|
@@ -43,5 +43,9 @@ module Twkorean
|
|
43
43
|
phrases.toArray.map{|x| x.toString}
|
44
44
|
end
|
45
45
|
|
46
|
+
def parser(text)
|
47
|
+
text.match(/(.*)\(([a-zA-Z]*): ([0-9]+), ([0-9]+)\)/).to_a
|
48
|
+
end
|
49
|
+
|
46
50
|
end
|
47
51
|
end
|
data/lib/twkorean/version.rb
CHANGED
data/lib/twkorean.rb
CHANGED
data/test/test_helper.rb
CHANGED
data/test/twkorean.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# @name twkorean-ruby
|
2
2
|
# @author JunSangPil
|
3
|
-
# @version 0.0.2
|
3
|
+
# @version 0.0.3
|
4
4
|
# @url https://github.com/jun85664396/twkorean-ruby
|
5
5
|
# @license Apache License 2.0
|
6
6
|
require_relative 'test_helper'
|
@@ -9,31 +9,42 @@ require 'twkorean'
|
|
9
9
|
describe "Twkorean" do
|
10
10
|
TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
|
11
11
|
before do
|
12
|
-
|
12
|
+
|
13
13
|
end
|
14
14
|
|
15
15
|
it "Normalize" do
|
16
16
|
twkorean = Twkorean::TwitterKoreanText.new
|
17
17
|
p "Normlize"
|
18
18
|
p twkorean.normalize(TEXT)
|
19
|
+
# 한국어를 처리하는 예시입니다ㅋㅋ #한국어
|
19
20
|
end
|
20
21
|
|
21
22
|
it "Tokenize" do
|
22
23
|
twkorean = Twkorean::TwitterKoreanText.new(true, false)
|
23
24
|
p "#Tokenize"
|
24
25
|
p twkorean.tokenize(TEXT)
|
26
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
25
27
|
end
|
26
28
|
|
27
29
|
it "Stemming" do
|
28
30
|
twkorean = Twkorean::TwitterKoreanText.new
|
29
31
|
p "#Stemming"
|
30
32
|
p twkorean.tokenize(TEXT)
|
33
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
31
34
|
end
|
32
35
|
|
33
36
|
it "Phrase extraction" do
|
34
37
|
twkorean = Twkorean::TwitterKoreanText.new
|
35
38
|
p "Phrase extraction"
|
36
39
|
p twkorean.extract_phrases(TEXT)
|
40
|
+
# ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
|
41
|
+
end
|
42
|
+
|
43
|
+
it "Parser" do
|
44
|
+
twkorean = Twkorean::TwitterKoreanText.new(true, false)
|
45
|
+
p "#Tokenize Parser"
|
46
|
+
p twkorean.tokenize(TEXT).map{|x| twkorean.parser(x) }
|
47
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
37
48
|
end
|
38
49
|
|
39
50
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twkorean
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- JunSangPil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|