twkorean 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -3
- data/lib/twkorean/twitter_korean_text.rb +5 -1
- data/lib/twkorean/version.rb +2 -2
- data/lib/twkorean.rb +1 -1
- data/test/test_helper.rb +1 -1
- data/test/twkorean.rb +13 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9fa5c2b3f783010b5b67c23db599ffd3ec75ef61
|
4
|
+
data.tar.gz: 2117abd053395906001c822082aef413c91441c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ed15b2886e42e367c0652e1a86aad07b98c09279b37e0cd02b1e733499076e69c9481573e91c51d01ae70ed4e3feec4d0610e8dd0bed059d5a536c9193ce776
|
7
|
+
data.tar.gz: 854b177257cf8252e1a81916a02ea721e01ef94ea3e3d4a4d02b02e65a9872bb30934d46c0b7c24a3cb8a8ae5fbca06dceb688496807a4d0f24a6c65ece08198
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Twkorean
|
2
2
|
|
3
|
-
|
3
|
+
## Compatibility
|
4
|
+
|
5
|
+
Currently wraps [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0) / 현재 이 프로젝트는 [twitter-korean-text 3.0](https://github.com/twitter/twitter-korean-text/tree/korean-text-3.0)을 사용중입니다.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -18,7 +20,8 @@ Or install it yourself as:
|
|
18
20
|
|
19
21
|
## Required
|
20
22
|
|
21
|
-
$ export JAVA_HOME
|
23
|
+
$ export JAVA_HOME={Your Path}
|
24
|
+
$ gem install 'rjb'
|
22
25
|
|
23
26
|
## Test
|
24
27
|
|
@@ -26,8 +29,40 @@ Or install it yourself as:
|
|
26
29
|
|
27
30
|
## Usage
|
28
31
|
|
29
|
-
|
32
|
+
describe "Twkorean" do
|
33
|
+
TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
|
34
|
+
before do
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
it "Normalize" do
|
39
|
+
twkorean = Twkorean::TwitterKoreanText.new
|
40
|
+
p "Normalize"
|
41
|
+
p twkorean.normalize(TEXT)
|
42
|
+
# 한국어를 처리하는 예시입니다ㅋㅋ #한국어
|
43
|
+
end
|
44
|
+
|
45
|
+
it "Tokenize" do
|
46
|
+
twkorean = Twkorean::TwitterKoreanText.new(true, false)
|
47
|
+
p "#Tokenize"
|
48
|
+
p twkorean.tokenize(TEXT)
|
49
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
50
|
+
end
|
51
|
+
|
52
|
+
it "Stemming" do
|
53
|
+
twkorean = Twkorean::TwitterKoreanText.new
|
54
|
+
p "#Stemming"
|
55
|
+
p twkorean.tokenize(TEXT)
|
56
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
57
|
+
end
|
30
58
|
|
59
|
+
it "Phrase extraction" do
|
60
|
+
twkorean = Twkorean::TwitterKoreanText.new
|
61
|
+
p "Phrase extraction"
|
62
|
+
p twkorean.extract_phrases(TEXT)
|
63
|
+
# ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
|
64
|
+
end
|
65
|
+
end
|
31
66
|
## Contributing
|
32
67
|
|
33
68
|
1. Fork it ( https://github.com/[my-github-username]/twkorean/fork )
|
data/lib/twkorean/twitter_korean_text.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# @name twkorean-ruby
|
2
2
|
# @author JunSangPil
|
3
|
-
# @version 0.0.
|
3
|
+
# @version 0.0.3
|
4
4
|
# @url https://github.com/jun85664396/twkorean-ruby
|
5
5
|
# @license Apache License 2.0
|
6
6
|
module Twkorean
|
@@ -43,5 +43,9 @@ module Twkorean
|
|
43
43
|
phrases.toArray.map{|x| x.toString}
|
44
44
|
end
|
45
45
|
|
46
|
+
def parser(text)
|
47
|
+
text.match(/(.*)\(([a-zA-Z]*): ([0-9]+), ([0-9]+)\)/).to_a
|
48
|
+
end
|
49
|
+
|
46
50
|
end
|
47
51
|
end
|
data/lib/twkorean/version.rb
CHANGED
data/lib/twkorean.rb
CHANGED
data/test/test_helper.rb
CHANGED
data/test/twkorean.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# @name twkorean-ruby
|
2
2
|
# @author JunSangPil
|
3
|
-
# @version 0.0.
|
3
|
+
# @version 0.0.3
|
4
4
|
# @url https://github.com/jun85664396/twkorean-ruby
|
5
5
|
# @license Apache License 2.0
|
6
6
|
require_relative 'test_helper'
|
@@ -9,31 +9,42 @@ require 'twkorean'
|
|
9
9
|
describe "Twkorean" do
|
10
10
|
TEXT = "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ #한국어"
|
11
11
|
before do
|
12
|
-
|
12
|
+
|
13
13
|
end
|
14
14
|
|
15
15
|
it "Normalize" do
|
16
16
|
twkorean = Twkorean::TwitterKoreanText.new
|
17
17
|
p "Normlize"
|
18
18
|
p twkorean.normalize(TEXT)
|
19
|
+
# 한국어를 처리하는 예시입니다ㅋㅋ #한국어
|
19
20
|
end
|
20
21
|
|
21
22
|
it "Tokenize" do
|
22
23
|
twkorean = Twkorean::TwitterKoreanText.new(true, false)
|
23
24
|
p "#Tokenize"
|
24
25
|
p twkorean.tokenize(TEXT)
|
26
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하는(Verb: 7, 2)", "예시(Noun: 10, 2)", "입니(Adjective: 12, 2)", "다(Eomi: 14, 1)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
25
27
|
end
|
26
28
|
|
27
29
|
it "Stemming" do
|
28
30
|
twkorean = Twkorean::TwitterKoreanText.new
|
29
31
|
p "#Stemming"
|
30
32
|
p twkorean.tokenize(TEXT)
|
33
|
+
# ["한국어(Noun: 0, 3)", "를(Josa: 3, 1)", "처리(Noun: 5, 2)", "하다(Verb: 7, 2)", "예시(Noun: 10, 2)", "이다(Adjective: 12, 3)", "ㅋㅋ(KoreanParticle: 15, 2)", "#한국어(Hashtag: 18, 4)"]
|
31
34
|
end
|
32
35
|
|
33
36
|
it "Phrase extraction" do
|
34
37
|
twkorean = Twkorean::TwitterKoreanText.new
|
35
38
|
p "Phrase extraction"
|
36
39
|
p twkorean.extract_phrases(TEXT)
|
40
|
+
# ["한국어(Noun: 0, 3)", "처리(Noun: 5, 2)", "처리하는 예시(Noun: 5, 7)", "예시(Noun: 10, 2)", "#한국어(Hashtag: 18, 4)"]
|
41
|
+
end
|
42
|
+
|
43
|
+
it "Parser" do
|
44
|
+
twkorean = Twkorean::TwitterKoreanText.new(true, false)
|
45
|
+
p "#Tokenize Parser"
|
46
|
+
p twkorean.tokenize(TEXT).map{|x| twkorean.parser(x) }
|
47
|
+
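# [["한국어(Noun: 0, 3)", "한국어", "Noun", "0", "3"], ["를(Josa: 3, 1)", "를", "Josa", "3", "1"], ["처리(Noun: 5, 2)", "처리", "Noun", "5", "2"], ["하는(Verb: 7, 2)", "하는", "Verb", "7", "2"], ["예시(Noun: 10, 2)", "예시", "Noun", "10", "2"], ["입니(Adjective: 12, 2)", "입니", "Adjective", "12", "2"], ["다(Eomi: 14, 1)", "다", "Eomi", "14", "1"], ["ㅋㅋ(KoreanParticle: 15, 2)", "ㅋㅋ", "KoreanParticle", "15", "2"], ["#한국어(Hashtag: 18, 4)", "#한국어", "Hashtag", "18", "4"]]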
|
37
48
|
end
|
38
49
|
|
39
50
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twkorean
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- JunSangPil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|