nabatheon 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/nabatheon.rb +15 -7
- data/lib/nabatheon/entity.rb +3 -0
- data/lib/nabatheon/rule.rb +34 -0
- data/lib/nabatheon/search.rb +10 -0
- data/lib/nabatheon/version.rb +1 -1
- data/nabatheon.gemspec +1 -2
- metadata +5 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35fc21082f3b4056a1995d300683c37459743a5b
|
4
|
+
data.tar.gz: 5333c759976de623a600bd335588bb10eba7aaa1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86d96b766265bdb24a105d7c25491f4a39d5313bbd32385abc2d05f2b3e98ad284b189c30e9ce967950321aeb6ba03b1e575b07cac64c6ce61798217015d6ba1
|
7
|
+
data.tar.gz: 52e74491a71c41a991610bbd4ac737469b2974c8e24d308b63b9a37a6987f9c9670cc68793629343d88ae36d226e664a50e9fa37cf123f94d01daff98ecb11fa
|
data/lib/nabatheon.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
require 'nabatheon/version'
|
2
|
+
require 'nabatheon/entity'
|
3
|
+
require 'nabatheon/rule'
|
4
|
+
require 'nabatheon/search'
|
5
|
+
|
2
6
|
require 'stanford-core-nlp'
|
3
7
|
|
4
8
|
module Nabatheon
|
@@ -9,18 +13,22 @@ module Nabatheon
|
|
9
13
|
StanfordCoreNLP.use :english
|
10
14
|
Pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner)
|
11
15
|
|
12
|
-
def self.annotate(
|
13
|
-
|
14
|
-
Pipeline.annotate(
|
16
|
+
def self.annotate(text)
|
17
|
+
annotated_text = StanfordCoreNLP::Annotation.new(text)
|
18
|
+
Pipeline.annotate(annotated_text)
|
15
19
|
|
16
|
-
|
17
|
-
|
20
|
+
tagged = []
|
21
|
+
annotated_text.get(:sentences).each do |sentence|
|
18
22
|
sentence.get(:tokens).each do |token|
|
19
23
|
entity_tag = token.get(:named_entity_tag).to_s
|
20
24
|
lemma = token.get(:lemma).to_s
|
21
|
-
|
25
|
+
tagged << { named_entity: entity_tag, type: lemma }
|
22
26
|
end
|
23
27
|
end
|
24
|
-
|
28
|
+
tagged
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.relevant_searches_for(annotations)
|
32
|
+
Rule.apply_on(annotations)
|
25
33
|
end
|
26
34
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Nabatheon
  # Turns a flat list of tagged tokens into a list of searches, merging
  # consecutive tokens that carry the same PERSON or ORGANIZATION tag into a
  # single search (e.g. a first name followed by a last name).
  #
  # Relies on Entity and Search, which are not defined in this file
  # (presumably nabatheon/entity and nabatheon/search -- confirm).
  class Rule
    PERSON = 'PERSON'
    ORG = 'ORGANIZATION'

    # Entity types whose consecutive tokens are merged into one search.
    MERGEABLE_TYPES = [PERSON, ORG].freeze

    # Convenience wrapper: builds a Rule and forwards every argument to
    # the instance method #apply_on.
    def self.apply_on(*args)
      new.apply_on(*args)
    end

    # Builds the list of searches for a sequence of tagged tokens.
    #
    # named_entities - Enumerable of hashes read through the :named_entity
    #                  and :type keys; both values are handed, in that
    #                  order, to Entity.new.
    #
    # Returns an Array of Search objects in input order. A token that forms a
    # "rule of two" pair with the token before it does not get its own
    # search; its value is appended (space separated) to the last search.
    def apply_on(named_entities)
      searches = []
      # Sentinel with a nil type so the first token can never form a pair.
      previous = Entity.new(nil, nil)
      named_entities.each do |entity_arr|
        entity = Entity.new(entity_arr[:named_entity], entity_arr[:type])
        if rule_of_two?(entity.type, previous.type)
          # A pair implies the previous token already produced (or extended)
          # a search, so searches.last is present here.
          last_search = searches.last
          last_search.value = "#{last_search.value} #{entity.value}"
        else
          searches << Search.new(entity.type, entity.value)
        end
        previous = entity
      end
      searches
    end

    # * (2 persons || 2 orgs) in a row => #contact
    #
    # type          - type of the current entity.
    # previous_type - type of the entity immediately before it.
    #
    # Returns true only when both types are equal and that shared type is
    # PERSON or ORG. A PERSON next to an ORG (in either order) is not a pair.
    def rule_of_two?(type, previous_type)
      type == previous_type && MERGEABLE_TYPES.include?(type)
    end
  end
end
|
data/lib/nabatheon/version.rb
CHANGED
data/nabatheon.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = Nabatheon::VERSION
|
9
9
|
spec.authors = ['Jacob Burenstam']
|
10
10
|
spec.email = ['burenstam@gmail.com']
|
11
|
-
spec.summary = %q{Extract named entities and lemmas using StanfordCoreNLP.}
|
11
|
+
spec.summary = %q{Extract named entities and lemmas using StanfordCoreNLP (alpha).}
|
12
12
|
spec.description = %q{Extract named entities and lemmas using StanfordCoreNLP (alpha). Assumes that all dependencies are in ~/.stanford-nlp}
|
13
13
|
spec.homepage = 'https://github.com/buren/nabatheon'
|
14
14
|
spec.license = 'MIT'
|
@@ -22,5 +22,4 @@ Gem::Specification.new do |spec|
|
|
22
22
|
|
23
23
|
spec.add_development_dependency 'bundler', '~> 1.7'
|
24
24
|
spec.add_development_dependency 'rake', '~> 10.0'
|
25
|
-
spec.add_development_dependency 'pry'
|
26
25
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nabatheon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jacob Burenstam
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '10.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: pry
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
55
|
description: Extract named entities and lemmas using StanfordCoreNLP (alpha). Assumes
|
70
56
|
that all dependencies are in ~/.stanford-nlp
|
71
57
|
email:
|
@@ -82,6 +68,9 @@ files:
|
|
82
68
|
- Rakefile
|
83
69
|
- bin/nabatheon
|
84
70
|
- lib/nabatheon.rb
|
71
|
+
- lib/nabatheon/entity.rb
|
72
|
+
- lib/nabatheon/rule.rb
|
73
|
+
- lib/nabatheon/search.rb
|
85
74
|
- lib/nabatheon/version.rb
|
86
75
|
- nabatheon.gemspec
|
87
76
|
homepage: https://github.com/buren/nabatheon
|
@@ -107,6 +96,6 @@ rubyforge_project:
|
|
107
96
|
rubygems_version: 2.2.2
|
108
97
|
signing_key:
|
109
98
|
specification_version: 4
|
110
|
-
summary: Extract named entities and lemmas using StanfordCoreNLP.
|
99
|
+
summary: Extract named entities and lemmas using StanfordCoreNLP (alpha).
|
111
100
|
test_files: []
|
112
101
|
has_rdoc:
|