nabatheon 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fa252b6b9ee408a134f7453076b1e17d2430a16
4
- data.tar.gz: 31c076ac8c39735b8c6450ce961aea4a9ad16420
3
+ metadata.gz: 35fc21082f3b4056a1995d300683c37459743a5b
4
+ data.tar.gz: 5333c759976de623a600bd335588bb10eba7aaa1
5
5
  SHA512:
6
- metadata.gz: 611dc73388d6adbc0054e17e70df524efceff6ae3b11436f4221cd87bc9fc4776e0634e51dace1b3124e379e062454090ea6e6f30324c2b2dd3f81ccd9b94678
7
- data.tar.gz: d2ece7d10fdb835c080af46f3c6750968eb37f1a7e98417dc49df1c13edced95a651efec16f3e768e5d601224183aeaaf926b46888b73a5a047e87a1173bfecd
6
+ metadata.gz: 86d96b766265bdb24a105d7c25491f4a39d5313bbd32385abc2d05f2b3e98ad284b189c30e9ce967950321aeb6ba03b1e575b07cac64c6ce61798217015d6ba1
7
+ data.tar.gz: 52e74491a71c41a991610bbd4ac737469b2974c8e24d308b63b9a37a6987f9c9670cc68793629343d88ae36d226e664a50e9fa37cf123f94d01daff98ecb11fa
@@ -1,4 +1,8 @@
1
1
  require 'nabatheon/version'
2
+ require 'nabatheon/entity'
3
+ require 'nabatheon/rule'
4
+ require 'nabatheon/search'
5
+
2
6
  require 'stanford-core-nlp'
3
7
 
4
8
  module Nabatheon
@@ -9,18 +13,22 @@ module Nabatheon
9
13
  StanfordCoreNLP.use :english
10
14
  Pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner)
11
15
 
12
- def self.annotate(raw_text)
13
- text = StanfordCoreNLP::Annotation.new(raw_text)
14
- Pipeline.annotate(text)
16
+ def self.annotate(text)
17
+ annotated_text = StanfordCoreNLP::Annotation.new(text)
18
+ Pipeline.annotate(annotated_text)
15
19
 
16
- named_lemma = []
17
- text.get(:sentences).each do |sentence|
20
+ tagged = []
21
+ annotated_text.get(:sentences).each do |sentence|
18
22
  sentence.get(:tokens).each do |token|
19
23
  entity_tag = token.get(:named_entity_tag).to_s
20
24
  lemma = token.get(:lemma).to_s
21
- named_lemma << [entity_tag, lemma] unless entity_tag == 'O'
25
+ tagged << { named_entity: entity_tag, type: lemma }
22
26
  end
23
27
  end
24
- named_lemma
28
+ tagged
29
+ end
30
+
31
+ def self.relevant_searches_for(annotations)
32
+ Rule.apply_on(annotations)
25
33
  end
26
34
  end
@@ -0,0 +1,3 @@
1
module Nabatheon
  # Minimal two-member value object.
  #
  # Rule#apply_on builds one per incoming token hash, passing the hash's
  # :named_entity entry as +type+ and its :type entry as +value+.
  Entity = Struct.new(
    :type,
    :value
  )
end
@@ -0,0 +1,34 @@
1
module Nabatheon
  # Turns a flat list of tagged tokens into Search objects, merging runs of
  # consecutive tokens that carry the same person/organization tag into a
  # single multi-word search.
  class Rule
    PERSON = 'PERSON'.freeze
    ORG = 'ORGANIZATION'.freeze

    # Entity types whose consecutive tokens are merged into one search.
    MERGEABLE_TYPES = [PERSON, ORG].freeze

    # Convenience entry point: delegates to #apply_on on a fresh instance.
    def self.apply_on(*args)
      new.apply_on(*args)
    end

    # Builds the list of searches for a token stream.
    #
    # @param named_entities [Enumerable<Hash>] one hash per token, read via
    #   the :named_entity key (entity tag) and the :type key (token text),
    #   i.e. the shape produced by Nabatheon.annotate.
    # @return [Array<Search>] one Search per token, except that a token
    #   satisfying #rule_of_two? is appended (space-separated) to the value
    #   of the preceding Search instead of starting a new one.
    def apply_on(named_entities)
      searches = []
      previous = Entity.new(nil, nil)
      named_entities.each do |token|
        entity = Entity.new(token[:named_entity], token[:type])
        if rule_of_two?(entity.type, previous.type)
          # The previous token had the same mergeable type, so a Search for
          # it is guaranteed to be the last element; extend it in place.
          current = searches.last
          current.value = "#{current.value} #{entity.value}"
        else
          searches << Search.new(entity.type, entity.value)
        end
        previous = entity
      end
      searches
    end

    # True when two adjacent tokens belong to the same name: both are
    # persons or both are organizations. A mixed pair (for example a PERSON
    # directly after an ORGANIZATION) is not a match, so the two are never
    # merged into one search.
    #
    # @param type [String, nil] entity tag of the current token
    # @param previous_type [String, nil] entity tag of the preceding token
    # @return [Boolean]
    def rule_of_two?(type, previous_type)
      type == previous_type && MERGEABLE_TYPES.include?(type)
    end
  end
end
@@ -0,0 +1,10 @@
1
module Nabatheon
  # A search term paired with the entity type it was derived from.
  #
  # Both attributes are writable; Rule#apply_on extends +value+ in place
  # when it merges consecutive tokens into one search.
  class Search
    attr_accessor :type
    attr_accessor :value

    # @param type stored as-is and exposed through #type
    # @param value stored as-is and exposed through #value
    def initialize(type, value)
      @type, @value = type, value
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Nabatheon
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Nabatheon::VERSION
9
9
  spec.authors = ['Jacob Burenstam']
10
10
  spec.email = ['burenstam@gmail.com']
11
- spec.summary = %q{Extract named entities and lemmas using StanfordCoreNLP.}
11
+ spec.summary = %q{Extract named entities and lemmas using StanfordCoreNLP (alpha).}
12
12
  spec.description = %q{Extract named entities and lemmas using StanfordCoreNLP (alpha). Assumes that all dependencies are in ~/.stanford-nlp}
13
13
  spec.homepage = 'https://github.com/buren/nabatheon'
14
14
  spec.license = 'MIT'
@@ -22,5 +22,4 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_development_dependency 'bundler', '~> 1.7'
24
24
  spec.add_development_dependency 'rake', '~> 10.0'
25
- spec.add_development_dependency 'pry'
26
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nabatheon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
@@ -52,20 +52,6 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '10.0'
55
- - !ruby/object:Gem::Dependency
56
- name: pry
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
55
  description: Extract named entities and lemmas using StanfordCoreNLP (alpha). Assumes
70
56
  that all dependencies are in ~/.stanford-nlp
71
57
  email:
@@ -82,6 +68,9 @@ files:
82
68
  - Rakefile
83
69
  - bin/nabatheon
84
70
  - lib/nabatheon.rb
71
+ - lib/nabatheon/entity.rb
72
+ - lib/nabatheon/rule.rb
73
+ - lib/nabatheon/search.rb
85
74
  - lib/nabatheon/version.rb
86
75
  - nabatheon.gemspec
87
76
  homepage: https://github.com/buren/nabatheon
@@ -107,6 +96,6 @@ rubyforge_project:
107
96
  rubygems_version: 2.2.2
108
97
  signing_key:
109
98
  specification_version: 4
110
- summary: Extract named entities and lemmas using StanfordCoreNLP.
99
+ summary: Extract named entities and lemmas using StanfordCoreNLP (alpha).
111
100
  test_files: []
112
101
  has_rdoc: