nabatheon 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fa252b6b9ee408a134f7453076b1e17d2430a16
4
- data.tar.gz: 31c076ac8c39735b8c6450ce961aea4a9ad16420
3
+ metadata.gz: 35fc21082f3b4056a1995d300683c37459743a5b
4
+ data.tar.gz: 5333c759976de623a600bd335588bb10eba7aaa1
5
5
  SHA512:
6
- metadata.gz: 611dc73388d6adbc0054e17e70df524efceff6ae3b11436f4221cd87bc9fc4776e0634e51dace1b3124e379e062454090ea6e6f30324c2b2dd3f81ccd9b94678
7
- data.tar.gz: d2ece7d10fdb835c080af46f3c6750968eb37f1a7e98417dc49df1c13edced95a651efec16f3e768e5d601224183aeaaf926b46888b73a5a047e87a1173bfecd
6
+ metadata.gz: 86d96b766265bdb24a105d7c25491f4a39d5313bbd32385abc2d05f2b3e98ad284b189c30e9ce967950321aeb6ba03b1e575b07cac64c6ce61798217015d6ba1
7
+ data.tar.gz: 52e74491a71c41a991610bbd4ac737469b2974c8e24d308b63b9a37a6987f9c9670cc68793629343d88ae36d226e664a50e9fa37cf123f94d01daff98ecb11fa
@@ -1,4 +1,8 @@
1
1
  require 'nabatheon/version'
2
+ require 'nabatheon/entity'
3
+ require 'nabatheon/rule'
4
+ require 'nabatheon/search'
5
+
2
6
  require 'stanford-core-nlp'
3
7
 
4
8
  module Nabatheon
@@ -9,18 +13,22 @@ module Nabatheon
9
13
  StanfordCoreNLP.use :english
10
14
  Pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner)
11
15
 
12
- def self.annotate(raw_text)
13
- text = StanfordCoreNLP::Annotation.new(raw_text)
14
- Pipeline.annotate(text)
16
+ def self.annotate(text)
17
+ annotated_text = StanfordCoreNLP::Annotation.new(text)
18
+ Pipeline.annotate(annotated_text)
15
19
 
16
- named_lemma = []
17
- text.get(:sentences).each do |sentence|
20
+ tagged = []
21
+ annotated_text.get(:sentences).each do |sentence|
18
22
  sentence.get(:tokens).each do |token|
19
23
  entity_tag = token.get(:named_entity_tag).to_s
20
24
  lemma = token.get(:lemma).to_s
21
- named_lemma << [entity_tag, lemma] unless entity_tag == 'O'
25
+ tagged << { named_entity: entity_tag, type: lemma }
22
26
  end
23
27
  end
24
- named_lemma
28
+ tagged
29
+ end
30
+
31
+ def self.relevant_searches_for(annotations)
32
+ Rule.apply_on(annotations)
25
33
  end
26
34
  end
@@ -0,0 +1,3 @@
1
+ module Nabatheon
2
+ Entity = Struct.new(:type, :value)
3
+ end
@@ -0,0 +1,34 @@
1
+ module Nabatheon
2
+ class Rule
3
+ PERSON = 'PERSON'
4
+ ORG = 'ORGANIZATION'
5
+
6
+ def self.apply_on(*args)
7
+ self.new.apply_on(*args)
8
+ end
9
+
10
+ def apply_on(named_entities)
11
+ searches = []
12
+ previous = Entity.new(nil, nil)
13
+ named_entities.map do |entity_arr|
14
+ entity = Entity.new(entity_arr[:named_entity], entity_arr[:type])
15
+ if rule_of_two?(entity.type, previous.type)
16
+ searches.last.value = "#{searches.last.value} #{entity.value}"
17
+ else
18
+ searches << Search.new(entity.type, entity.value)
19
+ end
20
+ previous = entity
21
+ end
22
+ searches
23
+ end
24
+
25
+ # * (2 persons || 2 orgs) in a row => #contact
26
+ def rule_of_two?(type, previous_type)
27
+ # FIXME: Does not return correct value if the types are:
28
+ # type = PERSON && previous[:type] = ORG
29
+ # # => returns true
30
+ (type == PERSON || type == ORG) && \
31
+ (previous_type == PERSON || previous_type == ORG)
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,10 @@
1
+ module Nabatheon
2
+ class Search
3
+ attr_accessor :type, :value
4
+
5
+ def initialize(type, value)
6
+ @type = type
7
+ @value = value
8
+ end
9
+ end
10
+ end
@@ -1,3 +1,3 @@
1
1
  module Nabatheon
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Nabatheon::VERSION
9
9
  spec.authors = ['Jacob Burenstam']
10
10
  spec.email = ['burenstam@gmail.com']
11
- spec.summary = %q{Extract named entities and lemmas using StanfordCoreNLP.}
11
+ spec.summary = %q{Extract named entities and lemmas using StanfordCoreNLP (alpha).}
12
12
  spec.description = %q{Extract named entities and lemmas using StanfordCoreNLP (alpha). Assumes that all dependencies are in ~/.stanford-nlp}
13
13
  spec.homepage = 'https://github.com/buren/nabatheon'
14
14
  spec.license = 'MIT'
@@ -22,5 +22,4 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_development_dependency 'bundler', '~> 1.7'
24
24
  spec.add_development_dependency 'rake', '~> 10.0'
25
- spec.add_development_dependency 'pry'
26
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nabatheon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
@@ -52,20 +52,6 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '10.0'
55
- - !ruby/object:Gem::Dependency
56
- name: pry
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
55
  description: Extract named entities and lemmas using StanfordCoreNLP (alpha). Assumes
70
56
  that all dependencies are in ~/.stanford-nlp
71
57
  email:
@@ -82,6 +68,9 @@ files:
82
68
  - Rakefile
83
69
  - bin/nabatheon
84
70
  - lib/nabatheon.rb
71
+ - lib/nabatheon/entity.rb
72
+ - lib/nabatheon/rule.rb
73
+ - lib/nabatheon/search.rb
85
74
  - lib/nabatheon/version.rb
86
75
  - nabatheon.gemspec
87
76
  homepage: https://github.com/buren/nabatheon
@@ -107,6 +96,6 @@ rubyforge_project:
107
96
  rubygems_version: 2.2.2
108
97
  signing_key:
109
98
  specification_version: 4
110
- summary: Extract named entities and lemmas using StanfordCoreNLP.
99
+ summary: Extract named entities and lemmas using StanfordCoreNLP (alpha).
111
100
  test_files: []
112
101
  has_rdoc: