opener-opinion-detector-basic 3.2.2 → 3.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/opener/opinion_detector_basic.rb +13 -2
- data/lib/opener/opinion_detector_basic/base_processor.rb +56 -0
- data/lib/opener/opinion_detector_basic/kaf/document.rb +146 -0
- data/lib/opener/opinion_detector_basic/kaf/opinion.rb +152 -0
- data/lib/opener/opinion_detector_basic/kaf/term.rb +185 -0
- data/lib/opener/opinion_detector_basic/legacy_processor.rb +136 -0
- data/lib/opener/opinion_detector_basic/processor.rb +22 -310
- data/lib/opener/opinion_detector_basic/version.rb +1 -1
- data/opener-opinion-detector-basic.gemspec +3 -1
- metadata +37 -13
- data/lib/opener/opinion_detector_basic/opinion.rb +0 -170
- data/lib/opener/opinion_detector_basic/term.rb +0 -159
@@ -27,7 +27,9 @@ Gem::Specification.new do |gem|
|
|
27
27
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
28
28
|
gem.add_dependency 'opener-core', '~> 2.2'
|
29
29
|
|
30
|
-
gem.add_dependency '
|
30
|
+
gem.add_dependency 'activesupport'
|
31
|
+
gem.add_dependency 'nokogiri'
|
32
|
+
gem.add_dependency 'hashie'
|
31
33
|
|
32
34
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
33
35
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-opinion-detector-basic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.
|
4
|
+
version: 3.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -53,25 +53,47 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: activesupport
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
60
67
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
62
73
|
- - ">="
|
63
74
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
75
|
+
version: '0'
|
65
76
|
type: :runtime
|
66
77
|
prerelease: false
|
67
78
|
version_requirements: !ruby/object:Gem::Requirement
|
68
79
|
requirements:
|
69
|
-
- - "
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: hashie
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
70
88
|
- !ruby/object:Gem::Version
|
71
|
-
version: '
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
72
94
|
- - ">="
|
73
95
|
- !ruby/object:Gem::Version
|
74
|
-
version:
|
96
|
+
version: '0'
|
75
97
|
- !ruby/object:Gem::Dependency
|
76
98
|
name: rspec
|
77
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,12 +167,15 @@ files:
|
|
145
167
|
- config.ru
|
146
168
|
- exec/opinion-detector-basic.rb
|
147
169
|
- lib/opener/opinion_detector_basic.rb
|
170
|
+
- lib/opener/opinion_detector_basic/base_processor.rb
|
148
171
|
- lib/opener/opinion_detector_basic/cli.rb
|
149
|
-
- lib/opener/opinion_detector_basic/
|
172
|
+
- lib/opener/opinion_detector_basic/kaf/document.rb
|
173
|
+
- lib/opener/opinion_detector_basic/kaf/opinion.rb
|
174
|
+
- lib/opener/opinion_detector_basic/kaf/term.rb
|
175
|
+
- lib/opener/opinion_detector_basic/legacy_processor.rb
|
150
176
|
- lib/opener/opinion_detector_basic/processor.rb
|
151
177
|
- lib/opener/opinion_detector_basic/public/markdown.css
|
152
178
|
- lib/opener/opinion_detector_basic/server.rb
|
153
|
-
- lib/opener/opinion_detector_basic/term.rb
|
154
179
|
- lib/opener/opinion_detector_basic/version.rb
|
155
180
|
- lib/opener/opinion_detector_basic/views/index.erb
|
156
181
|
- opener-opinion-detector-basic.gemspec
|
@@ -174,8 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
199
|
- !ruby/object:Gem::Version
|
175
200
|
version: '0'
|
176
201
|
requirements: []
|
177
|
-
|
178
|
-
rubygems_version: 2.7.6
|
202
|
+
rubygems_version: 3.2.14
|
179
203
|
signing_key:
|
180
204
|
specification_version: 4
|
181
205
|
summary: Basic Opinion Detector.
|
@@ -1,170 +0,0 @@
|
|
1
|
-
module Opener
|
2
|
-
class OpinionDetectorBasic
|
3
|
-
class Opinion
|
4
|
-
attr_reader :term
|
5
|
-
attr_accessor :left_candidates, :right_candidates, :target_ids, :holders
|
6
|
-
|
7
|
-
# Opinion holders for each language code.
|
8
|
-
OPINION_HOLDERS = {
|
9
|
-
'nl' => [
|
10
|
-
'ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij',
|
11
|
-
'me','hem','haar','ons','hen','hun'
|
12
|
-
],
|
13
|
-
'en' => ['i','we','he','she','they','it','you'],
|
14
|
-
'es' => [
|
15
|
-
'yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'
|
16
|
-
],
|
17
|
-
'it' => ['io','tu','noi','voi','loro','lei','lui'],
|
18
|
-
'de' => ['ich','du','wir','ihr','sie','er'],
|
19
|
-
'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
|
20
|
-
}
|
21
|
-
|
22
|
-
def initialize(term)
|
23
|
-
@term = term
|
24
|
-
@left_candidates = []
|
25
|
-
@right_candidates = []
|
26
|
-
@holders = []
|
27
|
-
@target_ids = []
|
28
|
-
end
|
29
|
-
|
30
|
-
##
|
31
|
-
# Returns the term ids of the opinion expression.
|
32
|
-
#
|
33
|
-
# @return [Array]
|
34
|
-
#
|
35
|
-
def ids
|
36
|
-
@ids ||= term.list_ids.sort
|
37
|
-
end
|
38
|
-
|
39
|
-
##
|
40
|
-
# Returns the sentence id of the opinion.
|
41
|
-
#
|
42
|
-
# @return [String]
|
43
|
-
#
|
44
|
-
def sentence
|
45
|
-
@sentence ||= term.sentence
|
46
|
-
end
|
47
|
-
|
48
|
-
##
|
49
|
-
# Returns the strength of the opinion.
|
50
|
-
#
|
51
|
-
# @return [Integer]
|
52
|
-
#
|
53
|
-
def strength
|
54
|
-
@strength ||= term.accumulated_strength
|
55
|
-
end
|
56
|
-
|
57
|
-
##
|
58
|
-
# Returns the polarity of the opinion.
|
59
|
-
#
|
60
|
-
# @return [String]
|
61
|
-
#
|
62
|
-
def polarity
|
63
|
-
@polarity ||= if strength > 0
|
64
|
-
"positive"
|
65
|
-
elsif strength < 0
|
66
|
-
"negative"
|
67
|
-
else
|
68
|
-
"neutral"
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
##
|
73
|
-
# Obtain the opinion holders from the terms that belong to the same
|
74
|
-
# sentence.
|
75
|
-
#
|
76
|
-
def obtain_holders(sentences, language)
|
77
|
-
sentence_terms = sentences[sentence]
|
78
|
-
sentence_terms.each do |term|
|
79
|
-
if OPINION_HOLDERS[language]&.include?(term.lemma)
|
80
|
-
@holders << term.id
|
81
|
-
break
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
##
|
87
|
-
# Get the potential right and left candidates of the sentence and
|
88
|
-
# decide which ones are the actual targets of the opinion
|
89
|
-
#
|
90
|
-
def obtain_targets(sentences)
|
91
|
-
sentence_terms = sentences[sentence]
|
92
|
-
max_distance = 3
|
93
|
-
terms_count = sentence_terms.count
|
94
|
-
|
95
|
-
index = -1
|
96
|
-
sentence_terms.each_with_index do |term, i|
|
97
|
-
if ids.include?(term.id)
|
98
|
-
index = i
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
unless index+1 >= terms_count
|
103
|
-
min = index+1
|
104
|
-
max = [index+1+max_distance,terms_count].min
|
105
|
-
@right_candidates = filter_candidates(sentence_terms[min..max])
|
106
|
-
end
|
107
|
-
|
108
|
-
index = 0
|
109
|
-
sentence_terms.each_with_index do |term, i|
|
110
|
-
if ids.include?(term.id)
|
111
|
-
index = i
|
112
|
-
break # needed for left_candidates
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
unless index == 0
|
117
|
-
min = [0, index-1-max_distance].max
|
118
|
-
max = index
|
119
|
-
@left_candidates = filter_candidates(sentence_terms[min..max])
|
120
|
-
end
|
121
|
-
|
122
|
-
unless right_candidates.empty?
|
123
|
-
candidate = right_candidates.first
|
124
|
-
@target_ids << candidate.id
|
125
|
-
end
|
126
|
-
|
127
|
-
if target_ids.empty?
|
128
|
-
list = mix_lists(right_candidates, left_candidates)
|
129
|
-
list.each do |l|
|
130
|
-
@target_ids << l.id
|
131
|
-
break
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
protected
|
137
|
-
|
138
|
-
##
|
139
|
-
# If there are no opinion targets, right and left candidates
|
140
|
-
# are mixed into one list and the first one is picked as the target.
|
141
|
-
#
|
142
|
-
# @return [Array]
|
143
|
-
#
|
144
|
-
def mix_lists(lista, listb)
|
145
|
-
list = []
|
146
|
-
min = [lista.count, listb.count].min
|
147
|
-
(0..min).each do |i|
|
148
|
-
list << lista[i]
|
149
|
-
list << listb[i]
|
150
|
-
if lista.count > listb.count
|
151
|
-
list << lista[min]
|
152
|
-
elsif listb.count > lista.count
|
153
|
-
list << listb[min]
|
154
|
-
end
|
155
|
-
end
|
156
|
-
return list.compact
|
157
|
-
end
|
158
|
-
|
159
|
-
##
|
160
|
-
# Filters candidate terms depending on their part of speech and if
|
161
|
-
# they are already part of the expression.
|
162
|
-
#
|
163
|
-
# @return [Hash]
|
164
|
-
#
|
165
|
-
def filter_candidates(sentence_terms)
|
166
|
-
sentence_terms.select{|t| (t.pos == "N" || t.pos == "R") && !ids.include?(t.id)}
|
167
|
-
end
|
168
|
-
end # Opinion
|
169
|
-
end # OpinionDetectorBasic
|
170
|
-
end # Opener
|
@@ -1,159 +0,0 @@
|
|
1
|
-
module Opener
|
2
|
-
class OpinionDetectorBasic
|
3
|
-
class Term
|
4
|
-
attr_reader :node, :sentence, :is_conjunction
|
5
|
-
attr_accessor :use, :accumulated_strength, :list_ids
|
6
|
-
|
7
|
-
# Map of conjunctions per language code
|
8
|
-
CONJUNCTIONS = {
|
9
|
-
'nl' => %w{, en},
|
10
|
-
'en' => %w{, and},
|
11
|
-
'es' => %w{, y e},
|
12
|
-
'it' => %w{, e ed},
|
13
|
-
'de' => %w{, und},
|
14
|
-
'fr' => %w{, et}
|
15
|
-
}
|
16
|
-
|
17
|
-
def initialize(node, document, language)
|
18
|
-
@node = node
|
19
|
-
@sentence = get_sentence(document)
|
20
|
-
@use = true
|
21
|
-
@accumulated_strength = strength
|
22
|
-
@list_ids = [id]
|
23
|
-
@is_conjunction = is_conjunction?(language)
|
24
|
-
end
|
25
|
-
|
26
|
-
##
|
27
|
-
# Returns the term id.
|
28
|
-
#
|
29
|
-
# @return [String]
|
30
|
-
#
|
31
|
-
def id
|
32
|
-
@id ||= node.get('tid')
|
33
|
-
end
|
34
|
-
|
35
|
-
##
|
36
|
-
# Returns the lemma of the term.
|
37
|
-
#
|
38
|
-
# @return [String]
|
39
|
-
#
|
40
|
-
def lemma
|
41
|
-
@lemma ||= node.get('lemma')
|
42
|
-
end
|
43
|
-
|
44
|
-
##
|
45
|
-
# Returns the part of speech of the term.
|
46
|
-
#
|
47
|
-
# @return [String]
|
48
|
-
#
|
49
|
-
def pos
|
50
|
-
@pos ||= node.get('pos')
|
51
|
-
end
|
52
|
-
|
53
|
-
##
|
54
|
-
# Returns the sentiment modifier type if it exists.
|
55
|
-
#
|
56
|
-
# @return [String|NilClass]
|
57
|
-
#
|
58
|
-
def sentiment_modifier
|
59
|
-
@sentiment_modifier ||=
|
60
|
-
first_sentiment ? first_sentiment.get('sentiment_modifier') : nil
|
61
|
-
end
|
62
|
-
|
63
|
-
##
|
64
|
-
# Returns the polarity of the term if it exists.
|
65
|
-
#
|
66
|
-
# @return [String|NilClass]
|
67
|
-
#
|
68
|
-
def polarity
|
69
|
-
@polarity ||= first_sentiment ? first_sentiment.get('polarity') : nil
|
70
|
-
end
|
71
|
-
|
72
|
-
##
|
73
|
-
# Returns the actual word ids that construct the lemma.
|
74
|
-
#
|
75
|
-
# @return [Array]
|
76
|
-
#
|
77
|
-
def target_ids
|
78
|
-
@target_ids ||= node.xpath('span/target')
|
79
|
-
.map { |target| target.get('id') }
|
80
|
-
end
|
81
|
-
|
82
|
-
##
|
83
|
-
# Returns the strength of the term depending on its type.
|
84
|
-
#
|
85
|
-
# @return [Integer]
|
86
|
-
#
|
87
|
-
def strength
|
88
|
-
if polarity == "positive"
|
89
|
-
return 1
|
90
|
-
elsif polarity == "negative"
|
91
|
-
return -1
|
92
|
-
end
|
93
|
-
|
94
|
-
if is_intensifier?
|
95
|
-
return 2
|
96
|
-
elsif is_shifter?
|
97
|
-
return -1
|
98
|
-
end
|
99
|
-
|
100
|
-
return 0
|
101
|
-
end
|
102
|
-
|
103
|
-
##
|
104
|
-
# Returns the sentence id that the term belongs to in the document.
|
105
|
-
#
|
106
|
-
# @return [String]
|
107
|
-
#
|
108
|
-
def get_sentence(document)
|
109
|
-
document
|
110
|
-
.xpath("KAF/text/wf[@wid='#{target_ids.first}']")
|
111
|
-
.first
|
112
|
-
.get('sent')
|
113
|
-
end
|
114
|
-
|
115
|
-
##
|
116
|
-
# Checks if a term is an intensifier.
|
117
|
-
#
|
118
|
-
# @return [TrueClass|FalseClass]
|
119
|
-
#
|
120
|
-
def is_intensifier?
|
121
|
-
sentiment_modifier == "intensifier"
|
122
|
-
end
|
123
|
-
|
124
|
-
##
|
125
|
-
# Checks if a term is a shifter.
|
126
|
-
#
|
127
|
-
# @return [TrueClass|FalseClass]
|
128
|
-
#
|
129
|
-
def is_shifter?
|
130
|
-
sentiment_modifier == "shifter"
|
131
|
-
end
|
132
|
-
|
133
|
-
##
|
134
|
-
# Checks if a term is an expression.
|
135
|
-
#
|
136
|
-
# @return [TrueClass|FalseClass]
|
137
|
-
#
|
138
|
-
def is_expression?
|
139
|
-
use && !!polarity
|
140
|
-
end
|
141
|
-
|
142
|
-
##
|
143
|
-
# Checks if a term is a conjunction.
|
144
|
-
#
|
145
|
-
# @return [TrueClass|FalseClass]
|
146
|
-
#
|
147
|
-
def is_conjunction?(language)
|
148
|
-
pos == 'J' || CONJUNCTIONS[language]&.include?(lemma)
|
149
|
-
end
|
150
|
-
|
151
|
-
private
|
152
|
-
|
153
|
-
# @return [Oga::XML::Element]
|
154
|
-
def first_sentiment
|
155
|
-
@first_sentiment ||= node.xpath('sentiment').first
|
156
|
-
end
|
157
|
-
end # Term
|
158
|
-
end # OpinionDetectorBasic
|
159
|
-
end # Opener
|