opener-opinion-detector-basic 3.2.2 → 3.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/opener/opinion_detector_basic.rb +13 -2
- data/lib/opener/opinion_detector_basic/base_processor.rb +56 -0
- data/lib/opener/opinion_detector_basic/kaf/document.rb +146 -0
- data/lib/opener/opinion_detector_basic/kaf/opinion.rb +152 -0
- data/lib/opener/opinion_detector_basic/kaf/term.rb +185 -0
- data/lib/opener/opinion_detector_basic/legacy_processor.rb +136 -0
- data/lib/opener/opinion_detector_basic/processor.rb +22 -310
- data/lib/opener/opinion_detector_basic/version.rb +1 -1
- data/opener-opinion-detector-basic.gemspec +3 -1
- metadata +37 -13
- data/lib/opener/opinion_detector_basic/opinion.rb +0 -170
- data/lib/opener/opinion_detector_basic/term.rb +0 -159
@@ -27,7 +27,9 @@ Gem::Specification.new do |gem|
|
|
27
27
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
28
28
|
gem.add_dependency 'opener-core', '~> 2.2'
|
29
29
|
|
30
|
-
gem.add_dependency '
|
30
|
+
gem.add_dependency 'activesupport'
|
31
|
+
gem.add_dependency 'nokogiri'
|
32
|
+
gem.add_dependency 'hashie'
|
31
33
|
|
32
34
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
33
35
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-opinion-detector-basic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.2
|
4
|
+
version: 3.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -53,25 +53,47 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: activesupport
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
60
67
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
62
73
|
- - ">="
|
63
74
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
75
|
+
version: '0'
|
65
76
|
type: :runtime
|
66
77
|
prerelease: false
|
67
78
|
version_requirements: !ruby/object:Gem::Requirement
|
68
79
|
requirements:
|
69
|
-
- - "
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: hashie
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
70
88
|
- !ruby/object:Gem::Version
|
71
|
-
version: '
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
72
94
|
- - ">="
|
73
95
|
- !ruby/object:Gem::Version
|
74
|
-
version:
|
96
|
+
version: '0'
|
75
97
|
- !ruby/object:Gem::Dependency
|
76
98
|
name: rspec
|
77
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,12 +167,15 @@ files:
|
|
145
167
|
- config.ru
|
146
168
|
- exec/opinion-detector-basic.rb
|
147
169
|
- lib/opener/opinion_detector_basic.rb
|
170
|
+
- lib/opener/opinion_detector_basic/base_processor.rb
|
148
171
|
- lib/opener/opinion_detector_basic/cli.rb
|
149
|
-
- lib/opener/opinion_detector_basic/opinion.rb
|
172
|
+
- lib/opener/opinion_detector_basic/kaf/document.rb
|
173
|
+
- lib/opener/opinion_detector_basic/kaf/opinion.rb
|
174
|
+
- lib/opener/opinion_detector_basic/kaf/term.rb
|
175
|
+
- lib/opener/opinion_detector_basic/legacy_processor.rb
|
150
176
|
- lib/opener/opinion_detector_basic/processor.rb
|
151
177
|
- lib/opener/opinion_detector_basic/public/markdown.css
|
152
178
|
- lib/opener/opinion_detector_basic/server.rb
|
153
|
-
- lib/opener/opinion_detector_basic/term.rb
|
154
179
|
- lib/opener/opinion_detector_basic/version.rb
|
155
180
|
- lib/opener/opinion_detector_basic/views/index.erb
|
156
181
|
- opener-opinion-detector-basic.gemspec
|
@@ -174,8 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
199
|
- !ruby/object:Gem::Version
|
175
200
|
version: '0'
|
176
201
|
requirements: []
|
177
|
-
|
178
|
-
rubygems_version: 2.7.6
|
202
|
+
rubygems_version: 3.2.14
|
179
203
|
signing_key:
|
180
204
|
specification_version: 4
|
181
205
|
summary: Basic Opinion Detector.
|
@@ -1,170 +0,0 @@
|
|
1
|
-
module Opener
|
2
|
-
class OpinionDetectorBasic
|
3
|
-
class Opinion
|
4
|
-
attr_reader :term
|
5
|
-
attr_accessor :left_candidates, :right_candidates, :target_ids, :holders
|
6
|
-
|
7
|
-
# Opinion holders for each language code.
|
8
|
-
OPINION_HOLDERS = {
|
9
|
-
'nl' => [
|
10
|
-
'ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij',
|
11
|
-
'me','hem','haar','ons','hen','hun'
|
12
|
-
],
|
13
|
-
'en' => ['i','we','he','she','they','it','you'],
|
14
|
-
'es' => [
|
15
|
-
'yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'
|
16
|
-
],
|
17
|
-
'it' => ['io','tu','noi','voi','loro','lei','lui'],
|
18
|
-
'de' => ['ich','du','wir','ihr','sie','er'],
|
19
|
-
'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
|
20
|
-
}
|
21
|
-
|
22
|
-
def initialize(term)
|
23
|
-
@term = term
|
24
|
-
@left_candidates = []
|
25
|
-
@right_candidates = []
|
26
|
-
@holders = []
|
27
|
-
@target_ids = []
|
28
|
-
end
|
29
|
-
|
30
|
-
##
|
31
|
-
# Returns the term ids of the opinion expression.
|
32
|
-
#
|
33
|
-
# @return [Array]
|
34
|
-
#
|
35
|
-
def ids
|
36
|
-
@ids ||= term.list_ids.sort
|
37
|
-
end
|
38
|
-
|
39
|
-
##
|
40
|
-
# Returns the sentence id of the opinion.
|
41
|
-
#
|
42
|
-
# @return [String]
|
43
|
-
#
|
44
|
-
def sentence
|
45
|
-
@sentence ||= term.sentence
|
46
|
-
end
|
47
|
-
|
48
|
-
##
|
49
|
-
# Returns the strength of the opinion.
|
50
|
-
#
|
51
|
-
# @return [Integer]
|
52
|
-
#
|
53
|
-
def strength
|
54
|
-
@strength ||= term.accumulated_strength
|
55
|
-
end
|
56
|
-
|
57
|
-
##
|
58
|
-
# Returns the polarity of the opinion.
|
59
|
-
#
|
60
|
-
# @return [String]
|
61
|
-
#
|
62
|
-
def polarity
|
63
|
-
@polarity ||= if strength > 0
|
64
|
-
"positive"
|
65
|
-
elsif strength < 0
|
66
|
-
"negative"
|
67
|
-
else
|
68
|
-
"neutral"
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
##
|
73
|
-
# Obtain the opinion holders from the terms that belong to the same
|
74
|
-
# sentence.
|
75
|
-
#
|
76
|
-
def obtain_holders(sentences, language)
|
77
|
-
sentence_terms = sentences[sentence]
|
78
|
-
sentence_terms.each do |term|
|
79
|
-
if OPINION_HOLDERS[language]&.include?(term.lemma)
|
80
|
-
@holders << term.id
|
81
|
-
break
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
##
|
87
|
-
# Get the potential right and left candidates of the sentence and
|
88
|
-
# decide which ones are the actual targets of the opinion
|
89
|
-
#
|
90
|
-
def obtain_targets(sentences)
|
91
|
-
sentence_terms = sentences[sentence]
|
92
|
-
max_distance = 3
|
93
|
-
terms_count = sentence_terms.count
|
94
|
-
|
95
|
-
index = -1
|
96
|
-
sentence_terms.each_with_index do |term, i|
|
97
|
-
if ids.include?(term.id)
|
98
|
-
index = i
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
unless index+1 >= terms_count
|
103
|
-
min = index+1
|
104
|
-
max = [index+1+max_distance,terms_count].min
|
105
|
-
@right_candidates = filter_candidates(sentence_terms[min..max])
|
106
|
-
end
|
107
|
-
|
108
|
-
index = 0
|
109
|
-
sentence_terms.each_with_index do |term, i|
|
110
|
-
if ids.include?(term.id)
|
111
|
-
index = i
|
112
|
-
break # needed for left_candidates
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
unless index == 0
|
117
|
-
min = [0, index-1-max_distance].max
|
118
|
-
max = index
|
119
|
-
@left_candidates = filter_candidates(sentence_terms[min..max])
|
120
|
-
end
|
121
|
-
|
122
|
-
unless right_candidates.empty?
|
123
|
-
candidate = right_candidates.first
|
124
|
-
@target_ids << candidate.id
|
125
|
-
end
|
126
|
-
|
127
|
-
if target_ids.empty?
|
128
|
-
list = mix_lists(right_candidates, left_candidates)
|
129
|
-
list.each do |l|
|
130
|
-
@target_ids << l.id
|
131
|
-
break
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
protected
|
137
|
-
|
138
|
-
##
|
139
|
-
# If there are no opinion targets, right and left candidates
|
140
|
-
# are mixed into one list and the first one is picked as the target.
|
141
|
-
#
|
142
|
-
# @return [Array]
|
143
|
-
#
|
144
|
-
def mix_lists(lista, listb)
|
145
|
-
list = []
|
146
|
-
min = [lista.count, listb.count].min
|
147
|
-
(0..min).each do |i|
|
148
|
-
list << lista[i]
|
149
|
-
list << listb[i]
|
150
|
-
if lista.count > listb.count
|
151
|
-
list << lista[min]
|
152
|
-
elsif listb.count > lista.count
|
153
|
-
list << listb[min]
|
154
|
-
end
|
155
|
-
end
|
156
|
-
return list.compact
|
157
|
-
end
|
158
|
-
|
159
|
-
##
|
160
|
-
# Filters candidate terms depending on their part of speech and if
|
161
|
-
# they are already part of the expression.
|
162
|
-
#
|
163
|
-
# @return [Hash]
|
164
|
-
#
|
165
|
-
def filter_candidates(sentence_terms)
|
166
|
-
sentence_terms.select{|t| (t.pos == "N" || t.pos == "R") && !ids.include?(t.id)}
|
167
|
-
end
|
168
|
-
end # Opinion
|
169
|
-
end # OpinionDetectorBasic
|
170
|
-
end # Opener
|
@@ -1,159 +0,0 @@
|
|
1
|
-
module Opener
|
2
|
-
class OpinionDetectorBasic
|
3
|
-
class Term
|
4
|
-
attr_reader :node, :sentence, :is_conjunction
|
5
|
-
attr_accessor :use, :accumulated_strength, :list_ids
|
6
|
-
|
7
|
-
# Map of conjunctions per language code
|
8
|
-
CONJUNCTIONS = {
|
9
|
-
'nl' => %w{, en},
|
10
|
-
'en' => %w{, and},
|
11
|
-
'es' => %w{, y e},
|
12
|
-
'it' => %w{, e ed},
|
13
|
-
'de' => %w{, und},
|
14
|
-
'fr' => %w{, et}
|
15
|
-
}
|
16
|
-
|
17
|
-
def initialize(node, document, language)
|
18
|
-
@node = node
|
19
|
-
@sentence = get_sentence(document)
|
20
|
-
@use = true
|
21
|
-
@accumulated_strength = strength
|
22
|
-
@list_ids = [id]
|
23
|
-
@is_conjunction = is_conjunction?(language)
|
24
|
-
end
|
25
|
-
|
26
|
-
##
|
27
|
-
# Returns the term id.
|
28
|
-
#
|
29
|
-
# @return [String]
|
30
|
-
#
|
31
|
-
def id
|
32
|
-
@id ||= node.get('tid')
|
33
|
-
end
|
34
|
-
|
35
|
-
##
|
36
|
-
# Returns the lemma of the term.
|
37
|
-
#
|
38
|
-
# @return [String]
|
39
|
-
#
|
40
|
-
def lemma
|
41
|
-
@lemma ||= node.get('lemma')
|
42
|
-
end
|
43
|
-
|
44
|
-
##
|
45
|
-
# Returns the part of speech of the term.
|
46
|
-
#
|
47
|
-
# @return [String]
|
48
|
-
#
|
49
|
-
def pos
|
50
|
-
@pos ||= node.get('pos')
|
51
|
-
end
|
52
|
-
|
53
|
-
##
|
54
|
-
# Returns the sentiment modifier type if it exists.
|
55
|
-
#
|
56
|
-
# @return [String|NilClass]
|
57
|
-
#
|
58
|
-
def sentiment_modifier
|
59
|
-
@sentiment_modifier ||=
|
60
|
-
first_sentiment ? first_sentiment.get('sentiment_modifier') : nil
|
61
|
-
end
|
62
|
-
|
63
|
-
##
|
64
|
-
# Returns the polarity of the term if it exists.
|
65
|
-
#
|
66
|
-
# @return [String|NilClass]
|
67
|
-
#
|
68
|
-
def polarity
|
69
|
-
@polarity ||= first_sentiment ? first_sentiment.get('polarity') : nil
|
70
|
-
end
|
71
|
-
|
72
|
-
##
|
73
|
-
# Returns the actual word ids that construct the lemma.
|
74
|
-
#
|
75
|
-
# @return [Array]
|
76
|
-
#
|
77
|
-
def target_ids
|
78
|
-
@target_ids ||= node.xpath('span/target')
|
79
|
-
.map { |target| target.get('id') }
|
80
|
-
end
|
81
|
-
|
82
|
-
##
|
83
|
-
# Returns the strength of the term depending on its type.
|
84
|
-
#
|
85
|
-
# @return [Integer]
|
86
|
-
#
|
87
|
-
def strength
|
88
|
-
if polarity == "positive"
|
89
|
-
return 1
|
90
|
-
elsif polarity == "negative"
|
91
|
-
return -1
|
92
|
-
end
|
93
|
-
|
94
|
-
if is_intensifier?
|
95
|
-
return 2
|
96
|
-
elsif is_shifter?
|
97
|
-
return -1
|
98
|
-
end
|
99
|
-
|
100
|
-
return 0
|
101
|
-
end
|
102
|
-
|
103
|
-
##
|
104
|
-
# Returns the sentence id that the term belongs to in the document.
|
105
|
-
#
|
106
|
-
# @return [String]
|
107
|
-
#
|
108
|
-
def get_sentence(document)
|
109
|
-
document
|
110
|
-
.xpath("KAF/text/wf[@wid='#{target_ids.first}']")
|
111
|
-
.first
|
112
|
-
.get('sent')
|
113
|
-
end
|
114
|
-
|
115
|
-
##
|
116
|
-
# Checks if a term is an intensifier.
|
117
|
-
#
|
118
|
-
# @return [TrueClass|FalseClass]
|
119
|
-
#
|
120
|
-
def is_intensifier?
|
121
|
-
sentiment_modifier == "intensifier"
|
122
|
-
end
|
123
|
-
|
124
|
-
##
|
125
|
-
# Checks if a term is a shifter.
|
126
|
-
#
|
127
|
-
# @return [TrueClass|FalseClass]
|
128
|
-
#
|
129
|
-
def is_shifter?
|
130
|
-
sentiment_modifier == "shifter"
|
131
|
-
end
|
132
|
-
|
133
|
-
##
|
134
|
-
# Checks if a term is an expression.
|
135
|
-
#
|
136
|
-
# @return [TrueClass|FalseClass]
|
137
|
-
#
|
138
|
-
def is_expression?
|
139
|
-
use && !!polarity
|
140
|
-
end
|
141
|
-
|
142
|
-
##
|
143
|
-
# Checks if a term is a conjunction.
|
144
|
-
#
|
145
|
-
# @return [TrueClass|FalseClass]
|
146
|
-
#
|
147
|
-
def is_conjunction?(language)
|
148
|
-
pos == 'J' || CONJUNCTIONS[language]&.include?(lemma)
|
149
|
-
end
|
150
|
-
|
151
|
-
private
|
152
|
-
|
153
|
-
# @return [Oga::XML::Element]
|
154
|
-
def first_sentiment
|
155
|
-
@first_sentiment ||= node.xpath('sentiment').first
|
156
|
-
end
|
157
|
-
end # Term
|
158
|
-
end # OpinionDetectorBasic
|
159
|
-
end # Opener
|