opener-opinion-detector-basic 3.0.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exec/opinion-detector-basic.rb +38 -0
- data/lib/opener/opinion_detector_basic.rb +3 -4
- data/lib/opener/opinion_detector_basic/cli.rb +9 -2
- data/lib/opener/opinion_detector_basic/opinion.rb +32 -33
- data/lib/opener/opinion_detector_basic/processor.rb +42 -45
- data/lib/opener/opinion_detector_basic/term.rb +39 -40
- data/lib/opener/opinion_detector_basic/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fa83717460841775dcf28e9f640208bd16e2cc5d
|
4
|
+
data.tar.gz: 8c8f56b11a38b57c07945bffd666b646ef041f80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 957e43027ccae0ab98becec3641eb82c10a9531d74f278d3557e3f1f8aa19e8ec59260bebd31a105e5e283e55bc2a04d67b70f9a582c00b93b771210e491fbe8
|
7
|
+
data.tar.gz: 02a6df21160370204fb1a72084d36663f974ff816a615028567e701370bd0a6cfabe96b16f7281f0576d7f656bbd21de010f129a05d1f2d3126e71ed4b1706ff
|
@@ -4,6 +4,44 @@ require 'opener/daemons'
|
|
4
4
|
|
5
5
|
require_relative '../lib/opener/opinion_detector_basic'
|
6
6
|
|
7
|
+
Oga::XML::Parser.class_eval do
|
8
|
+
include NewRelic::Agent::Instrumentation::ControllerInstrumentation
|
9
|
+
include NewRelic::Agent::MethodTracer
|
10
|
+
|
11
|
+
add_method_tracer(:parse)
|
12
|
+
end
|
13
|
+
|
14
|
+
Oga::XPath::Parser.class_eval do
|
15
|
+
class << self
|
16
|
+
include NewRelic::Agent::Instrumentation::ControllerInstrumentation
|
17
|
+
include NewRelic::Agent::MethodTracer
|
18
|
+
|
19
|
+
add_method_tracer(:parse_with_cache, 'Oga::XPath::Parser/parse_with_cache')
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
Oga::XPath::Evaluator.class_eval do
|
24
|
+
include NewRelic::Agent::Instrumentation::ControllerInstrumentation
|
25
|
+
include NewRelic::Agent::MethodTracer
|
26
|
+
|
27
|
+
add_method_tracer(:evaluate)
|
28
|
+
add_method_tracer(:evaluate_ast)
|
29
|
+
end
|
30
|
+
|
31
|
+
Opener::OpinionDetectorBasic::Processor.class_eval do
|
32
|
+
include NewRelic::Agent::Instrumentation::ControllerInstrumentation
|
33
|
+
include NewRelic::Agent::MethodTracer
|
34
|
+
|
35
|
+
add_method_tracer(:process)
|
36
|
+
add_method_tracer(:terms)
|
37
|
+
add_method_tracer(:opinions)
|
38
|
+
add_method_tracer(:add_opinion_element)
|
39
|
+
add_method_tracer(:pretty_print)
|
40
|
+
add_method_tracer(:set_accumulated_strength)
|
41
|
+
add_method_tracer(:apply_modifiers)
|
42
|
+
add_method_tracer(:apply_conjunctions)
|
43
|
+
end
|
44
|
+
|
7
45
|
daemon = Opener::Daemons::Daemon.new(Opener::OpinionDetectorBasic)
|
8
46
|
|
9
47
|
daemon.start
|
@@ -1,6 +1,7 @@
|
|
1
|
+
gem 'slop', '~> 3.0'
|
2
|
+
|
1
3
|
require 'slop'
|
2
4
|
require 'oga'
|
3
|
-
require 'monitor'
|
4
5
|
|
5
6
|
require 'rexml/document'
|
6
7
|
require 'rexml/formatters/pretty'
|
@@ -32,7 +33,7 @@ module Opener
|
|
32
33
|
@args = options.delete(:args) || []
|
33
34
|
@options = options
|
34
35
|
end
|
35
|
-
|
36
|
+
|
36
37
|
##
|
37
38
|
# Processes the input KAF document.
|
38
39
|
#
|
@@ -40,8 +41,6 @@ module Opener
|
|
40
41
|
# @return [String]
|
41
42
|
#
|
42
43
|
def run(input)
|
43
|
-
options[:timestamp] = !options.delete(:no_time)
|
44
|
-
|
45
44
|
return Processor.new(input, options).process
|
46
45
|
end
|
47
46
|
end # OpinionDetectorBasic
|
@@ -45,10 +45,17 @@ Example:
|
|
45
45
|
abort "opinion-detector-basic v#{VERSION} on #{RUBY_DESCRIPTION}"
|
46
46
|
end
|
47
47
|
|
48
|
+
on :p, :pretty, 'Enables pretty-printing of XML',
|
49
|
+
:default => false
|
50
|
+
|
51
|
+
on :'no-timestamp', 'Disables timestamps in XML output'
|
52
|
+
|
48
53
|
run do |opts, args|
|
49
54
|
detector = OpinionDetectorBasic.new(
|
50
|
-
:args
|
51
|
-
:domain
|
55
|
+
:args => args,
|
56
|
+
:domain => opts[:domain],
|
57
|
+
:pretty => opts[:pretty],
|
58
|
+
:timestamp => !opts[:'no-timestamp']
|
52
59
|
)
|
53
60
|
|
54
61
|
input = STDIN.tty? ? nil : STDIN.read
|
@@ -3,7 +3,22 @@ module Opener
|
|
3
3
|
class Opinion
|
4
4
|
attr_reader :term
|
5
5
|
attr_accessor :left_candidates, :right_candidates, :target_ids, :holders
|
6
|
-
|
6
|
+
|
7
|
+
# Opinion holders for each language code.
|
8
|
+
OPINION_HOLDERS = {
|
9
|
+
'nl' => [
|
10
|
+
'ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij',
|
11
|
+
'me','hem','haar','ons','hen','hun'
|
12
|
+
],
|
13
|
+
'en' => ['i','we','he','she','they','it','you'],
|
14
|
+
'es' => [
|
15
|
+
'yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'
|
16
|
+
],
|
17
|
+
'it' => ['io','tu','noi','voi','loro','lei','lui'],
|
18
|
+
'de' => ['ich','du','wir','ihr','sie','er'],
|
19
|
+
'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
|
20
|
+
}
|
21
|
+
|
7
22
|
def initialize(term)
|
8
23
|
@term = term
|
9
24
|
@left_candidates = []
|
@@ -11,7 +26,7 @@ module Opener
|
|
11
26
|
@holders = []
|
12
27
|
@target_ids = []
|
13
28
|
end
|
14
|
-
|
29
|
+
|
15
30
|
##
|
16
31
|
# Returns the term ids of the opinion expression.
|
17
32
|
#
|
@@ -20,7 +35,7 @@ module Opener
|
|
20
35
|
def ids
|
21
36
|
@ids ||= term.list_ids.sort
|
22
37
|
end
|
23
|
-
|
38
|
+
|
24
39
|
##
|
25
40
|
# Returns the sentence id of the opinion.
|
26
41
|
#
|
@@ -29,7 +44,7 @@ module Opener
|
|
29
44
|
def sentence
|
30
45
|
@sentence ||= term.sentence
|
31
46
|
end
|
32
|
-
|
47
|
+
|
33
48
|
##
|
34
49
|
# Returns the strength of the opinion.
|
35
50
|
#
|
@@ -38,7 +53,7 @@ module Opener
|
|
38
53
|
def strength
|
39
54
|
@strength ||= term.accumulated_strength
|
40
55
|
end
|
41
|
-
|
56
|
+
|
42
57
|
##
|
43
58
|
# Returns the polarity of the opinion.
|
44
59
|
#
|
@@ -53,7 +68,7 @@ module Opener
|
|
53
68
|
"neutral"
|
54
69
|
end
|
55
70
|
end
|
56
|
-
|
71
|
+
|
57
72
|
##
|
58
73
|
# Obtain the opinion holders from the terms that belong to the same
|
59
74
|
# sentence.
|
@@ -61,13 +76,13 @@ module Opener
|
|
61
76
|
def obtain_holders(sentences, language)
|
62
77
|
sentence_terms = sentences[sentence]
|
63
78
|
sentence_terms.each do |term|
|
64
|
-
if
|
79
|
+
if OPINION_HOLDERS[language].include?(term.lemma)
|
65
80
|
@holders << term.id
|
66
81
|
break
|
67
82
|
end
|
68
83
|
end
|
69
84
|
end
|
70
|
-
|
85
|
+
|
71
86
|
##
|
72
87
|
# Get the potential right and left candidates of the sentence and
|
73
88
|
# decide which ones are the actual targets of the opinion
|
@@ -76,20 +91,20 @@ module Opener
|
|
76
91
|
sentence_terms = sentences[sentence]
|
77
92
|
max_distance = 3
|
78
93
|
terms_count = sentence_terms.count
|
79
|
-
|
94
|
+
|
80
95
|
index = -1
|
81
96
|
sentence_terms.each_with_index do |term, i|
|
82
97
|
if ids.include?(term.id)
|
83
98
|
index = i
|
84
99
|
end
|
85
100
|
end
|
86
|
-
|
101
|
+
|
87
102
|
unless index+1 >= terms_count
|
88
103
|
min = index+1
|
89
104
|
max = [index+1+max_distance,terms_count].min
|
90
105
|
@right_candidates = filter_candidates(sentence_terms[min..max])
|
91
106
|
end
|
92
|
-
|
107
|
+
|
93
108
|
index = 0
|
94
109
|
sentence_terms.each_with_index do |term, i|
|
95
110
|
if ids.include?(term.id)
|
@@ -97,7 +112,7 @@ module Opener
|
|
97
112
|
break # needed for left_candidates
|
98
113
|
end
|
99
114
|
end
|
100
|
-
|
115
|
+
|
101
116
|
unless index == 0
|
102
117
|
min = [0, index-1-max_distance].max
|
103
118
|
max = index
|
@@ -108,7 +123,7 @@ module Opener
|
|
108
123
|
candidate = right_candidates.first
|
109
124
|
@target_ids << candidate.id
|
110
125
|
end
|
111
|
-
|
126
|
+
|
112
127
|
if target_ids.empty?
|
113
128
|
list = mix_lists(right_candidates, left_candidates)
|
114
129
|
list.each do |l|
|
@@ -117,9 +132,9 @@ module Opener
|
|
117
132
|
end
|
118
133
|
end
|
119
134
|
end
|
120
|
-
|
135
|
+
|
121
136
|
protected
|
122
|
-
|
137
|
+
|
123
138
|
##
|
124
139
|
# If there are no opinion targets, right and left candidates
|
125
140
|
# are mixed into one list and the first one is picked as the target.
|
@@ -140,7 +155,7 @@ module Opener
|
|
140
155
|
end
|
141
156
|
return list.compact
|
142
157
|
end
|
143
|
-
|
158
|
+
|
144
159
|
##
|
145
160
|
# Filters candidate terms depending on their part of speech and if
|
146
161
|
# they are already part of the expression.
|
@@ -150,22 +165,6 @@ module Opener
|
|
150
165
|
def filter_candidates(sentence_terms)
|
151
166
|
sentence_terms.select{|t| (t.pos == "N" || t.pos == "R") && !ids.include?(t.id)}
|
152
167
|
end
|
153
|
-
|
154
|
-
##
|
155
|
-
# Opinion holders for each language code.
|
156
|
-
#
|
157
|
-
# @return [Hash]
|
158
|
-
#
|
159
|
-
def opinion_holders
|
160
|
-
{
|
161
|
-
'nl' => ['ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij','me','hem','haar','ons','hen','hun'],
|
162
|
-
'en' => ['i','we','he','she','they','it','you'],
|
163
|
-
'es' => ['yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'],
|
164
|
-
'it' => ['io','tu','noi','voi','loro','lei','lui'],
|
165
|
-
'de' => ['ich','du','wir','ihr','sie','er'],
|
166
|
-
'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
|
167
|
-
}
|
168
|
-
end
|
169
168
|
end # Opinion
|
170
169
|
end # OpinionDetectorBasic
|
171
|
-
end # Opener
|
170
|
+
end # Opener
|
@@ -17,11 +17,11 @@ module Opener
|
|
17
17
|
# by default due to the performance overhead.
|
18
18
|
#
|
19
19
|
def initialize(file, options = {})
|
20
|
-
@document
|
20
|
+
@document = Oga.parse_xml(file)
|
21
21
|
|
22
|
-
@timestamp
|
23
|
-
@opinion_strength
|
24
|
-
@pretty
|
22
|
+
@timestamp = options[:timestamp]
|
23
|
+
@opinion_strength = options[:opinion_strength]
|
24
|
+
@pretty = options[:pretty] || false
|
25
25
|
|
26
26
|
raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
|
27
27
|
end
|
@@ -30,18 +30,18 @@ module Opener
|
|
30
30
|
# Processes the input and returns the new KAF output.
|
31
31
|
# @return [String]
|
32
32
|
#
|
33
|
-
def process
|
33
|
+
def process
|
34
34
|
add_opinions_layer
|
35
|
-
|
35
|
+
|
36
36
|
index = 1
|
37
|
-
opinions.each do |opinion|
|
37
|
+
opinions.each do |opinion|
|
38
38
|
add_opinion(opinion, index)
|
39
39
|
index += 1
|
40
40
|
end
|
41
41
|
|
42
42
|
add_linguistic_processor
|
43
43
|
|
44
|
-
|
44
|
+
pretty ? pretty_print(document) : document.to_xml
|
45
45
|
end
|
46
46
|
|
47
47
|
##
|
@@ -50,25 +50,19 @@ module Opener
|
|
50
50
|
# @return [String]
|
51
51
|
#
|
52
52
|
def language
|
53
|
-
|
53
|
+
@language ||= document.at_xpath('KAF').get('xml:lang')
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
##
|
57
57
|
# Get the terms from the input file
|
58
58
|
# @return [Hash]
|
59
59
|
#
|
60
60
|
def terms
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
document.xpath('KAF/terms/term').each do |term|
|
65
|
-
@terms << Term.new(term, document, language)
|
66
|
-
end
|
61
|
+
@terms ||= document.xpath('KAF/terms/term').map do |term|
|
62
|
+
Term.new(term, document, language)
|
67
63
|
end
|
68
|
-
|
69
|
-
return @terms
|
70
64
|
end
|
71
|
-
|
65
|
+
|
72
66
|
##
|
73
67
|
# Get the opinions.
|
74
68
|
#
|
@@ -79,7 +73,7 @@ module Opener
|
|
79
73
|
set_accumulated_strength
|
80
74
|
apply_modifiers
|
81
75
|
apply_conjunctions
|
82
|
-
|
76
|
+
|
83
77
|
##
|
84
78
|
# Initialize opinions with their expressions.
|
85
79
|
#
|
@@ -88,14 +82,14 @@ module Opener
|
|
88
82
|
o = Opinion.new(term)
|
89
83
|
end
|
90
84
|
end.compact
|
91
|
-
|
85
|
+
|
92
86
|
##
|
93
87
|
# Obtain targets for each opinion.
|
94
88
|
#
|
95
89
|
@opinions.each do |opinion|
|
96
90
|
opinion.obtain_targets(sentences)
|
97
91
|
end
|
98
|
-
|
92
|
+
|
99
93
|
##
|
100
94
|
# Obtain holders for each opinion.
|
101
95
|
#
|
@@ -103,10 +97,10 @@ module Opener
|
|
103
97
|
opinion.obtain_holders(sentences, language)
|
104
98
|
end
|
105
99
|
end
|
106
|
-
|
107
|
-
|
100
|
+
|
101
|
+
@opinions
|
108
102
|
end
|
109
|
-
|
103
|
+
|
110
104
|
##
|
111
105
|
# Remove the opinions layer from the KAF file if it exists and add a new
|
112
106
|
# one.
|
@@ -117,30 +111,32 @@ module Opener
|
|
117
111
|
|
118
112
|
new_node('opinions', 'KAF')
|
119
113
|
end
|
120
|
-
|
114
|
+
|
121
115
|
##
|
122
116
|
# Adds the entire opinion in the KAF file.
|
123
117
|
#
|
124
118
|
def add_opinion(opinion, index)
|
125
119
|
opinion_node = new_node("opinion", "KAF/opinions")
|
126
120
|
opinion_node.set('oid', "o#{index.to_s}")
|
127
|
-
|
121
|
+
|
128
122
|
unless opinion.holders.empty?
|
129
123
|
opinion_holder_node = new_node("opinion_holder", opinion_node)
|
130
124
|
add_opinion_element(opinion_holder_node, opinion.holders)
|
131
125
|
end
|
132
|
-
|
126
|
+
|
133
127
|
opinion_target_node = new_node("opinion_target", opinion_node)
|
128
|
+
|
134
129
|
unless opinion.target_ids.empty?
|
135
130
|
add_opinion_element(opinion_target_node, opinion.target_ids)
|
136
131
|
end
|
137
|
-
|
132
|
+
|
138
133
|
expression_node = new_node("opinion_expression", opinion_node)
|
139
134
|
expression_node.set('polarity', opinion.polarity)
|
140
135
|
expression_node.set('strength', opinion.strength.to_s)
|
136
|
+
|
141
137
|
add_opinion_element(expression_node, opinion.ids)
|
142
138
|
end
|
143
|
-
|
139
|
+
|
144
140
|
##
|
145
141
|
# Method for adding opinion holders, targets and expressions.
|
146
142
|
#
|
@@ -149,12 +145,13 @@ module Opener
|
|
149
145
|
comment = Oga::XML::Comment.new(:text => "#{lemmas}")
|
150
146
|
node.children << comment
|
151
147
|
span_node = new_node("span", node)
|
148
|
+
|
152
149
|
ids.each do |id|
|
153
150
|
target_node = new_node("target", span_node)
|
154
151
|
target_node.set('id', id.to_s)
|
155
152
|
end
|
156
153
|
end
|
157
|
-
|
154
|
+
|
158
155
|
##
|
159
156
|
# Add linguistic processor layer with basic information
|
160
157
|
# (version, timestamp, description etc) in the KAF file.
|
@@ -171,7 +168,7 @@ module Opener
|
|
171
168
|
|
172
169
|
lp_node.set('version', "#{last_edited}-#{version}")
|
173
170
|
lp_node.set('name', description)
|
174
|
-
|
171
|
+
|
175
172
|
if timestamp
|
176
173
|
format = '%Y-%m-%dT%H:%M:%S%Z'
|
177
174
|
|
@@ -180,7 +177,7 @@ module Opener
|
|
180
177
|
lp_node.set('timestamp', '*')
|
181
178
|
end
|
182
179
|
end
|
183
|
-
|
180
|
+
|
184
181
|
##
|
185
182
|
# Format the output document properly.
|
186
183
|
#
|
@@ -196,18 +193,18 @@ module Opener
|
|
196
193
|
formatter.compact = true
|
197
194
|
formatter.write(doc, out)
|
198
195
|
|
199
|
-
|
196
|
+
out.strip
|
200
197
|
end
|
201
|
-
|
198
|
+
|
202
199
|
##
|
203
200
|
# Get terms grouped by sentence.
|
204
201
|
#
|
205
202
|
def sentences
|
206
203
|
@sentences ||= terms.group_by{|t| t.sentence}
|
207
204
|
end
|
208
|
-
|
205
|
+
|
209
206
|
protected
|
210
|
-
|
207
|
+
|
211
208
|
##
|
212
209
|
# The strength of a term depends heavily on the type of the previous
|
213
210
|
# one. For example if the previous one is a shifter, it needs
|
@@ -228,7 +225,7 @@ module Opener
|
|
228
225
|
symbol = terms[i+1].accumulated_strength > 0 ? :+ : :-
|
229
226
|
else
|
230
227
|
symbol = :*
|
231
|
-
end
|
228
|
+
end
|
232
229
|
elsif terms[i+1].is_intensifier?
|
233
230
|
terms[i+1].accumulated_strength = term.accumulated_strength.send(symbol, terms[i+1].accumulated_strength)
|
234
231
|
term.use = false
|
@@ -242,7 +239,7 @@ module Opener
|
|
242
239
|
end
|
243
240
|
end
|
244
241
|
end
|
245
|
-
|
242
|
+
|
246
243
|
##
|
247
244
|
# Apply strength to the next term after a shifter or intensifier.
|
248
245
|
#
|
@@ -258,7 +255,7 @@ module Opener
|
|
258
255
|
end
|
259
256
|
end
|
260
257
|
end
|
261
|
-
|
258
|
+
|
262
259
|
##
|
263
260
|
# Ignore conjunctions when applying strength.
|
264
261
|
#
|
@@ -276,7 +273,7 @@ module Opener
|
|
276
273
|
if j >= terms_count
|
277
274
|
break
|
278
275
|
end
|
279
|
-
|
276
|
+
|
280
277
|
if terms[j].is_conjunction
|
281
278
|
terms[j].use = false
|
282
279
|
j += 1
|
@@ -297,9 +294,9 @@ module Opener
|
|
297
294
|
i = j
|
298
295
|
end
|
299
296
|
i += 1
|
300
|
-
end
|
297
|
+
end
|
301
298
|
end
|
302
|
-
|
299
|
+
|
303
300
|
##
|
304
301
|
# Creates a new node in the KAF file.
|
305
302
|
#
|
@@ -314,7 +311,7 @@ module Opener
|
|
314
311
|
|
315
312
|
parent_node.children << node
|
316
313
|
|
317
|
-
|
314
|
+
node
|
318
315
|
end
|
319
316
|
|
320
317
|
##
|
@@ -322,7 +319,7 @@ module Opener
|
|
322
319
|
# @return [Boolean]
|
323
320
|
#
|
324
321
|
def is_kaf?
|
325
|
-
|
322
|
+
!!document.at_xpath('KAF')
|
326
323
|
end
|
327
324
|
end # Processor
|
328
325
|
end # OpinionDetectorBasic
|
@@ -3,7 +3,17 @@ module Opener
|
|
3
3
|
class Term
|
4
4
|
attr_reader :node, :sentence, :is_conjunction
|
5
5
|
attr_accessor :use, :accumulated_strength, :list_ids
|
6
|
-
|
6
|
+
|
7
|
+
# Map of conjunctions per language code
|
8
|
+
CONJUNCTIONS = {
|
9
|
+
'nl' => %w{, en},
|
10
|
+
'en' => %w{, and},
|
11
|
+
'es' => %w{, y e},
|
12
|
+
'it' => %w{, e ed},
|
13
|
+
'de' => %w{, und},
|
14
|
+
'fr' => %w{, et}
|
15
|
+
}
|
16
|
+
|
7
17
|
def initialize(node, document, language)
|
8
18
|
@node = node
|
9
19
|
@sentence = get_sentence(document)
|
@@ -12,7 +22,7 @@ module Opener
|
|
12
22
|
@list_ids = [id]
|
13
23
|
@is_conjunction = is_conjunction?(language)
|
14
24
|
end
|
15
|
-
|
25
|
+
|
16
26
|
##
|
17
27
|
# Returns the term id.
|
18
28
|
#
|
@@ -21,16 +31,16 @@ module Opener
|
|
21
31
|
def id
|
22
32
|
@id ||= node.get('tid')
|
23
33
|
end
|
24
|
-
|
34
|
+
|
25
35
|
##
|
26
36
|
# Returns the lemma of the term.
|
27
|
-
#
|
37
|
+
#
|
28
38
|
# @return [String]
|
29
39
|
#
|
30
40
|
def lemma
|
31
41
|
@lemma ||= node.get('lemma')
|
32
42
|
end
|
33
|
-
|
43
|
+
|
34
44
|
##
|
35
45
|
# Returns the part of speech of the term.
|
36
46
|
#
|
@@ -39,38 +49,36 @@ module Opener
|
|
39
49
|
def pos
|
40
50
|
@pos ||= node.get('pos')
|
41
51
|
end
|
42
|
-
|
52
|
+
|
43
53
|
##
|
44
54
|
# Returns the sentiment modifier type if it exists.
|
45
55
|
#
|
46
56
|
# @return [String|NilClass]
|
47
57
|
#
|
48
58
|
def sentiment_modifier
|
49
|
-
@sentiment_modifier ||=
|
50
|
-
|
51
|
-
end
|
59
|
+
@sentiment_modifier ||=
|
60
|
+
first_sentiment ? first_sentiment.get('sentiment_modifier') : nil
|
52
61
|
end
|
53
|
-
|
62
|
+
|
54
63
|
##
|
55
64
|
# Returns the polarity of the term if it exists.
|
56
65
|
#
|
57
66
|
# @return [String|NilClass]
|
58
67
|
#
|
59
68
|
def polarity
|
60
|
-
@polarity ||=
|
61
|
-
sentiment.get('polarity')
|
62
|
-
end
|
69
|
+
@polarity ||= first_sentiment ? first_sentiment.get('polarity') : nil
|
63
70
|
end
|
64
|
-
|
71
|
+
|
65
72
|
##
|
66
73
|
# Returns the actual word ids that construct the lemma.
|
67
74
|
#
|
68
75
|
# @return [Array]
|
69
76
|
#
|
70
77
|
def target_ids
|
71
|
-
@target_ids ||= node.xpath('span/target')
|
78
|
+
@target_ids ||= node.xpath('span/target')
|
79
|
+
.map { |target| target.get('id') }
|
72
80
|
end
|
73
|
-
|
81
|
+
|
74
82
|
##
|
75
83
|
# Returns the strength of the term depending on its type.
|
76
84
|
#
|
@@ -82,16 +90,16 @@ module Opener
|
|
82
90
|
elsif polarity == "negative"
|
83
91
|
return -1
|
84
92
|
end
|
85
|
-
|
93
|
+
|
86
94
|
if is_intensifier?
|
87
95
|
return 2
|
88
96
|
elsif is_shifter?
|
89
97
|
return -1
|
90
98
|
end
|
91
|
-
|
99
|
+
|
92
100
|
return 0
|
93
101
|
end
|
94
|
-
|
102
|
+
|
95
103
|
##
|
96
104
|
# Returns the sentence id that the term belongs to in the document.
|
97
105
|
#
|
@@ -103,7 +111,7 @@ module Opener
|
|
103
111
|
.first
|
104
112
|
.get('sent')
|
105
113
|
end
|
106
|
-
|
114
|
+
|
107
115
|
##
|
108
116
|
# Checks if a term is an intensifier.
|
109
117
|
#
|
@@ -112,7 +120,7 @@ module Opener
|
|
112
120
|
def is_intensifier?
|
113
121
|
sentiment_modifier == "intensifier"
|
114
122
|
end
|
115
|
-
|
123
|
+
|
116
124
|
##
|
117
125
|
# Checks if a term is a shifter.
|
118
126
|
#
|
@@ -121,7 +129,7 @@ module Opener
|
|
121
129
|
def is_shifter?
|
122
130
|
sentiment_modifier == "shifter"
|
123
131
|
end
|
124
|
-
|
132
|
+
|
125
133
|
##
|
126
134
|
# Checks if a term is an expression.
|
127
135
|
#
|
@@ -130,31 +138,22 @@ module Opener
|
|
130
138
|
def is_expression?
|
131
139
|
use && !!polarity
|
132
140
|
end
|
133
|
-
|
141
|
+
|
134
142
|
##
|
135
143
|
# Checks if a term is a conjunction.
|
136
144
|
#
|
137
145
|
# @return [TrueClass|FalseClass]
|
138
146
|
#
|
139
147
|
def is_conjunction?(language)
|
140
|
-
|
148
|
+
CONJUNCTIONS[language].include?(lemma)
|
141
149
|
end
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
#
|
146
|
-
|
147
|
-
|
148
|
-
def conjunctions
|
149
|
-
{
|
150
|
-
'nl' => [',','en'],
|
151
|
-
'en' => [',','and'],
|
152
|
-
'es' => [',','y','e'],
|
153
|
-
'it' => [',','e','ed'],
|
154
|
-
'de' => [',','und'],
|
155
|
-
'fr' => [',','et']
|
156
|
-
}
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
# @return [Oga::XML::Element]
|
154
|
+
def first_sentiment
|
155
|
+
@first_sentiment ||= node.xpath('sentiment').first
|
157
156
|
end
|
158
157
|
end # Term
|
159
158
|
end # OpinionDetectorBasic
|
160
|
-
end # Opener
|
159
|
+
end # Opener
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-opinion-detector-basic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1
|
4
|
+
version: 3.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -125,9 +125,9 @@ dependencies:
|
|
125
125
|
description: Basic Opinion Detector.
|
126
126
|
email:
|
127
127
|
executables:
|
128
|
+
- opinion-detector-basic-server
|
128
129
|
- opinion-detector-basic
|
129
130
|
- opinion-detector-basic-daemon
|
130
|
-
- opinion-detector-basic-server
|
131
131
|
extensions: []
|
132
132
|
extra_rdoc_files: []
|
133
133
|
files:
|
@@ -169,7 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
169
|
version: '0'
|
170
170
|
requirements: []
|
171
171
|
rubyforge_project:
|
172
|
-
rubygems_version: 2.
|
172
|
+
rubygems_version: 2.4.8
|
173
173
|
signing_key:
|
174
174
|
specification_version: 4
|
175
175
|
summary: Basic Opinion Detector.
|