opener-opinion-detector-basic 3.2.2 → 3.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ module Opener
2
+ class OpinionDetectorBasic
3
+ ##
4
+ # Class that detects opinions in a given input KAF file.
5
+ #
6
+ class LegacyProcessor < BaseProcessor
7
+
8
+ def opinions
9
+ unless @opinions
10
+ set_accumulated_strength
11
+ apply_modifiers
12
+ apply_conjunctions
13
+
14
+ ##
15
+ # Initialize opinions with their expressions.
16
+ #
17
+ @opinions = document.terms.map do |term|
18
+ next unless term.is_expression? and term.accumulated_strength != 0
19
+ Kaf::Opinion.new term
20
+ end.compact
21
+
22
+ ##
23
+ # Obtain targets for each opinion.
24
+ #
25
+ @opinions.each do |opinion|
26
+ opinion.obtain_targets sentences
27
+ end
28
+
29
+ ##
30
+ # Obtain holders for each opinion.
31
+ #
32
+ @opinions.each do |opinion|
33
+ opinion.obtain_holders sentences, document.language
34
+ end
35
+ end
36
+
37
+ @opinions
38
+ end
39
+
40
+ protected
41
+
42
+ ##
43
+ # The strength of a term depends heavily on the type of the previous
44
+ # one. For example if the previous one is a shifter, it needs
45
+ # to be multiplied. If it's an intensifier, it needs to be
46
+ # added (or subtracted depending on the strength of the previous
47
+ # term) etc.
48
+ #
49
+ def set_accumulated_strength
50
+ symbol = :+
51
+ terms_count = terms.count
52
+ terms.each.with_index do |term, i|
53
+ next unless i+1 < terms_count
54
+
55
+ if terms[i+1].is_shifter?
56
+ if term.accumulated_strength != 0
57
+ terms[i+1].accumulated_strength *= term.accumulated_strength
58
+ terms[i+1].list_ids += term.list_ids
59
+ term.use = false
60
+ symbol = terms[i+1].accumulated_strength > 0 ? :+ : :-
61
+ else
62
+ symbol = :*
63
+ end
64
+ elsif terms[i+1].is_intensifier?
65
+ terms[i+1].accumulated_strength = term.accumulated_strength.send(symbol, terms[i+1].accumulated_strength)
66
+ term.use = false
67
+ symbol = terms[i+1].accumulated_strength > 0 ? :+ : :-
68
+ if term.accumulated_strength != 0
69
+ terms[i+1].list_ids += term.list_ids
70
+ end
71
+ else
72
+ symbol = terms[i+1].accumulated_strength >= 0 ? :+ : :-
73
+ end
74
+ end
75
+ end
76
+
77
+ ##
78
+ # Apply strength to the next term after a shifter or intensifier.
79
+ #
80
+ def apply_modifiers
81
+ terms_count = terms.count
82
+ terms.each.with_index do |term, i|
83
+ if i+1 < terms_count
84
+ if term.use && (term.is_shifter? || term.is_intensifier?)
85
+ terms[i+1].accumulated_strength *= term.accumulated_strength
86
+ terms[i+1].list_ids += term.list_ids
87
+ term.use = false
88
+ end
89
+ end
90
+ end
91
+ end
92
+
93
+ ##
94
+ # Ignore conjunctions when applying strength.
95
+ #
96
+ def apply_conjunctions
97
+ terms_count = terms.count
98
+ i = 0
99
+ while i < terms_count
100
+ if terms[i].use && terms[i].accumulated_strength != 0
101
+ used = [i]
102
+ list_ids = terms[i].list_ids
103
+ strength = terms[i].accumulated_strength
104
+ terms[i].use = false
105
+ j = i+1
106
+ while true
107
+ if j >= terms_count
108
+ break
109
+ end
110
+
111
+ if terms[j].is_conjunction
112
+ terms[j].use = false
113
+ j += 1
114
+ elsif terms[j].use && terms[j].accumulated_strength != 0
115
+ list_ids += terms[j].list_ids
116
+ used << j
117
+ terms[j].use = false
118
+ strength += terms[j].accumulated_strength
119
+ j += 1
120
+ else
121
+ break
122
+ end
123
+ end
124
+ last_used = used.last
125
+ terms[last_used].accumulated_strength = strength
126
+ terms[last_used].list_ids = list_ids
127
+ terms[last_used].use = true
128
+ i = j
129
+ end
130
+ i += 1
131
+ end
132
+ end
133
+
134
+ end
135
+ end
136
+ end
@@ -1,326 +1,38 @@
1
- require_relative 'term'
2
- require_relative 'opinion'
3
-
4
1
  module Opener
5
2
  class OpinionDetectorBasic
6
3
  ##
7
4
  # Class that detects opinions in a given input KAF file.
8
5
  #
9
- class Processor
10
- attr_accessor :document, :timestamp, :opinion_strength, :pretty
11
-
12
- ##
13
- # @param [String|IO] file The KAF file/input to process.
14
- # @param [Hash] options. Options for timestamp and including strength to
15
- # opinions.
16
- # @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
17
- # by default due to the performance overhead.
18
- #
19
- def initialize(file, options = {})
20
- @document = Oga.parse_xml(file)
21
-
22
- @timestamp = options[:timestamp]
23
- @opinion_strength = options[:opinion_strength]
24
- @pretty = options[:pretty] || false
25
-
26
- raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
27
- end
28
-
29
- ##
30
- # Processes the input and returns the new KAF output.
31
- # @return [String]
32
- #
33
- def process
34
- add_opinions_layer
35
-
36
- index = 1
37
- opinions.each do |opinion|
38
- add_opinion(opinion, index)
39
- index += 1
40
- end
41
-
42
- add_linguistic_processor
6
+ class Processor < BaseProcessor
43
7
 
44
- pretty ? pretty_print(document) : document.to_xml
45
- end
46
-
47
- ##
48
- # Get the language of the input file.
49
- #
50
- # @return [String]
51
- #
52
- def language
53
- @language ||= document.at_xpath('KAF').get('xml:lang')
54
- end
55
-
56
- ##
57
- # Get the terms from the input file
58
- # @return [Hash]
59
- #
60
- def terms
61
- @terms ||= document.xpath('KAF/terms/term').map do |term|
62
- Term.new(term, document, language)
63
- end
64
- end
65
-
66
- ##
67
- # Get the opinions.
68
- #
69
- # @return [Hash]
70
- #
71
8
  def opinions
72
- unless @opinions
73
- set_accumulated_strength
74
- apply_modifiers
75
- apply_conjunctions
76
-
77
- ##
78
- # Initialize opinions with their expressions.
79
- #
80
- @opinions = terms.map do |term|
81
- if term.is_expression? && term.accumulated_strength != 0
82
- Opinion.new(term)
83
- end
84
- end.compact
85
-
86
- ##
87
- # Obtain targets for each opinion.
88
- #
89
- @opinions.each do |opinion|
90
- opinion.obtain_targets(sentences)
91
- end
92
-
93
- ##
94
- # Obtain holders for each opinion.
95
- #
96
- @opinions.each do |opinion|
97
- opinion.obtain_holders(sentences, language)
98
- end
99
- end
100
-
101
- @opinions
102
- end
103
-
104
- ##
105
- # Remove the opinions layer from the KAF file if it exists and add a new
106
- # one.
107
- def add_opinions_layer
108
- existing = document.at_xpath('KAF/opinions')
109
-
110
- existing.remove if existing
111
-
112
- new_node('opinions', 'KAF')
113
- end
114
-
115
- ##
116
- # Adds the entire opinion in the KAF file.
117
- #
118
- def add_opinion(opinion, index)
119
- opinion_node = new_node("opinion", "KAF/opinions")
120
- opinion_node.set('oid', "o#{index.to_s}")
121
-
122
- unless opinion.holders.empty?
123
- opinion_holder_node = new_node("opinion_holder", opinion_node)
124
- add_opinion_element(opinion_holder_node, opinion.holders)
125
- end
126
-
127
- opinion_target_node = new_node("opinion_target", opinion_node)
128
-
129
- unless opinion.target_ids.empty?
130
- add_opinion_element(opinion_target_node, opinion.target_ids)
131
- end
9
+ return @opinions if @opinions
132
10
 
133
- expression_node = new_node("opinion_expression", opinion_node)
134
- expression_node.set('polarity', opinion.polarity)
135
- expression_node.set('strength', opinion.strength.to_s)
11
+ ##
12
+ # Initialize opinions with their expressions.
13
+ #
14
+ @opinions = document.terms.map do |term|
15
+ next unless term.is_expression? and term.accumulated_strength != 0
16
+ Kaf::Opinion.new term
17
+ end.compact
136
18
 
137
- add_opinion_element(expression_node, opinion.ids)
19
+ set_accumulated_strength
138
20
  end
139
21
 
140
- ##
141
- # Method for adding opinion holders, targets and expressions.
142
- #
143
- def add_opinion_element(node, ids)
144
- lemmas = terms.select{|t| ids.include?(t.id)}.map(&:lemma).join(" ")
145
- comment = Oga::XML::Comment.new(:text => "#{lemmas}")
146
- node.children << comment
147
- span_node = new_node("span", node)
148
-
149
- ids.each do |id|
150
- target_node = new_node("target", span_node)
151
- target_node.set('id', id.to_s)
152
- end
153
- end
154
-
155
- ##
156
- # Add linguistic processor layer with basic information
157
- # (version, timestamp, description etc) in the KAF file.
158
- #
159
- def add_linguistic_processor
160
- description = 'Basic opinion detector with Pos'
161
- last_edited = '13may2015'
162
- version = '2.0'
163
-
164
- node = new_node('linguisticProcessors', 'KAF/kafHeader')
165
- node.set('layer', 'opinions')
166
-
167
- lp_node = new_node('lp', node)
168
-
169
- lp_node.set('version', "#{last_edited}-#{version}")
170
- lp_node.set('name', description)
171
-
172
- if timestamp
173
- format = '%Y-%m-%dT%H:%M:%S%Z'
174
-
175
- lp_node.set('timestamp', Time.now.strftime(format))
176
- else
177
- lp_node.set('timestamp', '*')
178
- end
179
- end
180
-
181
- ##
182
- # Format the output document properly.
183
- #
184
- # TODO: this should be handled by Oga in a nice way.
185
- #
186
- # @return [String]
187
- #
188
- def pretty_print(document)
189
- doc = REXML::Document.new document.to_xml
190
- doc.context[:attribute_quote] = :quote
191
- out = ""
192
- formatter = REXML::Formatters::Pretty.new
193
- formatter.compact = true
194
- formatter.write(doc, out)
195
-
196
- out.strip
197
- end
198
-
199
- ##
200
- # Get terms grouped by sentence.
201
- #
202
- def sentences
203
- @sentences ||= terms.group_by{|t| t.sentence}
204
- end
205
-
206
- protected
207
-
208
- ##
209
- # The strength of a term depends heavily on the type of the previous
210
- # one. For example if the previous one is a shifter, it needs
211
- # to be multiplied. If it's an intensifier, it needs to be
212
- # added (or subtracted depending on the strength of the previous
213
- # term) etc.
214
- #
215
22
  def set_accumulated_strength
216
- symbol = :+
217
- terms_count = terms.count
218
- terms.each_with_index do |term, i|
219
- if i+1 < terms_count
220
- if terms[i+1].is_shifter?
221
- if term.accumulated_strength != 0
222
- terms[i+1].accumulated_strength *= term.accumulated_strength
223
- terms[i+1].list_ids += term.list_ids
224
- term.use = false
225
- symbol = terms[i+1].accumulated_strength > 0 ? :+ : :-
226
- else
227
- symbol = :*
228
- end
229
- elsif terms[i+1].is_intensifier?
230
- terms[i+1].accumulated_strength = term.accumulated_strength.send(symbol, terms[i+1].accumulated_strength)
231
- term.use = false
232
- symbol = terms[i+1].accumulated_strength > 0 ? :+ : :-
233
- if term.accumulated_strength != 0
234
- terms[i+1].list_ids += term.list_ids
235
- end
236
- else
237
- symbol = terms[i+1].accumulated_strength >= 0 ? :+ : :-
238
- end
239
- end
240
- end
241
- end
242
-
243
- ##
244
- # Apply strength to the next term after a shifter or intensifier.
245
- #
246
- def apply_modifiers
247
- terms_count = terms.count
248
- terms.each_with_index do |term, i|
249
- if i+1 < terms_count
250
- if term.use && (term.is_shifter? || term.is_intensifier?)
251
- terms[i+1].accumulated_strength *= term.accumulated_strength
252
- terms[i+1].list_ids += term.list_ids
253
- term.use = false
254
- end
23
+ terms.each.with_index do |term, i|
24
+ head = term.head_term
25
+ if head.is_shifter?
26
+ term.accumulated_strength *= -1
27
+ term.list_ids += term.list_ids
28
+ elsif head.is_intensifier?
29
+ term.accumulated_strength += head.accumulated_strength
30
+ term.list_ids += term.list_ids
31
+ else
255
32
  end
256
33
  end
257
34
  end
258
35
 
259
- ##
260
- # Ignore conjunctions when applying strength.
261
- #
262
- def apply_conjunctions
263
- terms_count = terms.count
264
- i = 0
265
- while i < terms_count
266
- if terms[i].use && terms[i].accumulated_strength != 0
267
- used = [i]
268
- list_ids = terms[i].list_ids
269
- strength = terms[i].accumulated_strength
270
- terms[i].use = false
271
- j = i+1
272
- while true
273
- if j >= terms_count
274
- break
275
- end
276
-
277
- if terms[j].is_conjunction
278
- terms[j].use = false
279
- j += 1
280
- elsif terms[j].use && terms[j].accumulated_strength != 0
281
- list_ids += terms[j].list_ids
282
- used << j
283
- terms[j].use = false
284
- strength += terms[j].accumulated_strength
285
- j += 1
286
- else
287
- break
288
- end
289
- end
290
- last_used = used.last
291
- terms[last_used].accumulated_strength = strength
292
- terms[last_used].list_ids = list_ids
293
- terms[last_used].use = true
294
- i = j
295
- end
296
- i += 1
297
- end
298
- end
299
-
300
- ##
301
- # Creates a new node in the KAF file.
302
- #
303
- def new_node(tag, parent)
304
- if parent.is_a?(String)
305
- parent_node = document.at_xpath(parent)
306
- else
307
- parent_node = parent
308
- end
309
-
310
- node = Oga::XML::Element.new(:name => tag)
311
-
312
- parent_node.children << node
313
-
314
- node
315
- end
316
-
317
- ##
318
- # Check if input is a KAF file.
319
- # @return [Boolean]
320
- #
321
- def is_kaf?
322
- !!document.at_xpath('KAF')
323
- end
324
- end # Processor
325
- end # OpinionDetectorBasic
326
- end # Opener
36
+ end
37
+ end
38
+ end