opener-opinion-detector-basic 2.0.7 → 3.0.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
lib/opener/opinion_detector_basic/term.rb ADDED
@@ -0,0 +1,160 @@
+ module Opener
+   class OpinionDetectorBasic
+     class Term
+       attr_reader :node, :sentence, :is_conjunction
+       attr_accessor :use, :accumulated_strength, :list_ids
+
+       def initialize(node, document, language)
+         @node = node
+         @sentence = get_sentence(document)
+         @use = true
+         @accumulated_strength = strength
+         @list_ids = [id]
+         @is_conjunction = is_conjunction?(language)
+       end
+
+       ##
+       # Returns the term id.
+       #
+       # @return [String]
+       #
+       def id
+         @id ||= node.get('tid')
+       end
+
+       ##
+       # Returns the lemma of the term.
+       #
+       # @return [String]
+       #
+       def lemma
+         @lemma ||= node.get('lemma')
+       end
+
+       ##
+       # Returns the part of speech of the term.
+       #
+       # @return [String]
+       #
+       def pos
+         @pos ||= node.get('pos')
+       end
+
+       ##
+       # Returns the sentiment modifier type if it exists.
+       #
+       # @return [String|NilClass]
+       #
+       def sentiment_modifier
+         @sentiment_modifier ||= if sentiment = node.xpath('sentiment').first
+           sentiment.get('sentiment_modifier')
+         end
+       end
+
+       ##
+       # Returns the polarity of the term if it exists.
+       #
+       # @return [String|NilClass]
+       #
+       def polarity
+         @polarity ||= if sentiment = node.xpath('sentiment').first
+           sentiment.get('polarity')
+         end
+       end
+
+       ##
+       # Returns the actual word ids that construct the lemma.
+       #
+       # @return [Array]
+       #
+       def target_ids
+         @target_ids ||= node.xpath('span/target').map {|target| target.get('id')}
+       end
+
+       ##
+       # Returns the strength of the term depending on its type.
+       #
+       # @return [Integer]
+       #
+       def strength
+         if polarity == "positive"
+           return 1
+         elsif polarity == "negative"
+           return -1
+         end
+
+         if is_intensifier?
+           return 2
+         elsif is_shifter?
+           return -1
+         end
+
+         return 0
+       end
+
+       ##
+       # Returns the sentence id that the term belongs to in the document.
+       #
+       # @return [String]
+       #
+       def get_sentence(document)
+         document
+           .xpath("KAF/text/wf[@wid='#{target_ids.first}']")
+           .first
+           .get('sent')
+       end
+
+       ##
+       # Checks if a term is an intensifier.
+       #
+       # @return [TrueClass|FalseClass]
+       #
+       def is_intensifier?
+         sentiment_modifier == "intensifier"
+       end
+
+       ##
+       # Checks if a term is a shifter.
+       #
+       # @return [TrueClass|FalseClass]
+       #
+       def is_shifter?
+         sentiment_modifier == "shifter"
+       end
+
+       ##
+       # Checks if a term is an expression.
+       #
+       # @return [TrueClass|FalseClass]
+       #
+       def is_expression?
+         use && !!polarity
+       end
+
+       ##
+       # Checks if a term is a conjunction.
+       #
+       # @return [TrueClass|FalseClass]
+       #
+       def is_conjunction?(language)
+         conjunctions[language].include?(lemma)
+       end
+
+       ##
+       # Map of conjunctions per language code.
+       #
+       # @return [Hash]
+       #
+       def conjunctions
+         {
+           'nl' => [',','en'],
+           'en' => [',','and'],
+           'es' => [',','y','e'],
+           'it' => [',','e','ed'],
+           'de' => [',','und'],
+           'fr' => [',','et']
+         }
+       end
+     end # Term
+   end # OpinionDetectorBasic
+ end # Opener
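
The Term class above is the Ruby replacement for the per-token bookkeeping that the removed Python script (at the end of this diff) did with its MyToken class. A minimal usage sketch, assuming a KAF fragment parsed with Oga, the XML library this release adds to the gemspec below; the KAF snippet and values are invented for illustration:

require 'oga'
require 'opener/opinion_detector_basic'

# A hypothetical one-token KAF document containing a positive term.
kaf = <<-XML
<KAF xml:lang="en">
  <text>
    <wf wid="w1" sent="1">good</wf>
  </text>
  <terms>
    <term tid="t1" lemma="good">
      <sentiment polarity="positive"/>
      <span><target id="w1"/></span>
    </term>
  </terms>
</KAF>
XML

document = Oga.parse_xml(kaf)
node     = document.xpath('KAF/terms/term').first

term = Opener::OpinionDetectorBasic::Term.new(node, document, 'en')

term.polarity       # => "positive"
term.strength       # => 1
term.sentence       # => "1" (the sent attribute of the first spanned token)
term.is_expression? # => true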
lib/opener/opinion_detector_basic/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Opener
    class OpinionDetectorBasic
-     VERSION = '2.0.7'
+     VERSION = '3.0.0'
    end
  end
opener-opinion-detector-basic.gemspec CHANGED
@@ -7,18 +7,14 @@ Gem::Specification.new do |gem|
    gem.summary = 'Basic Opinion Detector.'
    gem.description = gem.summary
    gem.homepage = 'http://opener-project.github.com/'
-   gem.extensions = ['ext/hack/Rakefile']
    gem.license = 'Apache 2.0'
 
    gem.required_ruby_version = '>= 1.9.2'
-
+
    gem.files = Dir.glob([
-     'core/*',
-     'ext/**/*',
      'lib/**/*',
      'config.ru',
      '*.gemspec',
-     '*_requirements.txt',
      'README.md',
      'LICENSE.txt',
      'exec/**/*',
@@ -30,12 +26,11 @@ Gem::Specification.new do |gem|
    gem.add_dependency 'opener-daemons', '~> 2.2'
    gem.add_dependency 'opener-webservice', '~> 2.1'
    gem.add_dependency 'opener-core', '~> 2.2'
-
-   gem.add_dependency 'rake'
-   gem.add_dependency 'nokogiri'
-   gem.add_dependency 'cliver'
-   gem.add_dependency 'slop', '~> 3.5'
+
+   gem.add_dependency 'oga'
 
    gem.add_development_dependency 'rspec', '~> 3.0'
    gem.add_development_dependency 'cucumber'
+   gem.add_development_dependency 'rake'
+   gem.add_development_dependency 'benchmark-ips', '~> 2.0'
  end
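
The runtime dependency list shrinks to a single pure-Ruby XML library: oga replaces nokogiri (and with it the ext/hack Rakefile extension, the pre-install requirements file, and cliver), slop is dropped, and rake moves to the development group. benchmark-ips also joins the development dependencies, presumably to measure the new pure-Ruby implementation; a minimal sketch of how that library is typically driven (the labels and the parsed string are illustrative, not taken from the gem):

require 'benchmark/ips'
require 'oga'

xml = '<KAF><terms><term tid="t1" lemma="good"/></terms></KAF>'

Benchmark.ips do |x|
  # Two arbitrary workloads; benchmark-ips reports iterations per
  # second for each, and compare! prints their relative speed.
  x.report('parse')       { Oga.parse_xml(xml) }
  x.report('parse+xpath') { Oga.parse_xml(xml).xpath('KAF/terms/term').first.get('lemma') }

  x.compare!
end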
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: opener-opinion-detector-basic
  version: !ruby/object:Gem::Version
-   version: 2.0.7
+   version: 3.0.0
  platform: ruby
  authors:
  - development@olery.com
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-04-17 00:00:00.000000000 Z
+ date: 2015-06-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: opener-daemons
@@ -53,7 +53,7 @@ dependencies:
    prerelease: false
    type: :runtime
  - !ruby/object:Gem::Dependency
-   name: rake
+   name: oga
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - '>='
@@ -67,21 +67,21 @@ dependencies:
    prerelease: false
    type: :runtime
  - !ruby/object:Gem::Dependency
-   name: nokogiri
+   name: rspec
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '3.0'
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '3.0'
    prerelease: false
-   type: :runtime
+   type: :development
  - !ruby/object:Gem::Dependency
-   name: cliver
+   name: cucumber
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - '>='
@@ -93,47 +93,33 @@ dependencies:
      - !ruby/object:Gem::Version
        version: '0'
    prerelease: false
-   type: :runtime
+   type: :development
  - !ruby/object:Gem::Dependency
-   name: slop
+   name: rake
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - '>='
        - !ruby/object:Gem::Version
-         version: '3.5'
+         version: '0'
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - '>='
        - !ruby/object:Gem::Version
-         version: '3.5'
+         version: '0'
    prerelease: false
-   type: :runtime
+   type: :development
  - !ruby/object:Gem::Dependency
-   name: rspec
+   name: benchmark-ips
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - ~>
        - !ruby/object:Gem::Version
-         version: '3.0'
+         version: '2.0'
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - ~>
        - !ruby/object:Gem::Version
-         version: '3.0'
-   prerelease: false
-   type: :development
- - !ruby/object:Gem::Dependency
-   name: cucumber
-   version_requirements: !ruby/object:Gem::Requirement
-     requirements:
-     - - '>='
-       - !ruby/object:Gem::Version
-         version: '0'
-   requirement: !ruby/object:Gem::Requirement
-     requirements:
-     - - '>='
-       - !ruby/object:Gem::Version
-         version: '0'
+         version: '2.0'
    prerelease: false
    type: :development
  description: Basic Opinion Detector.
@@ -142,21 +128,20 @@ executables:
  - opinion-detector-basic
  - opinion-detector-basic-daemon
  - opinion-detector-basic-server
- extensions:
- - ext/hack/Rakefile
+ extensions: []
  extra_rdoc_files: []
  files:
- - core/opinion_detector_basic_multi.py
- - ext/hack/Rakefile
  - lib/opener/opinion_detector_basic.rb
  - lib/opener/opinion_detector_basic/cli.rb
+ - lib/opener/opinion_detector_basic/opinion.rb
+ - lib/opener/opinion_detector_basic/processor.rb
  - lib/opener/opinion_detector_basic/server.rb
+ - lib/opener/opinion_detector_basic/term.rb
  - lib/opener/opinion_detector_basic/version.rb
  - lib/opener/opinion_detector_basic/public/markdown.css
  - lib/opener/opinion_detector_basic/views/index.erb
  - config.ru
  - opener-opinion-detector-basic.gemspec
- - pre_install_requirements.txt
  - README.md
  - LICENSE.txt
  - exec/opinion-detector-basic.rb
core/opinion_detector_basic_multi.py DELETED
@@ -1,512 +0,0 @@
- #!/usr/bin/env python
-
- import sys
- import getopt
- import os
-
- this_folder = os.path.dirname(os.path.realpath(__file__))
-
- # This updates the load path to ensure that the local site-packages directory
- # can be used to load packages (e.g. a locally installed copy of lxml).
- sys.path.append(os.path.join(this_folder, 'site-packages/pre_install'))
-
- from VUKafParserPy import KafParser
- from collections import defaultdict
- import operator
- import pprint
- import lxml
- from lxml import etree
- import logging
-
-
-
- def mix_lists(l1,l2):
-     newl=[]
-     min_l = min(len(l1),len(l2))
-     for x in range(min_l):
-         newl.append(l1[x])
-         newl.append(l2[x])
-
-     if len(l1)>len(l2):
-         newl.extend(l1[min_l:])
-     elif len(l2)>len(l1):
-         newl.extend(l2[min_l:])
-     return newl
-
-
- class OpinionExpression:
-     def __init__(self,spans,sentence,value):
-         self.ids = spans
-         self.sentence = sentence
-         self.value = value
-         self.target_ids = []
-         self.candidates_r=[]
-         self.candidates_l=[]
-         self.holder = []
-
-     def __repr__(self):
-         r='Ids:'+'#'.join(self.ids)+' Sent:'+self.sentence+' Value:'+str(self.value)+ ' Target:'+'#'.join(self.target_ids)+'\n'
-         r+='Right cand: '+str(self.candidates_r)+'\n'
-         r+='Left cand: '+str(self.candidates_l)+'\n'
-         return r
-
- class MyToken:
-     def __init__(self,id,lemma,pos,polarity,sent_mod,sent):
-         self.id = id
-         self.lemma = lemma
-         self.pos = pos
-         self.polarity = polarity
-         self.sent_mod = sent_mod
-         self.sentence = sent
-         self.use_it = True
-         self.list_ids = [id]
-         self.value = 0
-
-
-         if polarity == 'positive':
-             self.value = 1
-         elif polarity == 'negative':
-             self.value = -1
-
-         if sent_mod == 'intensifier':
-             self.value = 2
-         elif sent_mod == 'shifter':
-             self.value = -1
-
-
-     def isNegator(self):
-         return self.sent_mod == 'shifter'
-
-
-
-     def isIntensifier(self):
-         return self.sent_mod == 'intensifier'
-
-
-     def is_opinion_expression(self):
-         return self.use_it and self.polarity is not None
-
-
-     def __repr__(self):
-         if self.use_it:
-             return self.id+' lemma:'+self.lemma.encode('utf-8')+'.'+self.pos.encode('utf-8')+' pol:'+str(self.polarity)+' sentmod:'+str(self.sent_mod)+' sent:'+self.sentence+' use:'+str(self.use_it)+' list:'+'#'.join(self.list_ids)+' val:'+str(self.value)
-         else:
-             return '\t'+self.id+' lemma:'+self.lemma.encode('utf-8')+'.'+self.pos.encode('utf-8')+' pol:'+str(self.polarity)+' sentmod:'+str(self.sent_mod)+' sent:'+self.sentence+' use:'+str(self.use_it)+' list:'+'#'.join(self.list_ids)+' val:'+str(self.value)
-
-
-
- def obtain_opinion_expressions(tokens,lang='nl'):
-     logging.debug(' Obtaining opinion expressions')
-     my_tokens = tokens[:]
-
-     accumulate_several_modifiers = True
-     apply_modifiers = True
-     apply_conjunctions = True
-
-     ## Accumulate double/triple intensifiers or negators
-     if accumulate_several_modifiers:
-         logging.debug(' Accumulating modifiers')
-         t = 0
-         while t < len(my_tokens):
-             if t+1 < len(my_tokens):
-                 if (my_tokens[t].isNegator() or my_tokens[t].isIntensifier()) and my_tokens[t+1].isNegator():
-                     my_tokens[t+1].value *= my_tokens[t].value
-                     my_tokens[t].use_it = False
-                     my_tokens[t+1].list_ids += my_tokens[t].list_ids
-                     logging.debug(' Accumulating '+'-'.join(my_tokens[t+1].list_ids))
-                 elif my_tokens[t].isNegator() and my_tokens[t+1].isIntensifier():
-                     my_tokens[t+1].value *= -1
-                     my_tokens[t].use_it = False
-                     my_tokens[t+1].list_ids += my_tokens[t].list_ids
-                     logging.debug(' Accumulating '+'-'.join(my_tokens[t+1].list_ids))
-                 elif my_tokens[t].isIntensifier() and my_tokens[t+1].isIntensifier():
-                     if my_tokens[t].value >= 0:
-                         my_tokens[t+1].value = my_tokens[t].value + my_tokens[t+1].value
-                     else:
-                         my_tokens[t+1].value = my_tokens[t].value - my_tokens[t+1].value
-                     my_tokens[t].use_it = False
-                     my_tokens[t+1].list_ids += my_tokens[t].list_ids
-                     logging.debug(' Accumulating '+'-'.join(my_tokens[t+1].list_ids))
-
-             t+=1
-     ###########################################
-
-     ##Apply intensifiers/negators over the next elements
-     if apply_modifiers:
-         logging.debug(' Applying modifiers')
-         t = 0
-         while t < len(my_tokens):
-             if my_tokens[t].use_it and (my_tokens[t].isNegator() or my_tokens[t].isIntensifier()):
-                 ## Try to modify the next token:
-                 if t+1<len(my_tokens):
-                     #print 'Score: ',my_tokens[t]
-                     my_tokens[t+1].value *= my_tokens[t].value
-                     my_tokens[t+1].list_ids += my_tokens[t].list_ids
-                     my_tokens[t].use_it = False
-                     logging.debug(' Applied modifier over '+'-'.join(my_tokens[t+1].list_ids))
-             t += 1
-     ###########################################
-
-     if apply_conjunctions:
-         if lang=='nl':
-             concat = [',','en']
-         elif lang=='en':
-             concat = [',','and']
-         elif lang=='es':
-             concat = [',','y','e']
-         elif lang=='it':
-             concat = [',','e','ed']
-         elif lang=='de':
-             concat = [',','und']
-         elif lang == 'fr':
-             concat=[',','et']
-         logging.debug(' Applying conjunctions:'+str(concat))
-
-
-         t = 0
-         while t < len(my_tokens):
-             if my_tokens[t].use_it and my_tokens[t].value!=0: ## Find the first one
-                 #print 'FOUND ',my_tokens[t]
-                 logging.debug(' Found token '+str(my_tokens[t]))
-                 list_aux = my_tokens[t].list_ids
-                 used = [t]
-                 value_aux = my_tokens[t].value
-                 my_tokens[t].use_it = False
-                 #print 'Modified',my_tokens[t]
-
-                 x = t+1
-                 while True:
-                     if x>=len(my_tokens):
-                         break
-
-                     if my_tokens[x].lemma in concat:
-                         ## list_aux += my_tokens[x].list_ids Don't use it as part of the OE
-                         my_tokens[x].use_it = False
-                         x+=1
-                     elif (my_tokens[x].use_it and my_tokens[x].value!=0):
-                         #print '\Also ',my_tokens[x]
-                         logging.debug(' Found token '+str(my_tokens[x]))
-                         list_aux += my_tokens[x].list_ids
-
-                         used.append(x)
-                         my_tokens[x].use_it = False
-                         value_aux += my_tokens[x].value
-                         x += 1
-                     else:
-                         break
-                 #print 'OUT OF THE WHILE'
-                 ##The last one in the list used is the one accumulating all
-
-                 last_pos = used[-1]
-                 my_tokens[last_pos].value = value_aux
-                 my_tokens[last_pos].list_ids = list_aux
-                 my_tokens[last_pos].use_it = True
-                 logging.debug(' Regenerating '+str(my_tokens[last_pos]))
-                 t = x ## next token
-             #print
-             #print
-             t += 1
-
-
-     ## Create OpinionExpression
-     my_opinion_exps = []
-     logging.debug(' Generating output')
-     for token in my_tokens:
-         if token.use_it and token.value != 0:
-             op_exp = OpinionExpression(token.list_ids,token.sentence,token.value)
-             my_opinion_exps.append(op_exp)
-     return my_opinion_exps
-
-
- '''
- def get_distance(id1, id2):
-     pos1 = int(id1[id1.find('_')+1:])
-     pos2 = int(id2[id2.find('_')+1:])
-     if pos1>pos2:
-         return pos1-pos2
-     else:
-         return pos2-pos1
- '''
-
-
- def obtain_holders(ops_exps,sentences,lang):
-     if lang=='nl':
-         holders = ['ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij','me','hem','haar','ons','hen','hun']
-     elif lang=='en':
-         holders = ['i','we','he','she','they','it','you']
-     elif lang =='es':
-         holders = ['yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras']
-     elif lang =='it':
-         holders = ['io','tu','noi','voi','loro','lei','lui']
-     elif lang == 'de':
-         holders = ['ich','du','wir','ihr','sie','er']
-     elif lang == 'fr':
-         holders = ['je','tu','lui','elle','nous','vous','ils','elles']
-
-     logging.debug('Obtaining holders with list: '+str(holders))
-
-     for oe in ops_exps:
-         sent = oe.sentence
-         list_terms = sentences[str(sent)]
-         for lemma, pos, term_id in list_terms:
-             if lemma in holders:
-                 oe.holder.append(term_id)
-                 logging.debug(' Selected for '+str(oe)+' holder'+lemma+' '+term_id)
-                 break
-
-
-
-
- #This is specific for the basic version
- def filter_candidates(candidates,ids_oe):
-     ##filtered = [(lemma, pos,term_id) for (lemma,pos, term_id) in candidates if len(lemma)>=4 and term_id not in ids_oe]
-     filtered = [(lemma,pos,id) for (lemma,pos,id) in candidates if pos in ['N','R']]
-     return filtered
-
- def obtain_targets_improved(ops_exps,sentences):
-     logging.debug(' Obtaining targets improved')
-     #print>>sys.stderr,'#'*40
-     #print>>sys.stderr,'#'*40
-
-     #print>>sys.stderr,'Beginning with obtain targets'
-     ##sentences --> dict [str(numsent)] ==> list of (lemma, pos, term_id)
-
-     all_ids_in_oe = []
-     for oe in ops_exps:
-         all_ids_in_oe.extend(oe.ids)
-     #print>>sys.stderr,'All list of ids in oe',all_ids_in_oe
-
-     for oe in ops_exps:
-         #print>>sys.stderr,'\tOE:',oe
-         logging.debug(' OpExp: '+str(oe))
-
-         ids_in_oe = oe.ids
-         sent = oe.sentence
-         list_terms = sentences[str(sent)]
-         #print>>sys.stderr,'\t\tTerms in sent:',list_terms
-
-         ###########################################
-         #First rule: noun to the right within maxdistance tokens
-         max_distance_right = 3
-         biggest_index = -1
-         for idx, (lemma,pos,term_id) in enumerate(list_terms):
-             if term_id in ids_in_oe:
-                 biggest_index = idx
-
-         #print>>sys.stderr,'\t\tBI',biggest_index
-         if biggest_index+1 >= len(list_terms): ## is the last element and we shall skip it
-             #print>>sys.stderr,'\t\tNot possible to apply 1st rule'
-             pass
-         else:
-             candidates=list_terms[biggest_index+1:min(biggest_index+1+max_distance_right,len(list_terms))]
-             ##Filter candidates
-             #print>>sys.stderr,'\t\tCandidates for right rule no filter',candidates
-             #oe.__candidates_right = [(lemma, term_id) for (lemma, term_id) in candidates if len(lemma)>=4 and term_id not in all_ids_in_oe]
-             oe.candidates_r = filter_candidates(candidates,all_ids_in_oe)
-             logging.debug(' Candidates filtered right'+str(oe.candidates_r))
-             #print>>sys.stderr,'\t\tCandidates for right rule no filter',oe.__candidates_right
-
-         ######################################################################################
-
-
-
-         ###########################################
-         max_distance_left = 3
-         smallest_index = 0
-         for idx,(lemma,pos,term_id) in enumerate(list_terms):
-             if term_id in ids_in_oe:
-                 smallest_index = idx
-                 break
-         #print>>sys.stderr,'Smalles index:',smallest_index
-         if smallest_index == 0:
-             #print>>sys.stderr,'\t\tNot possible to apply left rule'
-             pass
-         else:
-             candidates = list_terms[max(0,smallest_index-1-max_distance_left):smallest_index]
-             ##Filter candidates
-             #print>>sys.stderr,'\t\tCandidates for left rule no filter',candidates
-
-             oe.candidates_l = filter_candidates(candidates,all_ids_in_oe)
-             logging.debug(' Candidates filtered left: '+str(oe.candidates_l))
-
-     ######################################################################################
-
-     #print>>sys.stderr,'#'*40
-     #print>>sys.stderr,'#'*40
-
-     ## filling oe.target_ids
-     assigned_as_targets = []
-
-     # First we assign to all the first in the right, if any, and not assigned
-     logging.debug(' Applying first to the right rule')
-     for oe in ops_exps:
-         #print>>sys.stderr,'A ver ',oe
-         if len(oe.candidates_r) !=0:
-             lemma, pos, id = oe.candidates_r[0]
-             if id not in assigned_as_targets:
-                 oe.target_ids.append(id)
-                 ###assigned_as_targets.append(id) #Uncomment to avoid selection of the same target more than once
-                 logging.debug(' OpExp '+str(oe)+' selected '+id)
-                 #print>>sys.stderr,'Asignamos',id
-
-     logging.debug(' Applying most close rule')
-     for oe in ops_exps:
-         if len(oe.target_ids) == 0: # otherwise it's solved
-             intercalados_list = mix_lists([id for _,_,id in oe.candidates_r],[id for _,_,id in oe.candidates_l])
-             for id in intercalados_list:
-                 if id not in assigned_as_targets:
-                     oe.target_ids.append(id)
-                     ###assigned_as_targets.append(id) #Uncomment to avoid selection of the same target more than once
-                     logging.debug(' OpExp '+str(oe)+' selected '+id)
-                     break
-
- ######## MAIN ROUTINE ############
-
- ## Check if we are reading from a pipeline
- if sys.stdin.isatty():
-     print>>sys.stderr,'Input stream required.'
-     print>>sys.stderr,'Example usage: cat myUTF8file.kaf.xml |',sys.argv[0]
-     sys.exit(-1)
- ########################################
-
- logging.basicConfig(stream=sys.stderr,format='%(asctime)s - %(levelname)s - %(message)s',level=logging.DEBUG)
-
- ## Processing the parameters
- my_time_stamp = True
- remove_opinions = True
- opinion_strength = True
- try:
-     opts, args = getopt.getopt(sys.argv[1:],"",["no-time","no-remove-opinions","no-opinion-strength"])
-     for opt, arg in opts:
-         if opt == "--no-time":
-             my_time_stamp = False
-         elif opt == "--no-remove-opinions":
-             remove_opinions = False
-         elif opt == "--no-opinion-strength":
-             opinion_strength = False
- except getopt.GetoptError:
-     pass
- #########################################
-
- logging.debug('Include timestamp: '+str(my_time_stamp))
-
- # Parsing the KAF file
- try:
-     my_kaf_tree = KafParser(sys.stdin)
- except Exception as e:
-     print>>sys.stderr,'Error parsing input'
-     print>>sys.stderr,'Stream input must be a valid KAF file'
-     print>>sys.stderr,'Error: ',str(e)
-     sys.exit(-1)
-
-
- lang = my_kaf_tree.getLanguage()
- ## Creating data structure
- sentences = defaultdict(list)
- my_tokens = []
-
-
- # CREATE the datastructure for the tokens
- n=0
- lemma_for_tid = {}
- for term in my_kaf_tree.getTerms():
-     n+=1
-     term_id = term.getId()
-     lemma = term.getLemma()
-     lemma_for_tid[term_id] = lemma
-     kaf_pos = term.getPos()
-     #print>>sys.stderr,kaf_pos
-     list_span = term.get_list_span() ## List of token ids in the span layer of the term
-     sentiment = term.getSentiment()
-     polarity = sent_mod = None
-     if sentiment is not None:
-         polarity = sentiment.getPolarity()
-         sent_mod = sentiment.getSentimentModifier()
-     sentence = my_kaf_tree.getToken(list_span[0]).get('sent') ## The sentence of the first token element in span
-     my_tokens.append(MyToken(term_id,lemma,kaf_pos,polarity,sent_mod,sentence))
-
-     sentences[str(sentence)].append((lemma,kaf_pos,term_id))
- #############################
-
- logging.debug('Num terms loaded: '+str(n))
- logging.debug('Num sentences: '+str(len(sentences)))
-
-
- logging.debug('Obtaining opinion expressions')
- my_ops_exps = obtain_opinion_expressions(my_tokens,lang)
- print>>sys.stderr,my_ops_exps
-
- logging.debug('Obtaining targets')
- obtain_targets_improved(my_ops_exps,sentences)
-
-
- logging.debug('Obtaining holders')
- obtain_holders(my_ops_exps,sentences,lang)
-
-
-
-
- ## Create the elements
- logging.debug('Generating KAF output')
-
- if remove_opinions:
-     my_kaf_tree.remove_opinion_layer()
-
- for oe in my_ops_exps:
-     op_ele = etree.Element('opinion')
-
-     ## Holder
-     if len(oe.holder)!=0:
-         oe.holder.sort()
-         c = ' '.join(lemma_for_tid[tid] for tid in oe.holder)
-         op_hol = etree.Element('opinion_holder')
-         op_hol.append(etree.Comment(c))
-         op_ele.append(op_hol)
-         span_op_hol = etree.Element('span')
-         op_hol.append(span_op_hol)
-         for id in oe.holder:
-             span_op_hol.append(etree.Element('target',attrib={'id':id}))
-
-     ## Target
-     op_tar = etree.Element('opinion_target')
-     op_ele.append(op_tar)
-
-
-     if len(oe.target_ids)!=0: ## if there are no targets, there is no opinion element
-         oe.target_ids.sort()
-         c = ' '.join(lemma_for_tid[tid] for tid in oe.target_ids)
-         op_tar.append(etree.Comment(c))
-         span_op_tar = etree.Element('span')
-         op_tar.append(span_op_tar)
-         for id in oe.target_ids:
-             span_op_tar.append(etree.Element('target',attrib={'id':id}))
-
-     #Expression
-     if oe.value > 0: pol = 'positive'
-     elif oe.value < 0: pol = 'negative'
-     else: pol = 'neutral'
-
-     op_exp = etree.Element('opinion_expression')
-     op_exp.set('polarity',pol)
-     if opinion_strength:
-         op_exp.set('strength',str(oe.value))
-
-     op_ele.append(op_exp)
-     oe.ids.sort()
-     c = ' '.join(lemma_for_tid[tid] for tid in oe.ids)
-     op_exp.append(etree.Comment(c))
-     span_exp = etree.Element('span')
-     op_exp.append(span_exp)
-     for id in oe.ids:
-         span_exp.append(etree.Element('target',attrib={'id':id}))
-
-     ##Append the op_ele to the opinions layer
-     my_kaf_tree.addElementToLayer('opinions', op_ele)
-
-
- my_kaf_tree.addLinguisticProcessor('Basic opinion detector with Pos','1.0','opinions', my_time_stamp)
- my_kaf_tree.saveToFile(sys.stdout)
- logging.debug('Process finished')
-
-
-
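
The deleted script above is the Python core that the 2.x releases shelled out to; it is why the gem previously needed the ext/hack Rakefile, pre_install_requirements.txt, and the nokogiri/cliver runtime dependencies. 3.0.0 ports this pipeline to Ruby in the new lib/opener/opinion_detector_basic/{term,opinion,processor}.rb files, of which this diff only shows term.rb. As orientation, the conjunction-grouping step of obtain_opinion_expressions maps onto the new Term API roughly as follows; this is a hypothetical sketch, not the actual processor.rb code:

# Merge runs of opinion terms joined by conjunctions/commas into one
# expression, mirroring the apply_conjunctions branch of the Python script.
# `terms` is an Array of Term objects in document order.
def apply_conjunctions!(terms)
  index = 0

  while index < terms.length
    term = terms[index]

    unless term.use && term.accumulated_strength != 0
      index += 1
      next
    end

    ids      = term.list_ids
    strength = term.accumulated_strength
    term.use = false
    last     = term
    index   += 1

    while index < terms.length
      current = terms[index]

      if current.is_conjunction
        current.use = false # glue between members, not part of the expression
      elsif current.use && current.accumulated_strength != 0
        ids        += current.list_ids
        strength   += current.accumulated_strength
        current.use = false
        last        = current
      else
        break
      end

      index += 1
    end

    # The last member of the run carries the accumulated expression.
    last.accumulated_strength = strength
    last.list_ids             = ids
    last.use                  = true
  end
end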