opener-coreference-base 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,60 +0,0 @@
1
# coding=utf-8
# Constituent (syntactic category) labels used to recognise clauses and
# phrases in a parse tree.  Only the labels listed in ``clauses`` and
# ``phrases`` are active; the remaining treebank labels are kept below as
# commented-out reference entries.
__author__ = 'Valeria Quochi <valeria.quochi@ilc.cnr.it>'
__date__ = '5/16/2013'


# Clauses

# Simple declarative clause, i.e. one that is not introduced by a (possibly
# empty) subordinating conjunction or a wh-word and that does not exhibit
# subject-verb inversion.
simple = "S"

# Clause introduced by a (possibly empty) subordinating conjunction.
subordinated = "SBAR"

# Direct question introduced by a wh-word or a wh-phrase.  Indirect questions
# and relative clauses should be bracketed as SBAR, not SBARQ.
# (Disabled: not part of ``clauses``.)
#direct_question = "SBARQ"

# Inverted declarative sentence, i.e. one in which the subject follows the
# tensed verb or modal.
# NOTE(review): this definition was commented out although ``clauses`` below
# references it, which made the module fail with NameError on import.  It is
# re-enabled here; confirm that SINV is really meant to be an active label.
inverted_declarative = "SINV"

# Inverted yes/no question, or main clause of a wh-question, following the
# wh-phrase in SBARQ.
# NOTE(review): re-enabled for the same reason as ``inverted_declarative``.
inverted_question = "SQ"

clauses = (simple, subordinated, inverted_declarative, inverted_question)


# Phrases

noun_phrase = "NP"
#wh_noun_phrase = "WHNP"

# The misspelled names (``adjetival``, ``conjuntion``) are part of the public
# interface of this module and are therefore kept as they are.
adjetival_phrase = "ADJP"
adverb_phrase = "ADVP"
conjuntion_phrase = "CONJ"
#fragment = "FRAG"
#interjection = "INTJ"
#list_marker = "LST"
#not_a_constituent = "NAC"
#noun_phrase_mark = "NX"
prepositional_phrase = "PP"
parenthetical = "PRN"
#particle = "PRT"
#quantifier_phrase = "QP"
#reduced_relative_clause = "RRC"
#unlike_coordinated_phrase = "UCP"
verb_phrase = "VP"
#wh_adjective_phrase = "WHADJP"
#wh_adverb = "WHAVP"
#wh_prepositional_phrase = "WHPP"
#unknown = "X"


# Only the enabled phrase labels above take part in ``phrases``.
phrases = (
    noun_phrase,
    adjetival_phrase,
    adverb_phrase,
    conjuntion_phrase,
    prepositional_phrase,
    parenthetical,
    verb_phrase,
)
@@ -1,97 +0,0 @@
1
# coding=utf-8
# Part-of-speech tag constants in the TUT tag set (``PRO~PE``, ``NOU~CS``,
# ``ADJ~QU`` ...), grouped into the tuples used for membership tests.
__author__ = 'Valeria Quochi <valeria.quochi@ilc.cnr.it>'
__date__ = '5/16/2013'

personal_pronoun = "PRO~PE"
possessive_pronoun = "PRO~PO"
# In TUT there are distinct tags for relative and interrogative pronouns.
# Here only relative pronouns are given.
wh_pronoun = "PRO~RE"
#wh_possessive_pronoun = "WP$"

# If gender features are added to the TUT tags these may need to change.
noun = "NOU~CS"
noun_plural = "NOU~CP"

proper_noun = "NOU~PR"
#proper_noun_plural = "NNPS"

personal_pronouns = (personal_pronoun, possessive_pronoun)
# The trailing comma matters: ``(wh_pronoun)`` is just the string itself, and
# ``tag in "PRO~RE"`` would then be a substring test instead of a membership
# test over a collection of tags.
relative_pronouns = (wh_pronoun,)

pronouns = (personal_pronoun, possessive_pronoun, wh_pronoun)

nouns = (noun, noun_plural)
all_nouns = (noun, noun_plural, proper_noun)


# Adjectives
# TUT does not have tags for comparative and superlative, but makes finer
# distinctions between the different kinds of adjectives:
#   ADJ~QU : qualifying adjective
#   ADJ~OR : ordinal adjective
#   ADJ~IN : indefinite adjective
#   ADJ~DE : demonstrative adjective
#   ADJ~PO : possessive adjective
#   ADJ~DI : deictic adjective
#   ADJ~IR : interrogative adjective
#   ADJ~EX : exclamative adjective
# The following lines were modified accordingly.
# TODO check correctness
adjective_qualif = "ADJ~QU"
adjective_ord = "ADJ~OR"
adjective_indef = "ADJ~IN"
adjective_dem = "ADJ~DE"
adjective_poss = "ADJ~PO"
adjective_deitt = "ADJ~DI"
adjective_interr = "ADJ~IR"
adjective_excl = "ADJ~EX"

# ``adjetives`` (sic) is the public name; do not rename.
adjetives = (
    adjective_qualif,
    adjective_ord,
    adjective_indef,
    adjective_dem,
    adjective_poss,
    adjective_deitt,
    adjective_interr,
    adjective_excl,
)

# NOTE(review): this is a pair of tuples, not one flat tuple of tags, so
# ``tag in mod_forms`` never matches a tag string.  Kept unchanged because
# callers may rely on the nested shape -- confirm against usages.
mod_forms = (nouns, adjetives)

conjuntion = "CONJ"

# Penn Treebank tags, kept for reference only (not used by this module):
#   CC - Coordinating conjunction
#   CD - Cardinal number
#   DT - Determiner
#   EX - Existential there
#   FW - Foreign word
#   IN - Preposition or subordinating conjunction
#   JJ - Adjective
#   JJR - Adjective, comparative
#   JJS - Adjective, superlative
#   LS - List item marker
#   MD - Modal
#   NN - Noun, singular or mass
#   NNS - Noun, plural
#   NNPS - Proper noun, plural
#   PDT - Predeterminer
#   POS - Possessive ending
#   RB - Adverb
#   RBR - Adverb, comparative
#   RBS - Adverb, superlative
#   RP - Particle
#   SYM - Symbol
#   TO - to
#   UH - Interjection
#   VB - Verb, base form
#   VBD - Verb, past tense
#   VBG - Verb, gerund or present participle
#   VBN - Verb, past participle
#   VBP - Verb, non-3rd person singular present
#   VBZ - Verb, 3rd person singular present
#   WDT - Wh-determiner
#   WRB - Wh-adverb
@@ -1,165 +0,0 @@
1
class KafTermSentiment:
    """Sentiment annotation attached to a term.

    Holds the values of the ``resource``, ``polarity``, ``strength``,
    ``subjectivity`` and ``sentiment_modifier`` attributes.  All of them
    start as ``None`` and are filled in by :meth:`simpleInit`.
    """

    def __init__(self):
        self.resource = None
        self.polarity = None
        self.strength = None
        self.subjectivity = None
        # Initialised here as well, so that getSentimentModifier() works on
        # an object for which simpleInit() has not been called yet.
        self.sentiment_modifier = None

    def simpleInit(self, r, p, st, su, sm=None):
        """Set all fields at once.

        r  -- resource
        p  -- polarity
        st -- strength
        su -- subjectivity
        sm -- sentiment modifier (optional, defaults to None)
        """
        self.resource = r
        self.polarity = p
        self.strength = st
        self.subjectivity = su
        self.sentiment_modifier = sm

    def getPolarity(self):
        """Return the stored polarity (None when unset)."""
        return self.polarity

    def getSentimentModifier(self):
        """Return the stored sentiment modifier (None when unset)."""
        return self.sentiment_modifier
20
-
21
-
22
class KafToken:
    """Plain record for one token: its id, surface value and, optionally,
    the sentence and paragraph values it was created with."""

    def __init__(self, wid, value, sent=None, para=None):
        # Identifier and surface form first, positional info afterwards.
        self.token_id, self.value = wid, value
        self.sent, self.para = sent, para
28
-
29
-
30
class KafOpinionExpression:
    """Opinion expression: a polarity, a strength and the ids it spans."""

    def __init__(self, polarity, strength, targets):
        self.polarity = polarity
        self.strength = strength
        self.targets = targets

    def __str__(self):
        """Return a one-line summary.

        Values are formatted rather than concatenated so that a numeric
        strength or a missing (None) value no longer raises TypeError.  For
        all-string inputs the text is identical to the previous output.
        """
        ids = '-'.join(str(target) for target in (self.targets or []))
        return 'Op_exp==> pol:{0} Str:{1} ids:{2}'.format(
            self.polarity, self.strength, ids)
38
-
39
class KafOpinion:
    """An opinion: its id, holder ids, target ids and opinion expression."""

    def __init__(self, id, holders, targets, opi_exp):
        self.id = id
        self.holders = holders
        self.targets = targets
        self.opi_exp = opi_exp

    def __str__(self):
        """Return a multi-line summary of the opinion.

        Every piece goes through ``str()`` so that a missing id (None) or a
        non-string identifier does not raise TypeError.  For all-string
        inputs the text is identical to the previous output.
        """
        holders = '-'.join(str(holder) for holder in (self.holders or []))
        targets = '-'.join(str(target) for target in (self.targets or []))
        lines = [
            'Opinion id' + str(self.id),
            ' Holders: ' + holders,
            ' Targets: ' + targets,
            str(self.opi_exp),
        ]
        return '\n'.join(lines)
52
-
53
-
54
-
55
class KafSingleProperty:
    """One property occurrence: an id, a type and the ids of one span."""

    def __init__(self, id, type, targets):
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the property id."""
        return self.id

    def get_type(self):
        """Return the property type."""
        return self.type

    def get_span(self):
        """Return the list of target ids given at construction."""
        return self.targets

    def __str__(self):
        """Return a one-line summary.

        Uses formatting instead of ``+`` so that a None id/type or a None
        target id is printed rather than raising TypeError.  For all-string
        inputs the text is identical to the previous output.
        """
        ids = ' '.join(str(target) for target in (self.targets or []))
        return 'Id: {0} Type: {1} ids:{2}'.format(self.id, self.type, ids)
73
-
74
-
75
class KafSingleEntity:
    """One entity occurrence: an id, a type and the ids of one span."""

    def __init__(self, id, type, targets):
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the entity id."""
        return self.id

    def get_type(self):
        """Return the entity type."""
        return self.type

    def get_span(self):
        """Return the list of target ids given at construction."""
        return self.targets

    def __str__(self):
        """Return a one-line summary.

        Uses formatting instead of ``+`` so that a None id/type or a None
        target id is printed rather than raising TypeError.  For all-string
        inputs the text is identical to the previous output.
        """
        ids = ' '.join(str(target) for target in (self.targets or []))
        return 'Id: {0} Type: {1} ids:{2}'.format(self.id, self.type, ids)
92
-
93
class KafTerm:
    """A term: id, lemma, part of speech, morphofeat, optional sentiment
    object and the list of ids in its span."""

    def __init__(self):
        self.tid = None
        self.lemma = None
        self.pos = None
        self.morphofeat = None
        self.sentiment = None
        self.list_span_id = []

    def get_morphofeat(self):
        """Return the morphofeat value (None when unset)."""
        return self.morphofeat

    def set_list_span_id(self, L):
        """Replace the list of span ids with ``L``."""
        self.list_span_id = L

    def get_list_span(self):
        """Return the list of span ids."""
        return self.list_span_id

    def get_polarity(self):
        """Return the polarity of the attached sentiment, or None when the
        term has no sentiment object."""
        if self.sentiment is None:
            return None
        return self.sentiment.getPolarity()

    def get_sentiment_modifier(self):
        """Return the sentiment modifier of the attached sentiment, or None
        when the term has no sentiment object."""
        if self.sentiment is None:
            return None
        return self.sentiment.getSentimentModifier()

    def setSentiment(self, my_sent):
        """Attach a sentiment object (must offer getPolarity() and
        getSentimentModifier())."""
        self.sentiment = my_sent

    def getSentiment(self):
        """Return the attached sentiment object (None when unset)."""
        return self.sentiment

    def getLemma(self):
        """Return the lemma."""
        return self.lemma

    def setLemma(self, lemma):
        """Set the lemma."""
        self.lemma = lemma

    def getPos(self):
        """Return the part-of-speech value."""
        return self.pos

    def setPos(self, pos):
        """Set the part-of-speech value."""
        self.pos = pos

    def getId(self):
        """Return the term id."""
        return self.tid

    def setId(self, id):
        """Set the term id."""
        self.tid = id

    def getShortPos(self):
        """Return a one-letter, lower-cased code derived from the first
        character of the pos value.

        The first letter is remapped ``'g' -> 'a'`` and ``'a' -> 'r'``; any
        other letter is returned unchanged.  Returns None when the pos is
        unset or empty (an empty string used to raise IndexError).
        """
        if not self.pos:
            return None
        first = self.pos.lower()[0]
        if first == 'g':
            return 'a'
        if first == 'a':
            return 'r'
        return first

    def __str__(self):
        """Return ``tid``, lemma and pos separated by newline+tab, or the
        literal ``'None'`` when any of the three is missing or empty."""
        if not (self.tid and self.lemma and self.pos):
            return 'None'
        lemma = self.lemma
        if not isinstance(lemma, str):
            # Python 2: a unicode lemma is encoded to a native byte string
            # (the original behaviour).  Python 3: a bytes lemma is decoded
            # to text.  A native ``str`` is used as is, which avoids the
            # str + bytes TypeError the unconditional encode() caused on
            # Python 3.
            lemma = lemma.encode('utf-8') if str is bytes else lemma.decode('utf-8')
        return self.tid + '\n\t' + lemma + '\n\t' + self.pos
161
-
162
-
163
-
164
-
165
-
@@ -1,436 +0,0 @@
1
- ########################################################################
2
- # 14 Jan 2013: added function add_attrs_to_layer
3
- ########################################################################
4
-
5
- ###################
6
- # List of changes #
7
- ###################
8
- # 14 Jan 2013: added function add_attrs_to_layer
9
- # 27 Feb 2013: added code for comply with DTD
10
- # 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
11
- # 18 Jun 2013: funcion add_property created for adding the properties to the KAF
12
-
13
-
14
- from lxml import etree
15
- from KafDataObjectsMod import *
16
- import time
17
-
18
class KafParser:
    """Read, query and extend a KAF XML document held as an lxml tree.

    With a ``filename`` the document is parsed from that file; without one an
    empty ``<KAF version="v1.opener" xml:lang="en">`` document is created.
    The tree is available as ``self.tree``.
    """

    # Qualified name of the ``xml:lang`` attribute.
    _XML_LANG = '{http://www.w3.org/XML/1998/namespace}lang'

    def __init__(self, filename=None):
        self.tree = None
        # token id (wid) -> XPath of its <wf> element
        self.__pathForToken = {}
        # token id -> list of term ids; built lazily on first lookup
        self.__term_ids_for_token_id = None

        if filename:
            self.tree = etree.parse(filename, etree.XMLParser(remove_blank_text=True))
            ## Do the text tokenization
            self.__textTokenization()
        else:
            root = etree.Element('KAF')
            root.set('version', 'v1.opener')
            root.set(self._XML_LANG, 'en')
            self.tree = etree.ElementTree(element=root)

    @staticmethod
    def _target_ids(element, path='target'):
        """Return the ``id`` attribute of every element matching ``path``
        below ``element``."""
        return [target.get('id') for target in element.findall(path)]

    @staticmethod
    def _append_span(parent, ids):
        """Append a ``<span>`` with one ``<target id=...>`` per id to
        ``parent``."""
        span = etree.Element('span')
        parent.append(span)
        for one_id in ids:
            span.append(etree.Element('target', attrib={'id': one_id}))
        return span

    def __textTokenization(self):
        """Index the XPath of every ``text/wf`` element by its ``wid``."""
        for wf in self.tree.findall('text/wf'):
            self.__pathForToken[wf.get('wid')] = self.tree.getpath(wf)

    def getToken(self, tid):
        """Return the ``<wf>`` element for token id ``tid``.

        Raises KeyError when the id was not indexed at load time.
        """
        return self.tree.xpath(self.__pathForToken[tid])[0]

    def getLanguage(self):
        """Return the ``xml:lang`` attribute of the root element, or
        ``'nl'`` when the attribute is absent."""
        return self.tree.getroot().get(self._XML_LANG, 'nl')

    ## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
    ## [(s_id1, T1), (sent_id2, T2)....]
    ## T1 --> [(tokenid, token), (tokenid2,token2)....]
    def get_tokens_in_sentences(self):
        """Group the ``text/wf`` tokens by consecutive ``sent`` value.

        Returns a list of ``(sentence_id, [(token_id, text), ...])`` in
        document order; an empty list when there are no tokens (this case
        used to fail on an unbound variable).
        """
        sents = []
        current = []
        previous_sent = None
        for element in self.tree.findall('text/wf'):
            s_id = element.get('sent')
            # A change of sentence id closes the sentence collected so far.
            if previous_sent is not None and s_id != previous_sent:
                sents.append((previous_sent, current))
                current = []
            current.append((element.get('wid'), element.text))
            previous_sent = s_id
        if current:
            # previous_sent is the sentence id of the last token seen.
            sents.append((previous_sent, current))
        return sents

    def get_term_ids_for_token_id(self, tok_id):
        """Return the ids of the terms whose span contains token
        ``tok_id`` (empty list when none).

        The token -> terms index is built once, on the first call, from
        ``terms/term/span/target``.
        """
        if self.__term_ids_for_token_id is None:
            index = {}
            for element in self.tree.findall('terms/term'):
                term_id = element.get('tid')
                for token_id in self._target_ids(element, 'span/target'):
                    index.setdefault(token_id, []).append(term_id)
            self.__term_ids_for_token_id = index
        return self.__term_ids_for_token_id.get(tok_id, [])

    def getTokens(self):
        """Yield ``(text, sentence_id, token_id)`` for every ``text/wf``
        element; the sentence id defaults to ``'0'``."""
        for element in self.tree.findall('text/wf'):
            yield (element.text, element.get('sent', '0'), element.get('wid'))

    def getTerms(self):
        """Yield a ``KafTerm`` for every ``terms/term`` element, including
        its sentiment (when a ``<sentiment>`` child exists) and span ids."""
        if self.tree is None:
            return
        for element in self.tree.findall('terms/term'):
            kafTermObj = KafTerm()
            kafTermObj.setId(element.get('tid'))
            kafTermObj.setLemma(element.get('lemma'))
            kafTermObj.setPos(element.get('pos'))
            kafTermObj.morphofeat = element.get('morphofeat')

            ## Parsing sentiment
            sentiment = element.find('sentiment')
            if sentiment is not None:
                my_sent = KafTermSentiment()
                my_sent.simpleInit(sentiment.get('resource', ''),
                                   sentiment.get('polarity', None),
                                   sentiment.get('strength', ''),
                                   sentiment.get('subjectivity', ''),
                                   sentiment.get('sentiment_modifier'))
                kafTermObj.setSentiment(my_sent)

            ## Parsing the span
            span = element.find('span')
            if span is not None:
                kafTermObj.set_list_span_id(self._target_ids(span))

            yield kafTermObj

    def getSentimentTriples(self):
        """Return ``[(lemma, polarity, sentiment_modifier), ...]`` for every
        term; polarity and modifier are None for terms without a
        ``<sentiment>`` child."""
        data = []
        if self.tree is None:
            return data
        for term_element in self.tree.findall('terms/term'):
            polarity = None
            sentiment_modifier = None
            sentiment_element = term_element.find('sentiment')
            if sentiment_element is not None:
                polarity = sentiment_element.get('polarity', None)
                sentiment_modifier = sentiment_element.get('sentiment_modifier')
            data.append((term_element.get('lemma'), polarity, sentiment_modifier))
        return data

    def addPolarityToTerm(self, termid, my_sentiment_attribs, polarity_pos=None):
        """Append a ``<sentiment>`` element with the given attributes to
        every term whose ``tid`` equals ``termid``.

        When such a term has no ``pos`` value and ``polarity_pos`` is given,
        ``pos`` is set to ``polarity_pos``.  Does nothing when the document
        has no ``terms`` layer (this used to raise TypeError).
        """
        if self.tree is None:
            return
        terms_layer = self.tree.find('terms')
        if terms_layer is None:
            return
        for element in terms_layer:
            if element.get('tid', '') != termid:
                continue
            # In case there is no pos info, we use the polarityPos
            if not element.get('pos') and polarity_pos is not None:
                element.set('pos', polarity_pos)
            element.append(etree.Element('sentiment', attrib=my_sentiment_attribs))

    def saveToFile(self, filename, myencoding='UTF-8'):
        """Write the document, pretty-printed and with an XML declaration,
        to ``filename`` using encoding ``myencoding``."""
        if self.tree is not None:
            self.tree.write(filename, encoding=myencoding, pretty_print=True,
                            xml_declaration=True)

    def addLinguisticProcessor(self, name, version, layer, time_stamp=True):
        """Record a ``<lp>`` entry in the ``kafHeader``.

        The entry is appended to the ``linguisticProcessors`` element whose
        ``layer`` attribute equals ``layer``; that element (and the header
        itself) is created when missing.  With ``time_stamp`` false the
        timestamp attribute is the literal ``'*'``.
        """
        kaf_header = self.tree.find('kafHeader')
        if kaf_header is None:
            kaf_header = etree.Element('kafHeader')
            self.tree.getroot().insert(0, kaf_header)

        lp_groups = kaf_header.findall('linguisticProcessors')
        if not lp_groups:
            first_group = etree.Element('linguisticProcessors')
            first_group.set('layer', layer)
            kaf_header.append(first_group)
            lp_groups = [first_group]

        ## Check if there is already element for the layer
        my_lp_ele = None
        for group in lp_groups:
            if group.get('layer', '') == layer:
                my_lp_ele = group
                break

        if time_stamp:
            my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
        else:
            my_time = '*'

        my_lp = etree.Element('lp')
        my_lp.set('timestamp', my_time)
        my_lp.set('version', version)
        my_lp.set('name', name)

        if my_lp_ele is None:
            # No group for this layer yet: create one and place it right
            # after the last existing linguisticProcessors element.
            my_lp_ele = etree.Element('linguisticProcessors')
            my_lp_ele.set('layer', layer)
            kaf_header.insert(kaf_header.index(lp_groups[-1]) + 1, my_lp_ele)
        my_lp_ele.append(my_lp)

    def addLayer(self, type, element, first_char_id=None):
        """Append ``element`` to the layer named ``type`` and return the id
        assigned to it.

        The layer is created under the root when missing.  The id is
        ``first_char_id`` (default: first character of ``type``) followed by
        the number of children already in the layer plus one, and is stored
        in the attribute ``<first_char_id>id``.
        """
        if first_char_id is None:
            first_char_id = type[0]

        ## Check if there is already layer for the type
        layer_element = self.tree.find(type)
        if layer_element is None:
            layer_element = etree.Element(type)
            self.tree.getroot().append(layer_element)

        # len() counts the children; a freshly created layer yields id 1.
        new_id = first_char_id + str(len(layer_element) + 1)
        element.set(first_char_id + 'id', new_id)
        layer_element.append(element)
        return new_id

    def addElementToLayer(self, layer, element, first_char_id=None):
        """Alias of :meth:`addLayer`."""
        return self.addLayer(layer, element, first_char_id)

    def add_attrs_to_layer(self, layer, attrs):
        """Set every key/value of ``attrs`` as an attribute on the layer
        element found at ``layer``; no-op when the layer does not exist."""
        layer_element = self.tree.find(layer)
        if layer_element is not None:
            for att, val in attrs.items():
                layer_element.set(att, val)

    def addAttributeToElement(self, path, str_id, id, attribute, value, sub_path=None):
        """Set ``attribute=value`` on the first element under ``path`` whose
        ``str_id`` attribute equals ``id``.

        When ``sub_path`` is given and matches below that element, the
        attribute is set on the first match instead.  Nothing happens when
        ``id`` is None or no element matches.
        """
        if id is None:
            return
        for element in self.tree.findall(path):
            if element.get(str_id, None) != id:
                continue
            if sub_path is not None:
                sub_elements = element.findall(sub_path)
                if sub_elements:
                    element = sub_elements[0]
            element.set(attribute, value)
            return

    ## This works with the original definition of the property layer
    ## KAF -> properties -> property* -> span* -> target*
    def getSingleProperties_old(self):
        """Yield a ``KafSingleProperty`` per span of every
        ``properties/property`` element; spans are read from the
        ``references`` child when the property has one."""
        for element in self.tree.findall('properties/property'):
            my_id = element.get('pid')
            my_type = element.get('type')
            ref = element.find('references')
            span_parent = element if ref is None else ref
            for span_element in span_parent.findall('span'):
                yield KafSingleProperty(my_id, my_type, self._target_ids(span_element))

    ## 18-June-2013
    def getSingleProperties(self):
        """Yield a ``KafSingleProperty`` per ``references/span`` of every
        ``features/properties/property`` element.

        The id is the ``pid`` attribute, falling back to ``fpid``; the type
        is the ``lemma`` attribute.
        """
        for property in self.tree.findall('features/properties/property'):
            my_id = property.get('pid')
            if my_id is None:
                my_id = property.get('fpid')
            my_type = property.get('lemma')
            for span_element in property.findall('references/span'):
                yield KafSingleProperty(my_id, my_type, self._target_ids(span_element))

    # This function adds a new property of the type given with the list of ids given
    # my_type -> 'sleeping comfort' list_ids = ['id1','id2']
    # It creates the features/properties layers in case
    # Agglomerates all the properties for the same TYPE under the same property element
    # It calculates automatically the number for the identifier depending on the number
    # of properties existing
    def add_property(self, my_type, list_ids, comment=None):
        """Add a span with targets ``list_ids`` to the property whose
        ``lemma`` is ``my_type``, creating the ``features``, ``properties``,
        ``property`` and ``references`` elements as needed.

        A new property gets the id ``p<N+1>`` where N is the number of
        existing property elements.  ``comment``, when given, is inserted
        as an XML comment before the new span.
        """
        # Looking for feature layer or creating it
        feature_layer = self.tree.find('features')
        if feature_layer is None:
            feature_layer = etree.Element('features')
            self.tree.getroot().append(feature_layer)

        # Looking for properties layer
        properties_layer = feature_layer.find('properties')
        if properties_layer is None:
            properties_layer = etree.Element('properties')
            feature_layer.append(properties_layer)

        existing = properties_layer.findall('property')
        property_layer = None
        for candidate in existing:
            if candidate.get('lemma') == my_type:
                property_layer = candidate
                break

        if property_layer is None:
            # There is no property for that type yet, let's create one
            property_layer = etree.Element('property')
            property_layer.set('pid', 'p' + str(len(existing) + 1))
            property_layer.set('lemma', my_type)
            properties_layer.append(property_layer)

        references = property_layer.find('references')
        if references is None:
            references = etree.Element('references')
            property_layer.append(references)

        ## Create the new span
        if comment is not None:
            references.append(etree.Comment(comment))
        self._append_span(references, list_ids)

    def getSingleEntities(self):
        """Yield a ``KafSingleEntity`` per span of every ``entities/entity``
        element; spans are read from ``references/span`` when the entity has
        a ``references`` child, otherwise from ``span``."""
        for element in self.tree.findall('entities/entity'):
            my_id = element.get('eid')
            my_type = element.get('type')
            if element.find('references') is not None:
                my_path_to_span = 'references/span'
            else:
                my_path_to_span = 'span'
            for span_element in element.findall(my_path_to_span):
                yield KafSingleEntity(my_id, my_type, self._target_ids(span_element))

    def getOpinions(self):
        """Yield a ``KafOpinion`` for every ``opinions/opinion`` element.

        Holder, target and expression ids come from the first
        ``opinion_holder`` / ``opinion_target`` / ``opinion_expression``
        child; missing parts give empty lists and empty polarity/strength
        strings.  (An opinion without an expression used to fail because of
        a misspelled default for ``strength``.)
        """
        for element in self.tree.findall('opinions/opinion'):
            my_id = element.get('oid')

            tar_ids_hol = []
            tar_ids_tar = []
            tar_ids_exp = []
            polarity = strength = ''

            # Holder
            opi_hol_ele = element.find('opinion_holder')
            if opi_hol_ele is not None:
                tar_ids_hol = self._target_ids(opi_hol_ele, 'span/target')

            # Target
            opi_tar_ele = element.find('opinion_target')
            if opi_tar_ele is not None:
                tar_ids_tar = self._target_ids(opi_tar_ele, 'span/target')

            ## Opinion expression
            opi_exp_ele = element.find('opinion_expression')
            if opi_exp_ele is not None:
                polarity = opi_exp_ele.get('polarity', '')
                strength = opi_exp_ele.get('strength', '')
                tar_ids_exp = self._target_ids(opi_exp_ele, 'span/target')

            yield KafOpinion(my_id, tar_ids_hol, tar_ids_tar,
                             KafOpinionExpression(polarity, strength, tar_ids_exp))

    def remove_opinion_layer(self):
        """Remove the ``opinions`` layer from the root, if present."""
        opinion_layer = self.tree.find('opinions')
        if opinion_layer is not None:
            self.tree.getroot().remove(opinion_layer)

    ## This function add an opinion to the opinion layer, creating it if does not exist
    ## The id is calculated automatically according to the number of elements and ensuring there is no repetition
    def add_opinion(self, hol_ids, tar_ids, polarity, strength, exp_ids):
        """Append a new ``<opinion>`` with holder, target and expression
        spans to the ``opinions`` layer (created when missing).

        The opinion id is the first ``o<n>`` (n = 1, 2, ...) not already
        used as an ``oid`` in the layer.  ``strength`` is converted with
        ``str()``; ``polarity`` is used as given.
        """
        # Looking for opinion layer or creating it
        opinion_layer = self.tree.find('opinions')
        if opinion_layer is None:
            opinion_layer = etree.Element('opinions')
            self.tree.getroot().append(opinion_layer)

        ## Generating unique id
        used_oids = set(opi.get('oid') for opi in opinion_layer)
        n = 1
        while 'o' + str(n) in used_oids:
            n += 1

        op_ele = etree.Element('opinion')
        opinion_layer.append(op_ele)
        op_ele.set('oid', 'o' + str(n))

        ## Holder
        op_hol = etree.Element('opinion_holder')
        op_ele.append(op_hol)
        self._append_span(op_hol, hol_ids)

        ## Target
        op_tar = etree.Element('opinion_target')
        op_ele.append(op_tar)
        self._append_span(op_tar, tar_ids)

        ## Expression
        op_exp = etree.Element('opinion_expression',
                               attrib={'polarity': polarity,
                                       'strength': str(strength)})
        op_ele.append(op_exp)
        self._append_span(op_exp, exp_ids)
432
-
433
-
434
-
435
-
436
-