opener-opinion-detector-base 2.0.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/core/python-scripts/README.md +78 -3
  3. data/core/python-scripts/classify_kaf_naf_file.py +94 -94
  4. data/core/python-scripts/models.cfg +1 -0
  5. data/core/python-scripts/scripts/config_manager.py +3 -0
  6. data/core/python-scripts/scripts/extract_features.py +0 -3
  7. data/core/python-scripts/scripts/relation_classifier.py +1 -1
  8. data/core/vendor/src/crfsuite/crfsuite.sln +42 -42
  9. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -26
  10. data/ext/hack/Rakefile +5 -2
  11. data/lib/opener/opinion_detectors/base.rb +19 -15
  12. data/lib/opener/opinion_detectors/base/version.rb +1 -1
  13. data/lib/opener/opinion_detectors/configuration_creator.rb +6 -8
  14. data/lib/opener/opinion_detectors/de.rb +1 -1
  15. data/lib/opener/opinion_detectors/es.rb +7 -0
  16. data/lib/opener/opinion_detectors/fr.rb +7 -0
  17. data/opener-opinion-detector-base.gemspec +0 -1
  18. data/pre_install_requirements.txt +3 -0
  19. metadata +41 -85
  20. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  21. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  22. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +0 -10
  23. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +0 -22
  24. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +0 -1
  25. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +0 -47
  26. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +0 -1
  27. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -390
  28. data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
  29. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
  30. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
  31. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -78
  32. data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
  33. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
  34. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
  35. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
  36. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -309
  37. data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -131
  38. data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
  39. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -211
  40. data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -23
  41. data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
  42. data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
  43. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
  44. data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -99
  45. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +0 -10
  46. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +0 -14
  47. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +0 -1
  48. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +0 -23
  49. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +0 -1
  50. data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
  51. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
  52. data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
  53. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
  54. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
  55. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
  56. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
  57. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
  58. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
  59. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
  60. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
  61. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
  62. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
  63. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
  64. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
  65. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
  66. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
  67. data/pre_build_requirements.txt +0 -3
@@ -1 +0,0 @@
1
- from lexicon import *
@@ -1,72 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import os
4
- import re
5
- from VUA_pylib.common import normalize_pos
6
-
7
- __this_folder__ = os.path.dirname(os.path.realpath(__file__))
8
-
9
- class MPQA_subjectivity_lexicon:
10
- def __init__(self):
11
- self.__filename=os.path.join(__this_folder__,'data','subjclueslen1-HLTEMNLP05.tff')
12
- self.stemmed = {}
13
- self.stemmed_anypos = {}
14
- self.no_stemmed = {}
15
- self.no_stemmed_anypos = {}
16
-
17
- self.__load()
18
-
19
- def __load(self):
20
- # Format of lines:
21
- # type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative
22
- fic = open(self.__filename)
23
- for line in fic:
24
- line=line.strip()+' '
25
- this_type = re.findall('type=([^ ]+)', line)[0]
26
- word = re.findall('word1=([^ ]+)', line)[0]
27
- pos = re.findall('pos1=([^ ]+)', line)[0]
28
- stemmed = re.findall('stemmed1=([^ ]+)', line)[0]
29
- prior_polarity = re.findall('priorpolarity=([^ ]+)', line)[0]
30
- pos = normalize_pos(pos)
31
- if stemmed == 'y':
32
- self.stemmed[(word,pos)] = (this_type,prior_polarity)
33
- if True or pos == '*': #anypos
34
- self.stemmed_anypos[word] = (this_type,prior_polarity)
35
-
36
- elif stemmed == 'n':
37
- self.no_stemmed[(word,pos)] = (this_type,prior_polarity)
38
- if True or pos == '*':
39
- self.no_stemmed_anypos[word] = (this_type,prior_polarity)
40
-
41
- fic.close()
42
-
43
- def get_type_and_polarity(self,word,pos=None):
44
- res = None
45
- if pos is not None:
46
- pos = normalize_pos(pos)
47
-
48
- # Try no stemmed with the given pos
49
- res = self.no_stemmed.get((word,pos))
50
-
51
- # Try stemmed with the given pos
52
- if res is None:
53
- res = self.stemmed.get((word,pos))
54
-
55
- # Try no stemmed with any pos
56
- if res is None:
57
- res = self.no_stemmed_anypos.get(word)
58
-
59
- # Try stemm with any pos
60
- if res is None:
61
- res = self.stemmed_anypos.get(word)
62
-
63
-
64
-
65
- return res
66
-
67
-
68
- if __name__ == '__main__':
69
- o = MPQA_subjectivity_lexicon()
70
- print o.get_type_and_polarity('abidance','adj')
71
-
72
-
@@ -1,10 +0,0 @@
1
- Metadata-Version: 1.0
2
- Name: VUKafParserPy
3
- Version: 1.0
4
- Summary: Library in python to parse kaf files
5
- Home-page: UNKNOWN
6
- Author: Ruben Izquierdo
7
- Author-email: r.izquierdobevia@vu.nl
8
- License: UNKNOWN
9
- Description: UNKNOWN
10
- Platform: UNKNOWN
@@ -1,7 +0,0 @@
1
- VUKafParserPy/KafDataObjectsMod.py
2
- VUKafParserPy/KafParserMod.py
3
- VUKafParserPy/__init__.py
4
- VUKafParserPy.egg-info/PKG-INFO
5
- VUKafParserPy.egg-info/SOURCES.txt
6
- VUKafParserPy.egg-info/dependency_links.txt
7
- VUKafParserPy.egg-info/top_level.txt
@@ -1,11 +0,0 @@
1
- ../VUKafParserPy/__init__.py
2
- ../VUKafParserPy/KafDataObjectsMod.py
3
- ../VUKafParserPy/KafParserMod.py
4
- ../VUKafParserPy/__init__.pyc
5
- ../VUKafParserPy/KafDataObjectsMod.pyc
6
- ../VUKafParserPy/KafParserMod.pyc
7
- ./
8
- dependency_links.txt
9
- PKG-INFO
10
- SOURCES.txt
11
- top_level.txt
@@ -1,165 +0,0 @@
1
- class KafTermSentiment:
2
- def __init__(self):
3
- self.resource=None
4
- self.polarity=None
5
- self.strength=None
6
- self.subjectivity=None
7
-
8
- def simpleInit(self,r,p,st,su,sm=None):
9
- self.resource=r
10
- self.polarity=p
11
- self.strength=st
12
- self.subjectivity=su
13
- self.sentiment_modifier = sm
14
-
15
- def getPolarity(self):
16
- return self.polarity
17
-
18
- def getSentimentModifier(self):
19
- return self.sentiment_modifier
20
-
21
-
22
- class KafToken:
23
- def __init__(self,wid, value, sent=None, para=None):
24
- self.token_id = wid
25
- self.value = value
26
- self.sent = sent
27
- self.para = para
28
-
29
-
30
- class KafOpinionExpression:
31
- def __init__(self,polarity,strength,targets):
32
- self.polarity = polarity
33
- self.strength = strength
34
- self.targets = targets
35
-
36
- def __str__(self):
37
- return 'Op_exp==> pol:'+self.polarity+' Str:'+self.strength+' ids:'+'-'.join(self.targets)
38
-
39
- class KafOpinion:
40
- def __init__(self,id,holders, targets, opi_exp):
41
- self.id = id
42
- self.holders = holders
43
- self.targets = targets
44
- self.opi_exp = opi_exp
45
-
46
- def __str__(self):
47
- c='Opinion id'+self.id+'\n'
48
- c+=' Holders: '+'-'.join(self.holders)+'\n'
49
- c+=' Targets: '+'-'.join(self.targets)+'\n'
50
- c+=str(self.opi_exp)
51
- return c
52
-
53
-
54
-
55
- class KafSingleProperty:
56
- def __init__(self,id,type,targets):
57
- self.id = id
58
- self.type = type
59
- self.targets = targets
60
-
61
-
62
- def get_id(self):
63
- return self.id
64
-
65
- def get_type(self):
66
- return self.type
67
-
68
- def get_span(self):
69
- return self.targets
70
-
71
- def __str__(self):
72
- return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
73
-
74
-
75
- class KafSingleEntity:
76
- def __init__(self,id,type,targets):
77
- self.id = id
78
- self.type = type
79
- self.targets = targets
80
-
81
- def get_id(self):
82
- return self.id
83
-
84
- def get_type(self):
85
- return self.type
86
-
87
- def get_span(self):
88
- return self.targets
89
-
90
- def __str__(self):
91
- return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
92
-
93
- class KafTerm:
94
- def __init__(self):
95
- self.tid = None
96
- self.lemma = None
97
- self.pos = None
98
- self.morphofeat = None
99
- self.sentiment = None
100
- self.list_span_id = []
101
-
102
- def get_morphofeat(self):
103
- return self.morphofeat
104
-
105
- def set_list_span_id(self, L):
106
- self.list_span_id = L
107
-
108
- def get_list_span(self):
109
- return self.list_span_id
110
-
111
- def get_polarity(self):
112
- if self.sentiment != None:
113
- return self.sentiment.getPolarity()
114
- else:
115
- return None
116
-
117
- def get_sentiment_modifier(self):
118
- if self.sentiment != None:
119
- return self.sentiment.getSentimentModifier()
120
- else:
121
- return None
122
-
123
-
124
- def setSentiment(self,my_sent):
125
- self.sentiment = my_sent
126
-
127
- def getSentiment(self):
128
- return self.sentiment
129
-
130
- def getLemma(self):
131
- return self.lemma
132
-
133
- def setLemma(self,lemma):
134
- self.lemma = lemma
135
-
136
- def getPos(self):
137
- return self.pos
138
-
139
- def setPos(self,pos):
140
- self.pos = pos
141
-
142
- def getId(self):
143
- return self.tid
144
-
145
- def setId(self,id):
146
- self.tid = id
147
-
148
- def getShortPos(self):
149
- if self.pos==None:
150
- return None
151
- auxpos=self.pos.lower()[0]
152
- if auxpos == 'g': auxpos='a'
153
- elif auxpos == 'a': auxpos='r'
154
- return auxpos
155
-
156
- def __str__(self):
157
- if self.tid and self.lemma and self.pos:
158
- return self.tid+'\n\t'+self.lemma.encode('utf-8')+'\n\t'+self.pos
159
- else:
160
- return 'None'
161
-
162
-
163
-
164
-
165
-
@@ -1,439 +0,0 @@
1
- ########################################################################
2
- # 14 Jan 2013: added function add_attrs_to_layer
3
- ########################################################################
4
-
5
- ###################
6
- # List of changes #
7
- ###################
8
- # 14 Jan 2013: added function add_attrs_to_layer
9
- # 27 Feb 2013: added code for comply with DTD
10
- # 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
11
- # 18 Jun 2013: funcion add_property created for adding the properties to the KAF
12
-
13
-
14
- from lxml import etree
15
- from KafDataObjectsMod import *
16
- import time
17
-
18
- class KafParser:
19
- def __init__(self,filename=None):
20
- self.tree=None
21
- self.__pathForToken={}
22
- self.__term_ids_for_token_id = None
23
-
24
- if filename:
25
- #self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
26
- self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True, strip_cdata=False))
27
- ## Do the text tokenization
28
- self.__textTokenization()
29
- else:
30
- root = etree.Element('KAF')
31
- root.set('version','v1.opener')
32
- root.set('{http://www.w3.org/XML/1998/namespace}lang','en')
33
- self.tree = etree.ElementTree(element=root)
34
-
35
- def __textTokenization(self):
36
- for wf in self.tree.findall('text/wf'):
37
- wid = wf.get('wid')
38
- self.__pathForToken[wid] = self.tree.getpath(wf)
39
-
40
-
41
- def getToken(self,tid):
42
- if tid in self.__pathForToken:
43
- path = self.__pathForToken[tid]
44
- return self.tree.xpath(self.__pathForToken[tid])[0]
45
- return None
46
-
47
-
48
- def getLanguage(self):
49
- lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
50
- return lang
51
-
52
- ## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
53
- ## [(s_id1, T1), (sent_id2, T2)....]
54
- ## T1 --> [(tokenid, token), (tokenid2,token2)....]
55
- def get_tokens_in_sentences(self):
56
- sents = []
57
- current = []
58
- previous_sent = None
59
- for element in self.tree.findall('text/wf'):
60
- w_id = element.get('wid')
61
- s_id = element.get('sent')
62
- word = element.text
63
-
64
- if previous_sent is not None and s_id != previous_sent:
65
- sents.append((previous_sent,current))
66
- current = []
67
- current.append((w_id,word))
68
- previous_sent = s_id
69
- ####
70
- sents.append((s_id,current))
71
- return sents
72
-
73
- def get_term_ids_for_token_id(self,tok_id):
74
- if self.__term_ids_for_token_id is None:
75
- self.__term_ids_for_token_id = {}
76
- for element in self.tree.findall('terms/term'):
77
- term_id = element.get('tid')
78
- for target in element.findall('span/target'):
79
- token_id = target.get('id')
80
- if token_id not in self.__term_ids_for_token_id:
81
- self.__term_ids_for_token_id[token_id] = [term_id]
82
- else:
83
- self.__term_ids_for_token_id[token_id].append(term_id)
84
- return self.__term_ids_for_token_id.get(tok_id,[])
85
-
86
-
87
-
88
- def getTokens(self):
89
- for element in self.tree.findall('text/wf'):
90
- w_id = element.get('wid')
91
- s_id = element.get('sent','0')
92
- word = element.text
93
- yield (word, s_id, w_id)
94
-
95
-
96
-
97
- def getTerms(self):
98
- if self.tree:
99
- for element in self.tree.findall('terms/term'):
100
- kafTermObj = KafTerm()
101
- kafTermObj.setId(element.get('tid'))
102
- kafTermObj.setLemma(element.get('lemma'))
103
- kafTermObj.setPos(element.get('pos'))
104
- kafTermObj.morphofeat = element.get('morphofeat')
105
-
106
- ## Parsing sentiment
107
- sentiment = element.find('sentiment')
108
- if sentiment is not None:
109
- resource = sentiment.get('resource','')
110
- polarity = sentiment.get('polarity',None)
111
- strength = sentiment.get('strength','')
112
- subjectivity = sentiment.get('subjectivity','')
113
- sentiment_modifier = sentiment.get('sentiment_modifier')
114
-
115
- my_sent = KafTermSentiment()
116
- my_sent.simpleInit(resource,polarity,strength,subjectivity,sentiment_modifier)
117
- kafTermObj.setSentiment(my_sent)
118
-
119
- ## Parsing the span
120
- span = element.find('span')
121
- if span is not None:
122
- list_ids = [target.get('id') for target in span.findall('target')]
123
- kafTermObj.set_list_span_id(list_ids)
124
-
125
-
126
- yield kafTermObj
127
- else:
128
- return
129
-
130
-
131
- def getSentimentTriples(self):
132
- data = []
133
- if self.tree:
134
- for term_element in self.tree.findall('terms/term'):
135
- lemma = term_element.get('lemma')
136
- polarity = None
137
- sentiment_modifier = None
138
-
139
- sentiment_element = term_element.find('sentiment')
140
- if sentiment_element is not None:
141
- polarity = sentiment_element.get('polarity',None)
142
- sentiment_modifier = sentiment_element.get('sentiment_modifier')
143
- data.append( (lemma,polarity,sentiment_modifier))
144
- return data
145
-
146
-
147
-
148
- def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
149
- if self.tree:
150
- for element in self.tree.find('terms'):
151
- if element.get('tid','')==termid:
152
-
153
- #In case there is no pos info, we use the polarityPos
154
- if not element.get('pos') and polarity_pos is not None:
155
- element.set('pos',polarity_pos)
156
- sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
157
- element.append(sentEle)
158
-
159
- def saveToFile(self,filename,myencoding='UTF-8'):
160
- if self.tree:
161
- self.tree.write(filename,encoding=myencoding,pretty_print=True,xml_declaration=True)
162
-
163
-
164
- def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
165
- aux = self.tree.findall('kafHeader')
166
- if len(aux)!=0:
167
- kaf_header = aux[0]
168
- else:
169
- kaf_header = etree.Element('kafHeader')
170
- self.tree.getroot().insert(0,kaf_header)
171
-
172
- aux2= kaf_header.findall('linguisticProcessors')
173
- if len(aux2) == 0:
174
- new_lp = etree.Element('linguisticProcessors')
175
- new_lp.set('layer',layer)
176
- kaf_header.append(new_lp)
177
-
178
- ## Check if there is already element for the layer
179
- my_lp_ele = None
180
-
181
- for element in kaf_header.findall('linguisticProcessors'):
182
- if element.get('layer','')==layer:
183
- my_lp_ele = element
184
- break
185
-
186
- if time_stamp:
187
- my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
188
- else:
189
- my_time = '*'
190
-
191
- my_lp = etree.Element('lp')
192
- my_lp.set('timestamp',my_time)
193
- my_lp.set('version',version)
194
- my_lp.set('name',name)
195
-
196
- if my_lp_ele is not None: #Already an element for linguisticProcessor with the layer
197
- my_lp_ele.append(my_lp)
198
- else:
199
- # Create a new element for the LP layer
200
- my_lp_ele = etree.Element('linguisticProcessors')
201
- my_lp_ele.set('layer',layer)
202
- my_lp_ele.append(my_lp)
203
- #my_lp_ele.tail=my_lp_ele.text='\n'
204
- ## Should be inserted after the last linguisticProcessor element (stored in variable element)
205
- idx = kaf_header.index(element)
206
- kaf_header.insert(idx+1,my_lp_ele)
207
-
208
-
209
- def addLayer(self,type,element,first_char_id=None):
210
- if first_char_id is None:
211
- first_char_id = type[0]
212
-
213
- ## Check if there is already layer for the type
214
- layer_element = self.tree.find(type)
215
-
216
- if layer_element is None:
217
- layer_element = etree.Element(type)
218
- self.tree.getroot().append(layer_element)
219
- ## The id is going to be the first one
220
- new_id = first_char_id+'1'
221
- else:
222
- ## We need to know how many elements there are in the layer
223
- current_n = len(layer_element.getchildren())
224
- new_id = first_char_id+''+str(current_n+1)
225
-
226
-
227
- ## In this point layer_element points to the correct element, existing or created
228
-
229
- element.set(first_char_id+'id',new_id)
230
- layer_element.append(element)
231
- return new_id
232
-
233
- def addElementToLayer(self,layer, element,first_char_id=None):
234
- return self.addLayer(layer,element,first_char_id)
235
-
236
- def add_attrs_to_layer(self,layer,attrs):
237
- layer_element = self.tree.find(layer)
238
- if layer_element is not None:
239
- for att, val in attrs.items():
240
- layer_element.set(att,val)
241
-
242
-
243
- def addAttributeToElement(self,path,str_id, id, attribute, value,sub_path=None):
244
- for element in self.tree.findall(path):
245
- if id is not None and element.get(str_id,None) == id:
246
- if sub_path is not None:
247
- elements = element.findall(sub_path)
248
- if len(elements)!=0: element = elements[0]
249
- element.set(attribute,value)
250
- return
251
-
252
-
253
- ## This works with the original definition of the property layer
254
- ## KAF -> properties -> property* -> span* -> target*
255
- def getSingleProperties_old(self):
256
- for element in self.tree.findall('properties/property'):
257
- my_id = element.get('pid')
258
- my_type = element.get('type')
259
- ref = element.find('references')
260
- if ref is not None:
261
- element = ref
262
- for span_element in element.findall('span'):
263
- target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
264
- my_prop = KafSingleProperty(my_id,my_type,target_ids)
265
- yield my_prop
266
-
267
- ## 18-June-2013
268
- def getSingleProperties(self):
269
- for property in self.tree.findall('features/properties/property'):
270
- my_id = property.get('pid')
271
- if my_id is None:
272
- my_id = property.get('fpid')
273
- my_type = property.get('lemma')
274
- for span_element in property.findall('references/span'):
275
- target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
276
- my_prop = KafSingleProperty(my_id,my_type,target_ids)
277
- yield my_prop
278
-
279
- # This function adds a new property of the type given with the list of ids given
280
- # my_type -> 'sleeping comfort' list_ids = ['id1','id2']
281
- # It creates the features/properties layers in case
282
- # Agglomerates all the properties for the same TYPE under the same property element
283
- # It calculates automatically the number for the identifier depending on the number
284
- # of properties existing
285
- def add_property(self,my_type,list_ids,comment=None):
286
-
287
- #Looking for feature layer or creating it
288
- feature_layer = self.tree.find('features')
289
- if feature_layer is None:
290
- feature_layer = etree.Element('features')
291
- self.tree.getroot().append(feature_layer)
292
-
293
- #Looking for properties layer
294
- properties_layer = feature_layer.find('properties')
295
- if properties_layer is None:
296
- properties_layer = etree.Element('properties')
297
- feature_layer.append(properties_layer)
298
-
299
- num_props = 0
300
- property_layer = None
301
- for property in properties_layer.findall('property'):
302
- num_props += 1
303
- prop_type = property.get('lemma')
304
- if prop_type == my_type:
305
- property_layer = property
306
- break
307
-
308
- if property_layer is None: # There is no any property for that type, let's create one
309
- property_layer = etree.Element('property')
310
- property_layer.set('pid','p'+str(num_props+1))
311
- property_layer.set('lemma',my_type)
312
- properties_layer.append(property_layer)
313
-
314
-
315
- references = property_layer.find('references')
316
- if references is None:
317
- references = etree.Element('references')
318
- property_layer.append(references)
319
- ## Create the new span
320
- if comment is not None:
321
- references.append(etree.Comment(comment))
322
- span = etree.Element('span')
323
- references.append(span)
324
- for my_id in list_ids:
325
- span.append(etree.Element('target',attrib={'id':my_id}))
326
-
327
-
328
-
329
-
330
- def getSingleEntities(self):
331
- for element in self.tree.findall('entities/entity'):
332
- my_id = element.get('eid')
333
- my_type = element.get('type')
334
- my_path_to_span = None
335
- ref = element.find('references')
336
- if ref is not None:
337
- my_path_to_span = 'references/span'
338
- else:
339
- my_path_to_span = 'span'
340
-
341
- for span_element in element.findall(my_path_to_span):
342
- target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
343
- my_prop = KafSingleEntity(my_id,my_type,target_ids)
344
- yield my_prop
345
-
346
-
347
- def getOpinions(self):
348
- for element in self.tree.findall('opinions/opinion'):
349
- my_id = element.get('oid')
350
-
351
- tar_ids_hol = []
352
- tar_ids_tar = []
353
- polarity = strenght = ''
354
- tar_ids_exp = []
355
-
356
- #Holder
357
- opi_hol_eles = element.findall('opinion_holder')
358
- if len(opi_hol_eles)!=0:
359
- opi_hol_ele = opi_hol_eles[0]
360
- tar_ids_hol = [t_ele.get('id') for t_ele in opi_hol_ele.findall('span/target')]
361
-
362
- #Target
363
- opi_tar_eles = element.findall('opinion_target')
364
- if len(opi_tar_eles) != 0:
365
- opi_tar_ele = opi_tar_eles[0]
366
- tar_ids_tar = [t_ele.get('id') for t_ele in opi_tar_ele.findall('span/target')]
367
-
368
- ## Opinion expression
369
- opi_exp_eles = element.findall('opinion_expression')
370
- if len(opi_exp_eles) != 0:
371
- opi_exp_ele = opi_exp_eles[0]
372
- polarity = opi_exp_ele.get('polarity','')
373
- strength = opi_exp_ele.get('strength','')
374
- tar_ids_exp = [t_ele.get('id') for t_ele in opi_exp_ele.findall('span/target')]
375
-
376
- yield KafOpinion(my_id,tar_ids_hol, tar_ids_tar, KafOpinionExpression(polarity, strength,tar_ids_exp))
377
-
378
-
379
-
380
- def remove_opinion_layer(self):
381
- opinion_layer = self.tree.find('opinions')
382
- if opinion_layer is not None:
383
- self.tree.getroot().remove(opinion_layer)
384
-
385
- ## This function add an opinion to the opinion layer, creating it if does not exist
386
- ## The id is calculated automatically according to the number of elements and ensring there is no repetition
387
- def add_opinion(self,hol_ids,tar_ids,polarity,strength,exp_ids):
388
-
389
- #Looking for opinion layer or creating it
390
- opinion_layer = self.tree.find('opinions')
391
- if opinion_layer is None:
392
- opinion_layer = etree.Element('opinions')
393
- self.tree.getroot().append(opinion_layer)
394
-
395
- ## Generating unique id
396
- list_of_oids = [opi.get('oid') for opi in opinion_layer]
397
-
398
- n = 1
399
- while True:
400
- my_id = 'o'+str(n)
401
- if my_id not in list_of_oids:
402
- break
403
- n += 1
404
- #####
405
-
406
- op_ele = etree.Element('opinion')
407
- opinion_layer.append(op_ele)
408
- op_ele.set('oid',my_id)
409
-
410
- ## Holder
411
- op_hol = etree.Element('opinion_holder')
412
- op_ele.append(op_hol)
413
- span_op_hol = etree.Element('span')
414
- op_hol.append(span_op_hol)
415
- for my_id in hol_ids:
416
- span_op_hol.append(etree.Element('target',attrib={'id':my_id}))
417
-
418
- ## TARGET
419
- op_tar = etree.Element('opinion_target')
420
- op_ele.append(op_tar)
421
- span_op_tar = etree.Element('span')
422
- op_tar.append(span_op_tar)
423
- for my_id in tar_ids:
424
- span_op_tar.append(etree.Element('target',attrib={'id':my_id}))
425
-
426
- ## Expression
427
-
428
- op_exp = etree.Element('opinion_expression',attrib={'polarity':polarity,
429
- 'strength':str(strength)})
430
- op_ele.append(op_exp)
431
- span_exp = etree.Element('span')
432
- op_exp.append(span_exp)
433
- for my_id in exp_ids:
434
- span_exp.append(etree.Element('target',attrib={'id':my_id}))
435
-
436
-
437
-
438
-
439
-