opener-opinion-detector-base 2.0.1 → 2.1.2
- checksums.yaml +4 -4
- data/core/python-scripts/README.md +78 -3
- data/core/python-scripts/classify_kaf_naf_file.py +94 -94
- data/core/python-scripts/models.cfg +1 -0
- data/core/python-scripts/scripts/config_manager.py +3 -0
- data/core/python-scripts/scripts/extract_features.py +0 -3
- data/core/python-scripts/scripts/relation_classifier.py +1 -1
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -42
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -26
- data/ext/hack/Rakefile +5 -2
- data/lib/opener/opinion_detectors/base.rb +19 -15
- data/lib/opener/opinion_detectors/base/version.rb +1 -1
- data/lib/opener/opinion_detectors/configuration_creator.rb +6 -8
- data/lib/opener/opinion_detectors/de.rb +1 -1
- data/lib/opener/opinion_detectors/es.rb +7 -0
- data/lib/opener/opinion_detectors/fr.rb +7 -0
- data/opener-opinion-detector-base.gemspec +0 -1
- data/pre_install_requirements.txt +3 -0
- metadata +41 -85
- data/core/packages/KafNafParser-1.4.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +0 -22
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +0 -47
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -390
- data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -78
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -309
- data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -131
- data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -211
- data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -23
- data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
- data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
- data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -99
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +0 -14
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +0 -23
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/pre_build_requirements.txt +0 -3
@@ -1 +0,0 @@
-from lexicon import *
@@ -1,72 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import re
-from VUA_pylib.common import normalize_pos
-
-__this_folder__ = os.path.dirname(os.path.realpath(__file__))
-
-class MPQA_subjectivity_lexicon:
-    def __init__(self):
-        self.__filename=os.path.join(__this_folder__,'data','subjclueslen1-HLTEMNLP05.tff')
-        self.stemmed = {}
-        self.stemmed_anypos = {}
-        self.no_stemmed = {}
-        self.no_stemmed_anypos = {}
-
-        self.__load()
-
-    def __load(self):
-        # Format of lines:
-        # type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative
-        fic = open(self.__filename)
-        for line in fic:
-            line=line.strip()+' '
-            this_type = re.findall('type=([^ ]+)', line)[0]
-            word = re.findall('word1=([^ ]+)', line)[0]
-            pos = re.findall('pos1=([^ ]+)', line)[0]
-            stemmed = re.findall('stemmed1=([^ ]+)', line)[0]
-            prior_polarity = re.findall('priorpolarity=([^ ]+)', line)[0]
-            pos = normalize_pos(pos)
-            if stemmed == 'y':
-                self.stemmed[(word,pos)] = (this_type,prior_polarity)
-                if True or pos == '*': #anypos
-                    self.stemmed_anypos[word] = (this_type,prior_polarity)
-
-            elif stemmed == 'n':
-                self.no_stemmed[(word,pos)] = (this_type,prior_polarity)
-                if True or pos == '*':
-                    self.no_stemmed_anypos[word] = (this_type,prior_polarity)
-
-        fic.close()
-
-    def get_type_and_polarity(self,word,pos=None):
-        res = None
-        if pos is not None:
-            pos = normalize_pos(pos)
-
-        # Try no stemmed with the given pos
-        res = self.no_stemmed.get((word,pos))
-
-        # Try stemmed with the given pos
-        if res is None:
-            res = self.stemmed.get((word,pos))
-
-        # Try no stemmed with any pos
-        if res is None:
-            res = self.no_stemmed_anypos.get(word)
-
-        # Try stemm with any pos
-        if res is None:
-            res = self.stemmed_anypos.get(word)
-
-
-
-        return res
-
-
-if __name__ == '__main__':
-    o = MPQA_subjectivity_lexicon()
-    print o.get_type_and_polarity('abidance','adj')
-
-
@@ -1 +0,0 @@
-
@@ -1,11 +0,0 @@
-../VUKafParserPy/__init__.py
-../VUKafParserPy/KafDataObjectsMod.py
-../VUKafParserPy/KafParserMod.py
-../VUKafParserPy/__init__.pyc
-../VUKafParserPy/KafDataObjectsMod.pyc
-../VUKafParserPy/KafParserMod.pyc
-./
-dependency_links.txt
-PKG-INFO
-SOURCES.txt
-top_level.txt
@@ -1 +0,0 @@
-VUKafParserPy
@@ -1,165 +0,0 @@
-class KafTermSentiment:
-    def __init__(self):
-        self.resource=None
-        self.polarity=None
-        self.strength=None
-        self.subjectivity=None
-
-    def simpleInit(self,r,p,st,su,sm=None):
-        self.resource=r
-        self.polarity=p
-        self.strength=st
-        self.subjectivity=su
-        self.sentiment_modifier = sm
-
-    def getPolarity(self):
-        return self.polarity
-
-    def getSentimentModifier(self):
-        return self.sentiment_modifier
-
-
-class KafToken:
-    def __init__(self,wid, value, sent=None, para=None):
-        self.token_id = wid
-        self.value = value
-        self.sent = sent
-        self.para = para
-
-
-class KafOpinionExpression:
-    def __init__(self,polarity,strength,targets):
-        self.polarity = polarity
-        self.strength = strength
-        self.targets = targets
-
-    def __str__(self):
-        return 'Op_exp==> pol:'+self.polarity+' Str:'+self.strength+' ids:'+'-'.join(self.targets)
-
-class KafOpinion:
-    def __init__(self,id,holders, targets, opi_exp):
-        self.id = id
-        self.holders = holders
-        self.targets = targets
-        self.opi_exp = opi_exp
-
-    def __str__(self):
-        c='Opinion id'+self.id+'\n'
-        c+=' Holders: '+'-'.join(self.holders)+'\n'
-        c+=' Targets: '+'-'.join(self.targets)+'\n'
-        c+=str(self.opi_exp)
-        return c
-
-
-
-class KafSingleProperty:
-    def __init__(self,id,type,targets):
-        self.id = id
-        self.type = type
-        self.targets = targets
-
-
-    def get_id(self):
-        return self.id
-
-    def get_type(self):
-        return self.type
-
-    def get_span(self):
-        return self.targets
-
-    def __str__(self):
-        return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
-
-
-class KafSingleEntity:
-    def __init__(self,id,type,targets):
-        self.id = id
-        self.type = type
-        self.targets = targets
-
-    def get_id(self):
-        return self.id
-
-    def get_type(self):
-        return self.type
-
-    def get_span(self):
-        return self.targets
-
-    def __str__(self):
-        return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
-
-class KafTerm:
-    def __init__(self):
-        self.tid = None
-        self.lemma = None
-        self.pos = None
-        self.morphofeat = None
-        self.sentiment = None
-        self.list_span_id = []
-
-    def get_morphofeat(self):
-        return self.morphofeat
-
-    def set_list_span_id(self, L):
-        self.list_span_id = L
-
-    def get_list_span(self):
-        return self.list_span_id
-
-    def get_polarity(self):
-        if self.sentiment != None:
-            return self.sentiment.getPolarity()
-        else:
-            return None
-
-    def get_sentiment_modifier(self):
-        if self.sentiment != None:
-            return self.sentiment.getSentimentModifier()
-        else:
-            return None
-
-
-    def setSentiment(self,my_sent):
-        self.sentiment = my_sent
-
-    def getSentiment(self):
-        return self.sentiment
-
-    def getLemma(self):
-        return self.lemma
-
-    def setLemma(self,lemma):
-        self.lemma = lemma
-
-    def getPos(self):
-        return self.pos
-
-    def setPos(self,pos):
-        self.pos = pos
-
-    def getId(self):
-        return self.tid
-
-    def setId(self,id):
-        self.tid = id
-
-    def getShortPos(self):
-        if self.pos==None:
-            return None
-        auxpos=self.pos.lower()[0]
-        if auxpos == 'g': auxpos='a'
-        elif auxpos == 'a': auxpos='r'
-        return auxpos
-
-    def __str__(self):
-        if self.tid and self.lemma and self.pos:
-            return self.tid+'\n\t'+self.lemma.encode('utf-8')+'\n\t'+self.pos
-        else:
-            return 'None'
-
-
-
-
-
@@ -1,439 +0,0 @@
-########################################################################
-# 14 Jan 2013: added function add_attrs_to_layer
-########################################################################
-
-###################
-# List of changes #
-###################
-# 14 Jan 2013: added function add_attrs_to_layer
-# 27 Feb 2013: added code for comply with DTD
-# 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
-# 18 Jun 2013: funcion add_property created for adding the properties to the KAF
-
-
-from lxml import etree
-from KafDataObjectsMod import *
-import time
-
-class KafParser:
-    def __init__(self,filename=None):
-        self.tree=None
-        self.__pathForToken={}
-        self.__term_ids_for_token_id = None
-
-        if filename:
-            #self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
-            self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True, strip_cdata=False))
-            ## Do the text tokenization
-            self.__textTokenization()
-        else:
-            root = etree.Element('KAF')
-            root.set('version','v1.opener')
-            root.set('{http://www.w3.org/XML/1998/namespace}lang','en')
-            self.tree = etree.ElementTree(element=root)
-
-    def __textTokenization(self):
-        for wf in self.tree.findall('text/wf'):
-            wid = wf.get('wid')
-            self.__pathForToken[wid] = self.tree.getpath(wf)
-
-
-    def getToken(self,tid):
-        if tid in self.__pathForToken:
-            path = self.__pathForToken[tid]
-            return self.tree.xpath(self.__pathForToken[tid])[0]
-        return None
-
-
-    def getLanguage(self):
-        lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
-        return lang
-
-    ## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
-    ## [(s_id1, T1), (sent_id2, T2)....]
-    ## T1 --> [(tokenid, token), (tokenid2,token2)....]
-    def get_tokens_in_sentences(self):
-        sents = []
-        current = []
-        previous_sent = None
-        for element in self.tree.findall('text/wf'):
-            w_id = element.get('wid')
-            s_id = element.get('sent')
-            word = element.text
-
-            if previous_sent is not None and s_id != previous_sent:
-                sents.append((previous_sent,current))
-                current = []
-            current.append((w_id,word))
-            previous_sent = s_id
-        ####
-        sents.append((s_id,current))
-        return sents
-
-    def get_term_ids_for_token_id(self,tok_id):
-        if self.__term_ids_for_token_id is None:
-            self.__term_ids_for_token_id = {}
-            for element in self.tree.findall('terms/term'):
-                term_id = element.get('tid')
-                for target in element.findall('span/target'):
-                    token_id = target.get('id')
-                    if token_id not in self.__term_ids_for_token_id:
-                        self.__term_ids_for_token_id[token_id] = [term_id]
-                    else:
-                        self.__term_ids_for_token_id[token_id].append(term_id)
-        return self.__term_ids_for_token_id.get(tok_id,[])
-
-
-
-    def getTokens(self):
-        for element in self.tree.findall('text/wf'):
-            w_id = element.get('wid')
-            s_id = element.get('sent','0')
-            word = element.text
-            yield (word, s_id, w_id)
-
-
-
-    def getTerms(self):
-        if self.tree:
-            for element in self.tree.findall('terms/term'):
-                kafTermObj = KafTerm()
-                kafTermObj.setId(element.get('tid'))
-                kafTermObj.setLemma(element.get('lemma'))
-                kafTermObj.setPos(element.get('pos'))
-                kafTermObj.morphofeat = element.get('morphofeat')
-
-                ## Parsing sentiment
-                sentiment = element.find('sentiment')
-                if sentiment is not None:
-                    resource = sentiment.get('resource','')
-                    polarity = sentiment.get('polarity',None)
-                    strength = sentiment.get('strength','')
-                    subjectivity = sentiment.get('subjectivity','')
-                    sentiment_modifier = sentiment.get('sentiment_modifier')
-
-                    my_sent = KafTermSentiment()
-                    my_sent.simpleInit(resource,polarity,strength,subjectivity,sentiment_modifier)
-                    kafTermObj.setSentiment(my_sent)
-
-                ## Parsing the span
-                span = element.find('span')
-                if span is not None:
-                    list_ids = [target.get('id') for target in span.findall('target')]
-                    kafTermObj.set_list_span_id(list_ids)
-
-
-                yield kafTermObj
-        else:
-            return
-
-
-    def getSentimentTriples(self):
-        data = []
-        if self.tree:
-            for term_element in self.tree.findall('terms/term'):
-                lemma = term_element.get('lemma')
-                polarity = None
-                sentiment_modifier = None
-
-                sentiment_element = term_element.find('sentiment')
-                if sentiment_element is not None:
-                    polarity = sentiment_element.get('polarity',None)
-                    sentiment_modifier = sentiment_element.get('sentiment_modifier')
-                data.append( (lemma,polarity,sentiment_modifier))
-        return data
-
-
-
-    def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
-        if self.tree:
-            for element in self.tree.find('terms'):
-                if element.get('tid','')==termid:
-
-                    #In case there is no pos info, we use the polarityPos
-                    if not element.get('pos') and polarity_pos is not None:
-                        element.set('pos',polarity_pos)
-                    sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
-                    element.append(sentEle)
-
-    def saveToFile(self,filename,myencoding='UTF-8'):
-        if self.tree:
-            self.tree.write(filename,encoding=myencoding,pretty_print=True,xml_declaration=True)
-
-
-    def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
-        aux = self.tree.findall('kafHeader')
-        if len(aux)!=0:
-            kaf_header = aux[0]
-        else:
-            kaf_header = etree.Element('kafHeader')
-            self.tree.getroot().insert(0,kaf_header)
-
-        aux2= kaf_header.findall('linguisticProcessors')
-        if len(aux2) == 0:
-            new_lp = etree.Element('linguisticProcessors')
-            new_lp.set('layer',layer)
-            kaf_header.append(new_lp)
-
-        ## Check if there is already element for the layer
-        my_lp_ele = None
-
-        for element in kaf_header.findall('linguisticProcessors'):
-            if element.get('layer','')==layer:
-                my_lp_ele = element
-                break
-
-        if time_stamp:
-            my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
-        else:
-            my_time = '*'
-
-        my_lp = etree.Element('lp')
-        my_lp.set('timestamp',my_time)
-        my_lp.set('version',version)
-        my_lp.set('name',name)
-
-        if my_lp_ele is not None: #Already an element for linguisticProcessor with the layer
-            my_lp_ele.append(my_lp)
-        else:
-            # Create a new element for the LP layer
-            my_lp_ele = etree.Element('linguisticProcessors')
-            my_lp_ele.set('layer',layer)
-            my_lp_ele.append(my_lp)
-            #my_lp_ele.tail=my_lp_ele.text='\n'
-            ## Should be inserted after the last linguisticProcessor element (stored in variable element)
-            idx = kaf_header.index(element)
-            kaf_header.insert(idx+1,my_lp_ele)
-
-
-    def addLayer(self,type,element,first_char_id=None):
-        if first_char_id is None:
-            first_char_id = type[0]
-
-        ## Check if there is already layer for the type
-        layer_element = self.tree.find(type)
-
-        if layer_element is None:
-            layer_element = etree.Element(type)
-            self.tree.getroot().append(layer_element)
-            ## The id is going to be the first one
-            new_id = first_char_id+'1'
-        else:
-            ## We need to know how many elements there are in the layer
-            current_n = len(layer_element.getchildren())
-            new_id = first_char_id+''+str(current_n+1)
-
-
-        ## In this point layer_element points to the correct element, existing or created
-
-        element.set(first_char_id+'id',new_id)
-        layer_element.append(element)
-        return new_id
-
-    def addElementToLayer(self,layer, element,first_char_id=None):
-        return self.addLayer(layer,element,first_char_id)
-
-    def add_attrs_to_layer(self,layer,attrs):
-        layer_element = self.tree.find(layer)
-        if layer_element is not None:
-            for att, val in attrs.items():
-                layer_element.set(att,val)
-
-
-    def addAttributeToElement(self,path,str_id, id, attribute, value,sub_path=None):
-        for element in self.tree.findall(path):
-            if id is not None and element.get(str_id,None) == id:
-                if sub_path is not None:
-                    elements = element.findall(sub_path)
-                    if len(elements)!=0: element = elements[0]
-                element.set(attribute,value)
-                return
-
-
-    ## This works with the original definition of the property layer
-    ## KAF -> properties -> property* -> span* -> target*
-    def getSingleProperties_old(self):
-        for element in self.tree.findall('properties/property'):
-            my_id = element.get('pid')
-            my_type = element.get('type')
-            ref = element.find('references')
-            if ref is not None:
-                element = ref
-            for span_element in element.findall('span'):
-                target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
-                my_prop = KafSingleProperty(my_id,my_type,target_ids)
-                yield my_prop
-
-    ## 18-June-2013
-    def getSingleProperties(self):
-        for property in self.tree.findall('features/properties/property'):
-            my_id = property.get('pid')
-            if my_id is None:
-                my_id = property.get('fpid')
-            my_type = property.get('lemma')
-            for span_element in property.findall('references/span'):
-                target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
-                my_prop = KafSingleProperty(my_id,my_type,target_ids)
-                yield my_prop
-
-    # This function adds a new property of the type given with the list of ids given
-    # my_type -> 'sleeping comfort' list_ids = ['id1','id2']
-    # It creates the features/properties layers in case
-    # Agglomerates all the properties for the same TYPE under the same property element
-    # It calculates automatically the number for the identifier depending on the number
-    # of properties existing
-    def add_property(self,my_type,list_ids,comment=None):
-
-        #Looking for feature layer or creating it
-        feature_layer = self.tree.find('features')
-        if feature_layer is None:
-            feature_layer = etree.Element('features')
-            self.tree.getroot().append(feature_layer)
-
-        #Looking for properties layer
-        properties_layer = feature_layer.find('properties')
-        if properties_layer is None:
-            properties_layer = etree.Element('properties')
-            feature_layer.append(properties_layer)
-
-        num_props = 0
-        property_layer = None
-        for property in properties_layer.findall('property'):
-            num_props += 1
-            prop_type = property.get('lemma')
-            if prop_type == my_type:
-                property_layer = property
-                break
-
-        if property_layer is None: # There is no any property for that type, let's create one
-            property_layer = etree.Element('property')
-            property_layer.set('pid','p'+str(num_props+1))
-            property_layer.set('lemma',my_type)
-            properties_layer.append(property_layer)
-
-
-        references = property_layer.find('references')
-        if references is None:
-            references = etree.Element('references')
-            property_layer.append(references)
-        ## Create the new span
-        if comment is not None:
-            references.append(etree.Comment(comment))
-        span = etree.Element('span')
-        references.append(span)
-        for my_id in list_ids:
-            span.append(etree.Element('target',attrib={'id':my_id}))
-
-
-
-
-    def getSingleEntities(self):
-        for element in self.tree.findall('entities/entity'):
-            my_id = element.get('eid')
-            my_type = element.get('type')
-            my_path_to_span = None
-            ref = element.find('references')
-            if ref is not None:
-                my_path_to_span = 'references/span'
-            else:
-                my_path_to_span = 'span'
-
-            for span_element in element.findall(my_path_to_span):
-                target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
-                my_prop = KafSingleEntity(my_id,my_type,target_ids)
-                yield my_prop
-
-
-    def getOpinions(self):
-        for element in self.tree.findall('opinions/opinion'):
-            my_id = element.get('oid')
-
-            tar_ids_hol = []
-            tar_ids_tar = []
-            polarity = strenght = ''
-            tar_ids_exp = []
-
-            #Holder
-            opi_hol_eles = element.findall('opinion_holder')
-            if len(opi_hol_eles)!=0:
-                opi_hol_ele = opi_hol_eles[0]
-                tar_ids_hol = [t_ele.get('id') for t_ele in opi_hol_ele.findall('span/target')]
-
-            #Target
-            opi_tar_eles = element.findall('opinion_target')
-            if len(opi_tar_eles) != 0:
-                opi_tar_ele = opi_tar_eles[0]
-                tar_ids_tar = [t_ele.get('id') for t_ele in opi_tar_ele.findall('span/target')]
-
-            ## Opinion expression
-            opi_exp_eles = element.findall('opinion_expression')
-            if len(opi_exp_eles) != 0:
-                opi_exp_ele = opi_exp_eles[0]
-                polarity = opi_exp_ele.get('polarity','')
-                strength = opi_exp_ele.get('strength','')
-                tar_ids_exp = [t_ele.get('id') for t_ele in opi_exp_ele.findall('span/target')]
-
-            yield KafOpinion(my_id,tar_ids_hol, tar_ids_tar, KafOpinionExpression(polarity, strength,tar_ids_exp))
-
-
-
-    def remove_opinion_layer(self):
-        opinion_layer = self.tree.find('opinions')
-        if opinion_layer is not None:
-            self.tree.getroot().remove(opinion_layer)
-
-    ## This function add an opinion to the opinion layer, creating it if does not exist
-    ## The id is calculated automatically according to the number of elements and ensring there is no repetition
-    def add_opinion(self,hol_ids,tar_ids,polarity,strength,exp_ids):
-
-        #Looking for opinion layer or creating it
-        opinion_layer = self.tree.find('opinions')
-        if opinion_layer is None:
-            opinion_layer = etree.Element('opinions')
-            self.tree.getroot().append(opinion_layer)
-
-        ## Generating unique id
-        list_of_oids = [opi.get('oid') for opi in opinion_layer]
-
-        n = 1
-        while True:
-            my_id = 'o'+str(n)
-            if my_id not in list_of_oids:
-                break
-            n += 1
-        #####
-
-        op_ele = etree.Element('opinion')
-        opinion_layer.append(op_ele)
-        op_ele.set('oid',my_id)
-
-        ## Holder
-        op_hol = etree.Element('opinion_holder')
-        op_ele.append(op_hol)
-        span_op_hol = etree.Element('span')
-        op_hol.append(span_op_hol)
-        for my_id in hol_ids:
-            span_op_hol.append(etree.Element('target',attrib={'id':my_id}))
-
-        ## TARGET
-        op_tar = etree.Element('opinion_target')
-        op_ele.append(op_tar)
-        span_op_tar = etree.Element('span')
-        op_tar.append(span_op_tar)
-        for my_id in tar_ids:
-            span_op_tar.append(etree.Element('target',attrib={'id':my_id}))
-
-        ## Expression
-
-        op_exp = etree.Element('opinion_expression',attrib={'polarity':polarity,
-                                                            'strength':str(strength)})
-        op_ele.append(op_exp)
-        span_exp = etree.Element('span')
-        op_exp.append(span_exp)
-        for my_id in exp_ids:
-            span_exp.append(etree.Element('target',attrib={'id':my_id}))
-
-
-
-
-