opener-opinion-detector-basic 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +118 -7
- data/bin/opinion-detector-basic-daemon +10 -0
- data/core/opinion_detector_basic_multi.py +62 -62
- data/exec/opinion-detector-basic.rb +9 -0
- data/ext/hack/Rakefile +3 -2
- data/lib/opener/opinion_detector_basic/version.rb +1 -1
- data/opener-opinion-detector-basic.gemspec +5 -1
- data/pre_install_requirements.txt +1 -0
- metadata +36 -14
- data/core/packages/KafNafParser-1.3.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.4.tar.gz +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/pre_build_requirements.txt +0 -1
data/ext/hack/Rakefile
CHANGED
@@ -12,6 +12,7 @@ task :compile => :requirements do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
task :default => [:compile] do
|
15
|
-
|
16
|
-
|
15
|
+
path = File.join(PYTHON_SITE_PACKAGES, 'pre_install')
|
16
|
+
|
17
|
+
pip_install(PRE_INSTALL_REQUIREMENTS, path)
|
17
18
|
end
|
@@ -7,6 +7,7 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.summary = 'Basic Opinion Detector.'
|
8
8
|
gem.description = gem.summary
|
9
9
|
gem.homepage = 'http://opener-project.github.com/'
|
10
|
+
gem.extensions = ['ext/hack/Rakefile']
|
10
11
|
|
11
12
|
gem.required_ruby_version = '>= 1.9.2'
|
12
13
|
|
@@ -20,7 +21,8 @@ Gem::Specification.new do |gem|
|
|
20
21
|
'config.ru',
|
21
22
|
'*.gemspec',
|
22
23
|
'*_requirements.txt',
|
23
|
-
'README.md'
|
24
|
+
'README.md',
|
25
|
+
'exec/**/*'
|
24
26
|
]).select { |file| File.file?(file) }
|
25
27
|
|
26
28
|
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
@@ -30,6 +32,8 @@ Gem::Specification.new do |gem|
|
|
30
32
|
gem.add_dependency 'sinatra'
|
31
33
|
gem.add_dependency 'httpclient'
|
32
34
|
gem.add_dependency 'puma'
|
35
|
+
gem.add_dependency 'opener-daemons'
|
36
|
+
gem.add_dependency 'opener-core', ['>= 0.1.2']
|
33
37
|
gem.add_dependency 'opener-webservice'
|
34
38
|
|
35
39
|
gem.add_development_dependency 'rspec'
|
@@ -0,0 +1 @@
|
|
1
|
+
https://github.com/opener-project/VU-kaf-parser/archive/master.zip#egg=VUKafParserPy
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-opinion-detector-basic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-build-tools
|
@@ -80,6 +80,34 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: opener-daemons
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: opener-core
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.1.2
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.1.2
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
112
|
name: opener-webservice
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,24 +155,17 @@ email:
|
|
127
155
|
executables:
|
128
156
|
- opinion-detector-basic-server
|
129
157
|
- opinion-detector-basic
|
130
|
-
|
158
|
+
- opinion-detector-basic-daemon
|
159
|
+
extensions:
|
160
|
+
- ext/hack/Rakefile
|
131
161
|
extra_rdoc_files: []
|
132
162
|
files:
|
133
163
|
- README.md
|
134
164
|
- bin/opinion-detector-basic
|
165
|
+
- bin/opinion-detector-basic-daemon
|
135
166
|
- bin/opinion-detector-basic-server
|
136
167
|
- config.ru
|
137
168
|
- core/opinion_detector_basic_multi.py
|
138
|
-
- core/packages/KafNafParser-1.3.tar.gz
|
139
|
-
- core/packages/VUA_pylib-1.4.tar.gz
|
140
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
|
141
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
|
142
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt
|
143
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
|
144
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
|
145
|
-
- core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
|
146
|
-
- core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
|
147
|
-
- core/site-packages/pre_build/VUKafParserPy/__init__.py
|
148
169
|
- core/vendor/src/crfsuite/AUTHORS
|
149
170
|
- core/vendor/src/crfsuite/COPYING
|
150
171
|
- core/vendor/src/crfsuite/ChangeLog
|
@@ -283,6 +304,7 @@ files:
|
|
283
304
|
- core/vendor/src/svm_light/svm_learn.h
|
284
305
|
- core/vendor/src/svm_light/svm_learn_main.c
|
285
306
|
- core/vendor/src/svm_light/svm_loqo.c
|
307
|
+
- exec/opinion-detector-basic.rb
|
286
308
|
- ext/hack/Rakefile
|
287
309
|
- ext/hack/support.rb
|
288
310
|
- lib/opener/opinion_detector_basic.rb
|
@@ -292,7 +314,7 @@ files:
|
|
292
314
|
- lib/opener/opinion_detector_basic/views/index.erb
|
293
315
|
- lib/opener/opinion_detector_basic/views/result.erb
|
294
316
|
- opener-opinion-detector-basic.gemspec
|
295
|
-
- pre_build_requirements.txt
|
317
|
+
- pre_install_requirements.txt
|
296
318
|
homepage: http://opener-project.github.com/
|
297
319
|
licenses: []
|
298
320
|
metadata: {}
|
Binary file
|
Binary file
|
@@ -1 +0,0 @@
|
|
1
|
-
|
@@ -1,11 +0,0 @@
|
|
1
|
-
../VUKafParserPy/KafParserMod.py
|
2
|
-
../VUKafParserPy/__init__.py
|
3
|
-
../VUKafParserPy/KafDataObjectsMod.py
|
4
|
-
../VUKafParserPy/KafParserMod.pyc
|
5
|
-
../VUKafParserPy/__init__.pyc
|
6
|
-
../VUKafParserPy/KafDataObjectsMod.pyc
|
7
|
-
./
|
8
|
-
top_level.txt
|
9
|
-
SOURCES.txt
|
10
|
-
PKG-INFO
|
11
|
-
dependency_links.txt
|
@@ -1 +0,0 @@
|
|
1
|
-
VUKafParserPy
|
@@ -1,165 +0,0 @@
|
|
1
|
-
class KafTermSentiment:
|
2
|
-
def __init__(self):
|
3
|
-
self.resource=None
|
4
|
-
self.polarity=None
|
5
|
-
self.strength=None
|
6
|
-
self.subjectivity=None
|
7
|
-
|
8
|
-
def simpleInit(self,r,p,st,su,sm=None):
|
9
|
-
self.resource=r
|
10
|
-
self.polarity=p
|
11
|
-
self.strength=st
|
12
|
-
self.subjectivity=su
|
13
|
-
self.sentiment_modifier = sm
|
14
|
-
|
15
|
-
def getPolarity(self):
|
16
|
-
return self.polarity
|
17
|
-
|
18
|
-
def getSentimentModifier(self):
|
19
|
-
return self.sentiment_modifier
|
20
|
-
|
21
|
-
|
22
|
-
class KafToken:
|
23
|
-
def __init__(self,wid, value, sent=None, para=None):
|
24
|
-
self.token_id = wid
|
25
|
-
self.value = value
|
26
|
-
self.sent = sent
|
27
|
-
self.para = para
|
28
|
-
|
29
|
-
|
30
|
-
class KafOpinionExpression:
|
31
|
-
def __init__(self,polarity,strength,targets):
|
32
|
-
self.polarity = polarity
|
33
|
-
self.strength = strength
|
34
|
-
self.targets = targets
|
35
|
-
|
36
|
-
def __str__(self):
|
37
|
-
return 'Op_exp==> pol:'+self.polarity+' Str:'+self.strength+' ids:'+'-'.join(self.targets)
|
38
|
-
|
39
|
-
class KafOpinion:
|
40
|
-
def __init__(self,id,holders, targets, opi_exp):
|
41
|
-
self.id = id
|
42
|
-
self.holders = holders
|
43
|
-
self.targets = targets
|
44
|
-
self.opi_exp = opi_exp
|
45
|
-
|
46
|
-
def __str__(self):
|
47
|
-
c='Opinion id'+self.id+'\n'
|
48
|
-
c+=' Holders: '+'-'.join(self.holders)+'\n'
|
49
|
-
c+=' Targets: '+'-'.join(self.targets)+'\n'
|
50
|
-
c+=str(self.opi_exp)
|
51
|
-
return c
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
class KafSingleProperty:
|
56
|
-
def __init__(self,id,type,targets):
|
57
|
-
self.id = id
|
58
|
-
self.type = type
|
59
|
-
self.targets = targets
|
60
|
-
|
61
|
-
|
62
|
-
def get_id(self):
|
63
|
-
return self.id
|
64
|
-
|
65
|
-
def get_type(self):
|
66
|
-
return self.type
|
67
|
-
|
68
|
-
def get_span(self):
|
69
|
-
return self.targets
|
70
|
-
|
71
|
-
def __str__(self):
|
72
|
-
return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
|
73
|
-
|
74
|
-
|
75
|
-
class KafSingleEntity:
|
76
|
-
def __init__(self,id,type,targets):
|
77
|
-
self.id = id
|
78
|
-
self.type = type
|
79
|
-
self.targets = targets
|
80
|
-
|
81
|
-
def get_id(self):
|
82
|
-
return self.id
|
83
|
-
|
84
|
-
def get_type(self):
|
85
|
-
return self.type
|
86
|
-
|
87
|
-
def get_span(self):
|
88
|
-
return self.targets
|
89
|
-
|
90
|
-
def __str__(self):
|
91
|
-
return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
|
92
|
-
|
93
|
-
class KafTerm:
|
94
|
-
def __init__(self):
|
95
|
-
self.tid = None
|
96
|
-
self.lemma = None
|
97
|
-
self.pos = None
|
98
|
-
self.morphofeat = None
|
99
|
-
self.sentiment = None
|
100
|
-
self.list_span_id = []
|
101
|
-
|
102
|
-
def get_morphofeat(self):
|
103
|
-
return self.morphofeat
|
104
|
-
|
105
|
-
def set_list_span_id(self, L):
|
106
|
-
self.list_span_id = L
|
107
|
-
|
108
|
-
def get_list_span(self):
|
109
|
-
return self.list_span_id
|
110
|
-
|
111
|
-
def get_polarity(self):
|
112
|
-
if self.sentiment != None:
|
113
|
-
return self.sentiment.getPolarity()
|
114
|
-
else:
|
115
|
-
return None
|
116
|
-
|
117
|
-
def get_sentiment_modifier(self):
|
118
|
-
if self.sentiment != None:
|
119
|
-
return self.sentiment.getSentimentModifier()
|
120
|
-
else:
|
121
|
-
return None
|
122
|
-
|
123
|
-
|
124
|
-
def setSentiment(self,my_sent):
|
125
|
-
self.sentiment = my_sent
|
126
|
-
|
127
|
-
def getSentiment(self):
|
128
|
-
return self.sentiment
|
129
|
-
|
130
|
-
def getLemma(self):
|
131
|
-
return self.lemma
|
132
|
-
|
133
|
-
def setLemma(self,lemma):
|
134
|
-
self.lemma = lemma
|
135
|
-
|
136
|
-
def getPos(self):
|
137
|
-
return self.pos
|
138
|
-
|
139
|
-
def setPos(self,pos):
|
140
|
-
self.pos = pos
|
141
|
-
|
142
|
-
def getId(self):
|
143
|
-
return self.tid
|
144
|
-
|
145
|
-
def setId(self,id):
|
146
|
-
self.tid = id
|
147
|
-
|
148
|
-
def getShortPos(self):
|
149
|
-
if self.pos==None:
|
150
|
-
return None
|
151
|
-
auxpos=self.pos.lower()[0]
|
152
|
-
if auxpos == 'g': auxpos='a'
|
153
|
-
elif auxpos == 'a': auxpos='r'
|
154
|
-
return auxpos
|
155
|
-
|
156
|
-
def __str__(self):
|
157
|
-
if self.tid and self.lemma and self.pos:
|
158
|
-
return self.tid+'\n\t'+self.lemma.encode('utf-8')+'\n\t'+self.pos
|
159
|
-
else:
|
160
|
-
return 'None'
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
@@ -1,439 +0,0 @@
|
|
1
|
-
########################################################################
|
2
|
-
# 14 Jan 2013: added function add_attrs_to_layer
|
3
|
-
########################################################################
|
4
|
-
|
5
|
-
###################
|
6
|
-
# List of changes #
|
7
|
-
###################
|
8
|
-
# 14 Jan 2013: added function add_attrs_to_layer
|
9
|
-
# 27 Feb 2013: added code to comply with the DTD
|
10
|
-
# 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
|
11
|
-
# 18 Jun 2013: function add_property created for adding the properties to the KAF
|
12
|
-
|
13
|
-
|
14
|
-
from lxml import etree
|
15
|
-
from KafDataObjectsMod import *
|
16
|
-
import time
|
17
|
-
|
18
|
-
class KafParser:
|
19
|
-
def __init__(self,filename=None):
|
20
|
-
self.tree=None
|
21
|
-
self.__pathForToken={}
|
22
|
-
self.__term_ids_for_token_id = None
|
23
|
-
|
24
|
-
if filename:
|
25
|
-
#self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
|
26
|
-
self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True, strip_cdata=False))
|
27
|
-
## Do the text tokenization
|
28
|
-
self.__textTokenization()
|
29
|
-
else:
|
30
|
-
root = etree.Element('KAF')
|
31
|
-
root.set('version','v1.opener')
|
32
|
-
root.set('{http://www.w3.org/XML/1998/namespace}lang','en')
|
33
|
-
self.tree = etree.ElementTree(element=root)
|
34
|
-
|
35
|
-
def __textTokenization(self):
|
36
|
-
for wf in self.tree.findall('text/wf'):
|
37
|
-
wid = wf.get('wid')
|
38
|
-
self.__pathForToken[wid] = self.tree.getpath(wf)
|
39
|
-
|
40
|
-
|
41
|
-
def getToken(self,tid):
|
42
|
-
if tid in self.__pathForToken:
|
43
|
-
path = self.__pathForToken[tid]
|
44
|
-
return self.tree.xpath(self.__pathForToken[tid])[0]
|
45
|
-
return None
|
46
|
-
|
47
|
-
|
48
|
-
def getLanguage(self):
|
49
|
-
lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
|
50
|
-
return lang
|
51
|
-
|
52
|
-
## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
|
53
|
-
## [(s_id1, T1), (sent_id2, T2)....]
|
54
|
-
## T1 --> [(tokenid, token), (tokenid2,token2)....]
|
55
|
-
def get_tokens_in_sentences(self):
|
56
|
-
sents = []
|
57
|
-
current = []
|
58
|
-
previous_sent = None
|
59
|
-
for element in self.tree.findall('text/wf'):
|
60
|
-
w_id = element.get('wid')
|
61
|
-
s_id = element.get('sent')
|
62
|
-
word = element.text
|
63
|
-
|
64
|
-
if previous_sent is not None and s_id != previous_sent:
|
65
|
-
sents.append((previous_sent,current))
|
66
|
-
current = []
|
67
|
-
current.append((w_id,word))
|
68
|
-
previous_sent = s_id
|
69
|
-
####
|
70
|
-
sents.append((s_id,current))
|
71
|
-
return sents
|
72
|
-
|
73
|
-
def get_term_ids_for_token_id(self,tok_id):
|
74
|
-
if self.__term_ids_for_token_id is None:
|
75
|
-
self.__term_ids_for_token_id = {}
|
76
|
-
for element in self.tree.findall('terms/term'):
|
77
|
-
term_id = element.get('tid')
|
78
|
-
for target in element.findall('span/target'):
|
79
|
-
token_id = target.get('id')
|
80
|
-
if token_id not in self.__term_ids_for_token_id:
|
81
|
-
self.__term_ids_for_token_id[token_id] = [term_id]
|
82
|
-
else:
|
83
|
-
self.__term_ids_for_token_id[token_id].append(term_id)
|
84
|
-
return self.__term_ids_for_token_id.get(tok_id,[])
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
def getTokens(self):
|
89
|
-
for element in self.tree.findall('text/wf'):
|
90
|
-
w_id = element.get('wid')
|
91
|
-
s_id = element.get('sent','0')
|
92
|
-
word = element.text
|
93
|
-
yield (word, s_id, w_id)
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
def getTerms(self):
|
98
|
-
if self.tree:
|
99
|
-
for element in self.tree.findall('terms/term'):
|
100
|
-
kafTermObj = KafTerm()
|
101
|
-
kafTermObj.setId(element.get('tid'))
|
102
|
-
kafTermObj.setLemma(element.get('lemma'))
|
103
|
-
kafTermObj.setPos(element.get('pos'))
|
104
|
-
kafTermObj.morphofeat = element.get('morphofeat')
|
105
|
-
|
106
|
-
## Parsing sentiment
|
107
|
-
sentiment = element.find('sentiment')
|
108
|
-
if sentiment is not None:
|
109
|
-
resource = sentiment.get('resource','')
|
110
|
-
polarity = sentiment.get('polarity',None)
|
111
|
-
strength = sentiment.get('strength','')
|
112
|
-
subjectivity = sentiment.get('subjectivity','')
|
113
|
-
sentiment_modifier = sentiment.get('sentiment_modifier')
|
114
|
-
|
115
|
-
my_sent = KafTermSentiment()
|
116
|
-
my_sent.simpleInit(resource,polarity,strength,subjectivity,sentiment_modifier)
|
117
|
-
kafTermObj.setSentiment(my_sent)
|
118
|
-
|
119
|
-
## Parsing the span
|
120
|
-
span = element.find('span')
|
121
|
-
if span is not None:
|
122
|
-
list_ids = [target.get('id') for target in span.findall('target')]
|
123
|
-
kafTermObj.set_list_span_id(list_ids)
|
124
|
-
|
125
|
-
|
126
|
-
yield kafTermObj
|
127
|
-
else:
|
128
|
-
return
|
129
|
-
|
130
|
-
|
131
|
-
def getSentimentTriples(self):
|
132
|
-
data = []
|
133
|
-
if self.tree:
|
134
|
-
for term_element in self.tree.findall('terms/term'):
|
135
|
-
lemma = term_element.get('lemma')
|
136
|
-
polarity = None
|
137
|
-
sentiment_modifier = None
|
138
|
-
|
139
|
-
sentiment_element = term_element.find('sentiment')
|
140
|
-
if sentiment_element is not None:
|
141
|
-
polarity = sentiment_element.get('polarity',None)
|
142
|
-
sentiment_modifier = sentiment_element.get('sentiment_modifier')
|
143
|
-
data.append( (lemma,polarity,sentiment_modifier))
|
144
|
-
return data
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
|
149
|
-
if self.tree:
|
150
|
-
for element in self.tree.find('terms'):
|
151
|
-
if element.get('tid','')==termid:
|
152
|
-
|
153
|
-
#In case there is no pos info, we use the polarityPos
|
154
|
-
if not element.get('pos') and polarity_pos is not None:
|
155
|
-
element.set('pos',polarity_pos)
|
156
|
-
sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
|
157
|
-
element.append(sentEle)
|
158
|
-
|
159
|
-
def saveToFile(self,filename,myencoding='UTF-8'):
|
160
|
-
if self.tree:
|
161
|
-
self.tree.write(filename,encoding=myencoding,pretty_print=True,xml_declaration=True)
|
162
|
-
|
163
|
-
|
164
|
-
def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
|
165
|
-
aux = self.tree.findall('kafHeader')
|
166
|
-
if len(aux)!=0:
|
167
|
-
kaf_header = aux[0]
|
168
|
-
else:
|
169
|
-
kaf_header = etree.Element('kafHeader')
|
170
|
-
self.tree.getroot().insert(0,kaf_header)
|
171
|
-
|
172
|
-
aux2= kaf_header.findall('linguisticProcessors')
|
173
|
-
if len(aux2) == 0:
|
174
|
-
new_lp = etree.Element('linguisticProcessors')
|
175
|
-
new_lp.set('layer',layer)
|
176
|
-
kaf_header.append(new_lp)
|
177
|
-
|
178
|
-
## Check if there is already element for the layer
|
179
|
-
my_lp_ele = None
|
180
|
-
|
181
|
-
for element in kaf_header.findall('linguisticProcessors'):
|
182
|
-
if element.get('layer','')==layer:
|
183
|
-
my_lp_ele = element
|
184
|
-
break
|
185
|
-
|
186
|
-
if time_stamp:
|
187
|
-
my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
|
188
|
-
else:
|
189
|
-
my_time = '*'
|
190
|
-
|
191
|
-
my_lp = etree.Element('lp')
|
192
|
-
my_lp.set('timestamp',my_time)
|
193
|
-
my_lp.set('version',version)
|
194
|
-
my_lp.set('name',name)
|
195
|
-
|
196
|
-
if my_lp_ele is not None: #Already an element for linguisticProcessor with the layer
|
197
|
-
my_lp_ele.append(my_lp)
|
198
|
-
else:
|
199
|
-
# Create a new element for the LP layer
|
200
|
-
my_lp_ele = etree.Element('linguisticProcessors')
|
201
|
-
my_lp_ele.set('layer',layer)
|
202
|
-
my_lp_ele.append(my_lp)
|
203
|
-
#my_lp_ele.tail=my_lp_ele.text='\n'
|
204
|
-
## Should be inserted after the last linguisticProcessor element (stored in variable element)
|
205
|
-
idx = kaf_header.index(element)
|
206
|
-
kaf_header.insert(idx+1,my_lp_ele)
|
207
|
-
|
208
|
-
|
209
|
-
def addLayer(self,type,element,first_char_id=None):
|
210
|
-
if first_char_id is None:
|
211
|
-
first_char_id = type[0]
|
212
|
-
|
213
|
-
## Check if there is already layer for the type
|
214
|
-
layer_element = self.tree.find(type)
|
215
|
-
|
216
|
-
if layer_element is None:
|
217
|
-
layer_element = etree.Element(type)
|
218
|
-
self.tree.getroot().append(layer_element)
|
219
|
-
## The id is going to be the first one
|
220
|
-
new_id = first_char_id+'1'
|
221
|
-
else:
|
222
|
-
## We need to know how many elements there are in the layer
|
223
|
-
current_n = len(layer_element.getchildren())
|
224
|
-
new_id = first_char_id+''+str(current_n+1)
|
225
|
-
|
226
|
-
|
227
|
-
## In this point layer_element points to the correct element, existing or created
|
228
|
-
|
229
|
-
element.set(first_char_id+'id',new_id)
|
230
|
-
layer_element.append(element)
|
231
|
-
return new_id
|
232
|
-
|
233
|
-
def addElementToLayer(self,layer, element,first_char_id=None):
|
234
|
-
return self.addLayer(layer,element,first_char_id)
|
235
|
-
|
236
|
-
def add_attrs_to_layer(self,layer,attrs):
|
237
|
-
layer_element = self.tree.find(layer)
|
238
|
-
if layer_element is not None:
|
239
|
-
for att, val in attrs.items():
|
240
|
-
layer_element.set(att,val)
|
241
|
-
|
242
|
-
|
243
|
-
def addAttributeToElement(self,path,str_id, id, attribute, value,sub_path=None):
|
244
|
-
for element in self.tree.findall(path):
|
245
|
-
if id is not None and element.get(str_id,None) == id:
|
246
|
-
if sub_path is not None:
|
247
|
-
elements = element.findall(sub_path)
|
248
|
-
if len(elements)!=0: element = elements[0]
|
249
|
-
element.set(attribute,value)
|
250
|
-
return
|
251
|
-
|
252
|
-
|
253
|
-
## This works with the original definition of the property layer
|
254
|
-
## KAF -> properties -> property* -> span* -> target*
|
255
|
-
def getSingleProperties_old(self):
|
256
|
-
for element in self.tree.findall('properties/property'):
|
257
|
-
my_id = element.get('pid')
|
258
|
-
my_type = element.get('type')
|
259
|
-
ref = element.find('references')
|
260
|
-
if ref is not None:
|
261
|
-
element = ref
|
262
|
-
for span_element in element.findall('span'):
|
263
|
-
target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
|
264
|
-
my_prop = KafSingleProperty(my_id,my_type,target_ids)
|
265
|
-
yield my_prop
|
266
|
-
|
267
|
-
## 18-June-2013
|
268
|
-
def getSingleProperties(self):
|
269
|
-
for property in self.tree.findall('features/properties/property'):
|
270
|
-
my_id = property.get('pid')
|
271
|
-
if my_id is None:
|
272
|
-
my_id = property.get('fpid')
|
273
|
-
my_type = property.get('lemma')
|
274
|
-
for span_element in property.findall('references/span'):
|
275
|
-
target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
|
276
|
-
my_prop = KafSingleProperty(my_id,my_type,target_ids)
|
277
|
-
yield my_prop
|
278
|
-
|
279
|
-
# This function adds a new property of the type given with the list of ids given
|
280
|
-
# my_type -> 'sleeping comfort' list_ids = ['id1','id2']
|
281
|
-
# It creates the features/properties layers in case they do not exist
|
282
|
-
# Agglomerates all the properties for the same TYPE under the same property element
|
283
|
-
# It calculates automatically the number for the identifier depending on the number
|
284
|
-
# of properties existing
|
285
|
-
def add_property(self,my_type,list_ids,comment=None):
|
286
|
-
|
287
|
-
#Looking for feature layer or creating it
|
288
|
-
feature_layer = self.tree.find('features')
|
289
|
-
if feature_layer is None:
|
290
|
-
feature_layer = etree.Element('features')
|
291
|
-
self.tree.getroot().append(feature_layer)
|
292
|
-
|
293
|
-
#Looking for properties layer
|
294
|
-
properties_layer = feature_layer.find('properties')
|
295
|
-
if properties_layer is None:
|
296
|
-
properties_layer = etree.Element('properties')
|
297
|
-
feature_layer.append(properties_layer)
|
298
|
-
|
299
|
-
num_props = 0
|
300
|
-
property_layer = None
|
301
|
-
for property in properties_layer.findall('property'):
|
302
|
-
num_props += 1
|
303
|
-
prop_type = property.get('lemma')
|
304
|
-
if prop_type == my_type:
|
305
|
-
property_layer = property
|
306
|
-
break
|
307
|
-
|
308
|
-
if property_layer is None: # There is no any property for that type, let's create one
|
309
|
-
property_layer = etree.Element('property')
|
310
|
-
property_layer.set('pid','p'+str(num_props+1))
|
311
|
-
property_layer.set('lemma',my_type)
|
312
|
-
properties_layer.append(property_layer)
|
313
|
-
|
314
|
-
|
315
|
-
references = property_layer.find('references')
|
316
|
-
if references is None:
|
317
|
-
references = etree.Element('references')
|
318
|
-
property_layer.append(references)
|
319
|
-
## Create the new span
|
320
|
-
if comment is not None:
|
321
|
-
references.append(etree.Comment(comment))
|
322
|
-
span = etree.Element('span')
|
323
|
-
references.append(span)
|
324
|
-
for my_id in list_ids:
|
325
|
-
span.append(etree.Element('target',attrib={'id':my_id}))
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
def getSingleEntities(self):
|
331
|
-
for element in self.tree.findall('entities/entity'):
|
332
|
-
my_id = element.get('eid')
|
333
|
-
my_type = element.get('type')
|
334
|
-
my_path_to_span = None
|
335
|
-
ref = element.find('references')
|
336
|
-
if ref is not None:
|
337
|
-
my_path_to_span = 'references/span'
|
338
|
-
else:
|
339
|
-
my_path_to_span = 'span'
|
340
|
-
|
341
|
-
for span_element in element.findall(my_path_to_span):
|
342
|
-
target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
|
343
|
-
my_prop = KafSingleEntity(my_id,my_type,target_ids)
|
344
|
-
yield my_prop
|
345
|
-
|
346
|
-
|
347
|
-
def getOpinions(self):
|
348
|
-
for element in self.tree.findall('opinions/opinion'):
|
349
|
-
my_id = element.get('oid')
|
350
|
-
|
351
|
-
tar_ids_hol = []
|
352
|
-
tar_ids_tar = []
|
353
|
-
polarity = strenght = ''
|
354
|
-
tar_ids_exp = []
|
355
|
-
|
356
|
-
#Holder
|
357
|
-
opi_hol_eles = element.findall('opinion_holder')
|
358
|
-
if len(opi_hol_eles)!=0:
|
359
|
-
opi_hol_ele = opi_hol_eles[0]
|
360
|
-
tar_ids_hol = [t_ele.get('id') for t_ele in opi_hol_ele.findall('span/target')]
|
361
|
-
|
362
|
-
#Target
|
363
|
-
opi_tar_eles = element.findall('opinion_target')
|
364
|
-
if len(opi_tar_eles) != 0:
|
365
|
-
opi_tar_ele = opi_tar_eles[0]
|
366
|
-
tar_ids_tar = [t_ele.get('id') for t_ele in opi_tar_ele.findall('span/target')]
|
367
|
-
|
368
|
-
## Opinion expression
|
369
|
-
opi_exp_eles = element.findall('opinion_expression')
|
370
|
-
if len(opi_exp_eles) != 0:
|
371
|
-
opi_exp_ele = opi_exp_eles[0]
|
372
|
-
polarity = opi_exp_ele.get('polarity','')
|
373
|
-
strength = opi_exp_ele.get('strength','')
|
374
|
-
tar_ids_exp = [t_ele.get('id') for t_ele in opi_exp_ele.findall('span/target')]
|
375
|
-
|
376
|
-
yield KafOpinion(my_id,tar_ids_hol, tar_ids_tar, KafOpinionExpression(polarity, strength,tar_ids_exp))
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
def remove_opinion_layer(self):
|
381
|
-
opinion_layer = self.tree.find('opinions')
|
382
|
-
if opinion_layer is not None:
|
383
|
-
self.tree.getroot().remove(opinion_layer)
|
384
|
-
|
385
|
-
## This function adds an opinion to the opinion layer, creating it if it does not exist
|
386
|
-
## The id is calculated automatically according to the number of elements and ensuring there is no repetition
|
387
|
-
def add_opinion(self,hol_ids,tar_ids,polarity,strength,exp_ids):
|
388
|
-
|
389
|
-
#Looking for opinion layer or creating it
|
390
|
-
opinion_layer = self.tree.find('opinions')
|
391
|
-
if opinion_layer is None:
|
392
|
-
opinion_layer = etree.Element('opinions')
|
393
|
-
self.tree.getroot().append(opinion_layer)
|
394
|
-
|
395
|
-
## Generating unique id
|
396
|
-
list_of_oids = [opi.get('oid') for opi in opinion_layer]
|
397
|
-
|
398
|
-
n = 1
|
399
|
-
while True:
|
400
|
-
my_id = 'o'+str(n)
|
401
|
-
if my_id not in list_of_oids:
|
402
|
-
break
|
403
|
-
n += 1
|
404
|
-
#####
|
405
|
-
|
406
|
-
op_ele = etree.Element('opinion')
|
407
|
-
opinion_layer.append(op_ele)
|
408
|
-
op_ele.set('oid',my_id)
|
409
|
-
|
410
|
-
## Holder
|
411
|
-
op_hol = etree.Element('opinion_holder')
|
412
|
-
op_ele.append(op_hol)
|
413
|
-
span_op_hol = etree.Element('span')
|
414
|
-
op_hol.append(span_op_hol)
|
415
|
-
for my_id in hol_ids:
|
416
|
-
span_op_hol.append(etree.Element('target',attrib={'id':my_id}))
|
417
|
-
|
418
|
-
## TARGET
|
419
|
-
op_tar = etree.Element('opinion_target')
|
420
|
-
op_ele.append(op_tar)
|
421
|
-
span_op_tar = etree.Element('span')
|
422
|
-
op_tar.append(span_op_tar)
|
423
|
-
for my_id in tar_ids:
|
424
|
-
span_op_tar.append(etree.Element('target',attrib={'id':my_id}))
|
425
|
-
|
426
|
-
## Expression
|
427
|
-
|
428
|
-
op_exp = etree.Element('opinion_expression',attrib={'polarity':polarity,
|
429
|
-
'strength':str(strength)})
|
430
|
-
op_ele.append(op_exp)
|
431
|
-
span_exp = etree.Element('span')
|
432
|
-
op_exp.append(span_exp)
|
433
|
-
for my_id in exp_ids:
|
434
|
-
span_exp.append(etree.Element('target',attrib={'id':my_id}))
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|