opener-property-tagger 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/README.md +14 -17
- data/bin/property-tagger-daemon +10 -0
- data/core/hotel_property_tagger_nl_en.py +4 -3
- data/exec/property-tagger.rb +9 -0
- data/lib/opener/property_tagger/version.rb +1 -1
- data/opener-property-tagger.gemspec +5 -3
- metadata +50 -40
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
checksums.yaml
CHANGED
|
@@ -1,15 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
|
|
5
|
-
data.tar.gz: !binary |-
|
|
6
|
-
ZTdmYmMwMGViYTc3MDcyOTRjOTQzM2M4MjMyZTU1NDJjMjUxNWNlNg==
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 01266e13df09e19768ca16aba60a5dde22334d38
|
|
4
|
+
data.tar.gz: 73fa035f1949b9a45efdd28cbe2fa51edf41a418
|
|
7
5
|
SHA512:
|
|
8
|
-
metadata.gz:
|
|
9
|
-
|
|
10
|
-
NGVmYmNjNTZhYjZjMDFjNWYzMDhjODE5MWRiMzI1MTg2MTIxODE5ZTBkMWE1
|
|
11
|
-
N2Y2OWExNWZlOTQzNjEyYmRiZTgzMjUyMjI4Y2U4NTAzODAzNDc=
|
|
12
|
-
data.tar.gz: !binary |-
|
|
13
|
-
Y2M2ZWQyMWYyZDhhYjJjOTYwMmQyYWI1ODljNTgxNzU2YTU1YjhkYzQyMmFi
|
|
14
|
-
MTEzYjg2MzM1Zjk1MzUwMDM5NGRiMmZiMTU3ZWM0MWUwNDNjNzQwYTk4Mjc2
|
|
15
|
-
MTg1NzhmY2UyMzI3Yjg5ZGVkNmFlZTg0YmRlZTgyODI2MTQyNjA=
|
|
6
|
+
metadata.gz: c0317a20e5d4aca38d4cdfde47a9911143b024bdab221c00714f81079231093cdf627866744a18a73ab38f278fc8bea451195ba9f7ac928ac19763a5e63c1c38
|
|
7
|
+
data.tar.gz: c83585d02d72f38cc6f3ebf2386ad6c399772b1e4e094595b1a7b3d3ab3f6918ea53894f4b5dfa6f932053d11b1398ef8e2503e412808e21a367e299cacbd7d3
|
data/README.md
CHANGED
|
@@ -11,7 +11,7 @@ the input KAF file, from the lang attribute of the KAF element (make sure your
|
|
|
11
11
|
|
|
12
12
|
This software is part of a larger collection of natural language processing
|
|
13
13
|
tools known as "the OpeNER project". You can find more information about the
|
|
14
|
-
project at
|
|
14
|
+
project at the [OpeNER portal](http://opener-project.github.io). There you can
|
|
15
15
|
also find references to terms like KAF (an XML standard to represent linguistic
|
|
16
16
|
annotations in texts), component, cores, scenario's and pipelines.
|
|
17
17
|
|
|
@@ -112,10 +112,8 @@ Description of dependencies
|
|
|
112
112
|
---------------------------
|
|
113
113
|
|
|
114
114
|
This component runs best if you run it in an environment suited for OpeNER
|
|
115
|
-
components. You can find an installation guide and helper tools in the (
|
|
116
|
-
|
|
117
|
-
installation guide on the Opener
|
|
118
|
-
Website)[http://opener-project.github.io/getting-started/how-to/local-installation.html]
|
|
115
|
+
components. You can find an installation guide and helper tools in the [OpeNER installer](https://github.com/opener-project/opener-installer) and an
|
|
116
|
+
[installation guide on the Opener Website](http://opener-project.github.io/getting-started/how-to/local-installation.html)
|
|
119
117
|
|
|
120
118
|
At least you need the following system setup:
|
|
121
119
|
|
|
@@ -135,11 +133,11 @@ Domain Adaption and Language Extension
|
|
|
135
133
|
|
|
136
134
|
The lexicons in the resource path must be stored in a file and follow this format:
|
|
137
135
|
|
|
138
|
-
|
|
139
|
-
surf verb facilities
|
|
140
|
-
surfer noun facilities
|
|
141
|
-
surfing verb facilities
|
|
142
|
-
|
|
136
|
+
shell
|
|
137
|
+
surf verb facilities
|
|
138
|
+
surfer noun facilities
|
|
139
|
+
surfing verb facilities
|
|
140
|
+
|
|
143
141
|
|
|
144
142
|
So, one aspect per line, with 3 fields separated by a tabulator, the first one is the word or span of words (in this case use whitespaces), then the part
|
|
145
143
|
of speech (which actually it is not use, you can include a dummy label) and finally the aspect class associated with the word.
|
|
@@ -153,21 +151,20 @@ can find the core technolies (python) in the ```/core``` directory.
|
|
|
153
151
|
Where to go from here
|
|
154
152
|
---------------------
|
|
155
153
|
|
|
156
|
-
* Check
|
|
157
|
-
*
|
|
154
|
+
* [Check the project websitere](http://opener-project.github.io)
|
|
155
|
+
* [Checkout the webservice](http://opener.olery.com/property-tagger)
|
|
158
156
|
|
|
159
157
|
Report problem/Get help
|
|
160
158
|
-----------------------
|
|
161
159
|
|
|
162
|
-
If you encounter problems, please email support@opener-project.eu or leave an
|
|
163
|
-
issue in the
|
|
164
|
-
|
|
160
|
+
If you encounter problems, please email <support@opener-project.eu> or leave an
|
|
161
|
+
issue in the [issue tracker](https://github.com/opener-project/property-tagger/issues).
|
|
165
162
|
|
|
166
163
|
Contributing
|
|
167
164
|
------------
|
|
168
165
|
|
|
169
|
-
1. Fork it
|
|
170
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
166
|
+
1. Fork it <http://github.com/opener-project/property-tagger/fork>
|
|
167
|
+
2. 2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
171
168
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
172
169
|
4. Push to the branch (`git push origin my-new-feature`)
|
|
173
170
|
5. Create new Pull Request
|
|
@@ -10,7 +10,7 @@ this_folder = os.path.dirname(os.path.realpath(__file__))
|
|
|
10
10
|
|
|
11
11
|
# This updates the load path to ensure that the local site-packages directory
|
|
12
12
|
# can be used to load packages (e.g. a locally installed copy of lxml).
|
|
13
|
-
sys.path.append(os.path.join(this_folder, 'site-packages/pre_build'))
|
|
13
|
+
sys.path.append(os.path.join(this_folder, 'site-packages/pre_install'))
|
|
14
14
|
|
|
15
15
|
from VUKafParserPy import KafParser
|
|
16
16
|
from lxml import etree
|
|
@@ -48,8 +48,9 @@ def loadAspects(my_lang,this_file=None):
|
|
|
48
48
|
fic = codecs.open(aspects_filename,'r','utf-8')
|
|
49
49
|
for line in fic:
|
|
50
50
|
fields = line.strip().split('\t')
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
if len(fields) == 3:
|
|
52
|
+
lemma,pos,aspect = fields
|
|
53
|
+
my_aspects[lemma] = aspect
|
|
53
54
|
fic.close()
|
|
54
55
|
return aspects_filename, my_aspects
|
|
55
56
|
########################################
|
|
@@ -20,18 +20,20 @@ Gem::Specification.new do |gem|
|
|
|
20
20
|
'config.ru',
|
|
21
21
|
'*.gemspec',
|
|
22
22
|
'*_requirements.txt',
|
|
23
|
-
'README.md'
|
|
23
|
+
'README.md',
|
|
24
|
+
'exec/**/*'
|
|
24
25
|
]).select { |file| File.file?(file) }
|
|
25
26
|
|
|
26
27
|
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
|
27
28
|
|
|
28
|
-
gem.add_dependency 'opener-build-tools', ['>= 0.
|
|
29
|
+
gem.add_dependency 'opener-build-tools', ['>= 1.0.1']
|
|
29
30
|
gem.add_dependency 'rake'
|
|
30
31
|
gem.add_dependency 'sinatra'
|
|
31
32
|
gem.add_dependency 'httpclient'
|
|
32
33
|
gem.add_dependency 'puma'
|
|
34
|
+
gem.add_dependency 'opener-daemons'
|
|
33
35
|
gem.add_dependency 'opener-webservice'
|
|
34
|
-
gem.add_dependency 'opener-core'
|
|
36
|
+
gem.add_dependency 'opener-core', ['>= 0.1.2']
|
|
35
37
|
|
|
36
38
|
gem.add_development_dependency 'rspec'
|
|
37
39
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
|
@@ -1,175 +1,184 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: opener-property-tagger
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1.0
|
|
4
|
+
version: 2.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- development@olery.com
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-05-
|
|
11
|
+
date: 2014-05-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: opener-build-tools
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- -
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.
|
|
19
|
+
version: 1.0.1
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- -
|
|
24
|
+
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.
|
|
26
|
+
version: 1.0.1
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: rake
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
|
-
- -
|
|
31
|
+
- - ">="
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
33
|
version: '0'
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
|
-
- -
|
|
38
|
+
- - ">="
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '0'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: sinatra
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
|
-
- -
|
|
45
|
+
- - ">="
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
47
|
version: '0'
|
|
48
48
|
type: :runtime
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
|
-
- -
|
|
52
|
+
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: httpclient
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
|
-
- -
|
|
59
|
+
- - ">="
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
61
|
version: '0'
|
|
62
62
|
type: :runtime
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
|
-
- -
|
|
66
|
+
- - ">="
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
68
|
version: '0'
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: puma
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
|
-
- -
|
|
73
|
+
- - ">="
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
75
|
version: '0'
|
|
76
76
|
type: :runtime
|
|
77
77
|
prerelease: false
|
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
79
|
requirements:
|
|
80
|
-
- -
|
|
80
|
+
- - ">="
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
82
|
version: '0'
|
|
83
83
|
- !ruby/object:Gem::Dependency
|
|
84
|
-
name: opener-webservice
|
|
84
|
+
name: opener-daemons
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
86
86
|
requirements:
|
|
87
|
-
- -
|
|
87
|
+
- - ">="
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
89
|
version: '0'
|
|
90
90
|
type: :runtime
|
|
91
91
|
prerelease: false
|
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
|
94
|
-
- -
|
|
94
|
+
- - ">="
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
96
|
version: '0'
|
|
97
97
|
- !ruby/object:Gem::Dependency
|
|
98
|
-
name: opener-core
|
|
98
|
+
name: opener-webservice
|
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
|
100
100
|
requirements:
|
|
101
|
-
- -
|
|
101
|
+
- - ">="
|
|
102
102
|
- !ruby/object:Gem::Version
|
|
103
103
|
version: '0'
|
|
104
104
|
type: :runtime
|
|
105
105
|
prerelease: false
|
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
107
|
requirements:
|
|
108
|
-
- -
|
|
108
|
+
- - ">="
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: '0'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: opener-core
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 0.1.2
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 0.1.2
|
|
111
125
|
- !ruby/object:Gem::Dependency
|
|
112
126
|
name: rspec
|
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
|
114
128
|
requirements:
|
|
115
|
-
- -
|
|
129
|
+
- - ">="
|
|
116
130
|
- !ruby/object:Gem::Version
|
|
117
131
|
version: '0'
|
|
118
132
|
type: :development
|
|
119
133
|
prerelease: false
|
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
|
121
135
|
requirements:
|
|
122
|
-
- -
|
|
136
|
+
- - ">="
|
|
123
137
|
- !ruby/object:Gem::Version
|
|
124
138
|
version: '0'
|
|
125
139
|
- !ruby/object:Gem::Dependency
|
|
126
140
|
name: cucumber
|
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
|
128
142
|
requirements:
|
|
129
|
-
- -
|
|
143
|
+
- - ">="
|
|
130
144
|
- !ruby/object:Gem::Version
|
|
131
145
|
version: '0'
|
|
132
146
|
type: :development
|
|
133
147
|
prerelease: false
|
|
134
148
|
version_requirements: !ruby/object:Gem::Requirement
|
|
135
149
|
requirements:
|
|
136
|
-
- -
|
|
150
|
+
- - ">="
|
|
137
151
|
- !ruby/object:Gem::Version
|
|
138
152
|
version: '0'
|
|
139
153
|
description: Property tagger for hotels in Dutch and English.
|
|
140
154
|
email:
|
|
141
155
|
executables:
|
|
142
156
|
- property-tagger
|
|
157
|
+
- property-tagger-daemon
|
|
143
158
|
- property-tagger-server
|
|
144
159
|
extensions:
|
|
145
160
|
- ext/hack/Rakefile
|
|
146
161
|
extra_rdoc_files: []
|
|
147
162
|
files:
|
|
148
|
-
-
|
|
149
|
-
-
|
|
150
|
-
-
|
|
151
|
-
-
|
|
152
|
-
-
|
|
153
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
|
|
154
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
|
|
155
|
-
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
|
|
163
|
+
- README.md
|
|
164
|
+
- bin/property-tagger
|
|
165
|
+
- bin/property-tagger-daemon
|
|
166
|
+
- bin/property-tagger-server
|
|
167
|
+
- config.ru
|
|
156
168
|
- core/extract_aspects.py
|
|
157
169
|
- core/hotel_property_tagger_nl_en.py
|
|
170
|
+
- exec/property-tagger.rb
|
|
158
171
|
- ext/hack/Rakefile
|
|
159
172
|
- ext/hack/support.rb
|
|
173
|
+
- lib/opener/property_tagger.rb
|
|
160
174
|
- lib/opener/property_tagger/cli.rb
|
|
161
175
|
- lib/opener/property_tagger/public/markdown.css
|
|
162
176
|
- lib/opener/property_tagger/server.rb
|
|
163
177
|
- lib/opener/property_tagger/version.rb
|
|
164
178
|
- lib/opener/property_tagger/views/index.erb
|
|
165
179
|
- lib/opener/property_tagger/views/result.erb
|
|
166
|
-
- lib/opener/property_tagger.rb
|
|
167
|
-
- config.ru
|
|
168
180
|
- opener-property-tagger.gemspec
|
|
169
181
|
- pre_install_requirements.txt
|
|
170
|
-
- README.md
|
|
171
|
-
- bin/property-tagger
|
|
172
|
-
- bin/property-tagger-server
|
|
173
182
|
homepage: http://opener-project.github.com/
|
|
174
183
|
licenses: []
|
|
175
184
|
metadata: {}
|
|
@@ -179,18 +188,19 @@ require_paths:
|
|
|
179
188
|
- lib
|
|
180
189
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
181
190
|
requirements:
|
|
182
|
-
- -
|
|
191
|
+
- - ">="
|
|
183
192
|
- !ruby/object:Gem::Version
|
|
184
193
|
version: 1.9.2
|
|
185
194
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
186
195
|
requirements:
|
|
187
|
-
- -
|
|
196
|
+
- - ">="
|
|
188
197
|
- !ruby/object:Gem::Version
|
|
189
198
|
version: '0'
|
|
190
199
|
requirements: []
|
|
191
200
|
rubyforge_project:
|
|
192
|
-
rubygems_version: 2.
|
|
201
|
+
rubygems_version: 2.2.2
|
|
193
202
|
signing_key:
|
|
194
203
|
specification_version: 4
|
|
195
204
|
summary: Property tagger for hotels in Dutch and English.
|
|
196
205
|
test_files: []
|
|
206
|
+
has_rdoc:
|
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
class KafTermSentiment:
|
|
2
|
-
def __init__(self):
|
|
3
|
-
self.resource=None
|
|
4
|
-
self.polarity=None
|
|
5
|
-
self.strength=None
|
|
6
|
-
self.subjectivity=None
|
|
7
|
-
|
|
8
|
-
def simpleInit(self,r,p,st,su,sm=None):
|
|
9
|
-
self.resource=r
|
|
10
|
-
self.polarity=p
|
|
11
|
-
self.strength=st
|
|
12
|
-
self.subjectivity=su
|
|
13
|
-
self.sentiment_modifier = sm
|
|
14
|
-
|
|
15
|
-
def getPolarity(self):
|
|
16
|
-
return self.polarity
|
|
17
|
-
|
|
18
|
-
def getSentimentModifier(self):
|
|
19
|
-
return self.sentiment_modifier
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class KafToken:
|
|
23
|
-
def __init__(self,wid, value, sent=None, para=None):
|
|
24
|
-
self.token_id = wid
|
|
25
|
-
self.value = value
|
|
26
|
-
self.sent = sent
|
|
27
|
-
self.para = para
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class KafOpinionExpression:
|
|
31
|
-
def __init__(self,polarity,strength,targets):
|
|
32
|
-
self.polarity = polarity
|
|
33
|
-
self.strength = strength
|
|
34
|
-
self.targets = targets
|
|
35
|
-
|
|
36
|
-
def __str__(self):
|
|
37
|
-
return 'Op_exp==> pol:'+self.polarity+' Str:'+self.strength+' ids:'+'-'.join(self.targets)
|
|
38
|
-
|
|
39
|
-
class KafOpinion:
|
|
40
|
-
def __init__(self,id,holders, targets, opi_exp):
|
|
41
|
-
self.id = id
|
|
42
|
-
self.holders = holders
|
|
43
|
-
self.targets = targets
|
|
44
|
-
self.opi_exp = opi_exp
|
|
45
|
-
|
|
46
|
-
def __str__(self):
|
|
47
|
-
c='Opinion id'+self.id+'\n'
|
|
48
|
-
c+=' Holders: '+'-'.join(self.holders)+'\n'
|
|
49
|
-
c+=' Targets: '+'-'.join(self.targets)+'\n'
|
|
50
|
-
c+=str(self.opi_exp)
|
|
51
|
-
return c
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class KafSingleProperty:
|
|
56
|
-
def __init__(self,id,type,targets):
|
|
57
|
-
self.id = id
|
|
58
|
-
self.type = type
|
|
59
|
-
self.targets = targets
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def get_id(self):
|
|
63
|
-
return self.id
|
|
64
|
-
|
|
65
|
-
def get_type(self):
|
|
66
|
-
return self.type
|
|
67
|
-
|
|
68
|
-
def get_span(self):
|
|
69
|
-
return self.targets
|
|
70
|
-
|
|
71
|
-
def __str__(self):
|
|
72
|
-
return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class KafSingleEntity:
|
|
76
|
-
def __init__(self,id,type,targets):
|
|
77
|
-
self.id = id
|
|
78
|
-
self.type = type
|
|
79
|
-
self.targets = targets
|
|
80
|
-
|
|
81
|
-
def get_id(self):
|
|
82
|
-
return self.id
|
|
83
|
-
|
|
84
|
-
def get_type(self):
|
|
85
|
-
return self.type
|
|
86
|
-
|
|
87
|
-
def get_span(self):
|
|
88
|
-
return self.targets
|
|
89
|
-
|
|
90
|
-
def __str__(self):
|
|
91
|
-
return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
|
|
92
|
-
|
|
93
|
-
class KafTerm:
|
|
94
|
-
def __init__(self):
|
|
95
|
-
self.tid = None
|
|
96
|
-
self.lemma = None
|
|
97
|
-
self.pos = None
|
|
98
|
-
self.morphofeat = None
|
|
99
|
-
self.sentiment = None
|
|
100
|
-
self.list_span_id = []
|
|
101
|
-
|
|
102
|
-
def get_morphofeat(self):
|
|
103
|
-
return self.morphofeat
|
|
104
|
-
|
|
105
|
-
def set_list_span_id(self, L):
|
|
106
|
-
self.list_span_id = L
|
|
107
|
-
|
|
108
|
-
def get_list_span(self):
|
|
109
|
-
return self.list_span_id
|
|
110
|
-
|
|
111
|
-
def get_polarity(self):
|
|
112
|
-
if self.sentiment != None:
|
|
113
|
-
return self.sentiment.getPolarity()
|
|
114
|
-
else:
|
|
115
|
-
return None
|
|
116
|
-
|
|
117
|
-
def get_sentiment_modifier(self):
|
|
118
|
-
if self.sentiment != None:
|
|
119
|
-
return self.sentiment.getSentimentModifier()
|
|
120
|
-
else:
|
|
121
|
-
return None
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def setSentiment(self,my_sent):
|
|
125
|
-
self.sentiment = my_sent
|
|
126
|
-
|
|
127
|
-
def getSentiment(self):
|
|
128
|
-
return self.sentiment
|
|
129
|
-
|
|
130
|
-
def getLemma(self):
|
|
131
|
-
return self.lemma
|
|
132
|
-
|
|
133
|
-
def setLemma(self,lemma):
|
|
134
|
-
self.lemma = lemma
|
|
135
|
-
|
|
136
|
-
def getPos(self):
|
|
137
|
-
return self.pos
|
|
138
|
-
|
|
139
|
-
def setPos(self,pos):
|
|
140
|
-
self.pos = pos
|
|
141
|
-
|
|
142
|
-
def getId(self):
|
|
143
|
-
return self.tid
|
|
144
|
-
|
|
145
|
-
def setId(self,id):
|
|
146
|
-
self.tid = id
|
|
147
|
-
|
|
148
|
-
def getShortPos(self):
|
|
149
|
-
if self.pos==None:
|
|
150
|
-
return None
|
|
151
|
-
auxpos=self.pos.lower()[0]
|
|
152
|
-
if auxpos == 'g': auxpos='a'
|
|
153
|
-
elif auxpos == 'a': auxpos='r'
|
|
154
|
-
return auxpos
|
|
155
|
-
|
|
156
|
-
def __str__(self):
|
|
157
|
-
if self.tid and self.lemma and self.pos:
|
|
158
|
-
return self.tid+'\n\t'+self.lemma.encode('utf-8')+'\n\t'+self.pos
|
|
159
|
-
else:
|
|
160
|
-
return 'None'
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
@@ -1,439 +0,0 @@
|
|
|
1
|
-
########################################################################
|
|
2
|
-
# 14 Jan 2013: added function add_attrs_to_layer
|
|
3
|
-
########################################################################
|
|
4
|
-
|
|
5
|
-
###################
|
|
6
|
-
# List of changes #
|
|
7
|
-
###################
|
|
8
|
-
# 14 Jan 2013: added function add_attrs_to_layer
|
|
9
|
-
# 27 Feb 2013: added code for comply with DTD
|
|
10
|
-
# 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
|
|
11
|
-
# 18 Jun 2013: funcion add_property created for adding the properties to the KAF
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
from lxml import etree
|
|
15
|
-
from KafDataObjectsMod import *
|
|
16
|
-
import time
|
|
17
|
-
|
|
18
|
-
class KafParser:
|
|
19
|
-
def __init__(self,filename=None):
|
|
20
|
-
self.tree=None
|
|
21
|
-
self.__pathForToken={}
|
|
22
|
-
self.__term_ids_for_token_id = None
|
|
23
|
-
|
|
24
|
-
if filename:
|
|
25
|
-
#self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
|
|
26
|
-
self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True, strip_cdata=False))
|
|
27
|
-
## Do the text tokenization
|
|
28
|
-
self.__textTokenization()
|
|
29
|
-
else:
|
|
30
|
-
root = etree.Element('KAF')
|
|
31
|
-
root.set('version','v1.opener')
|
|
32
|
-
root.set('{http://www.w3.org/XML/1998/namespace}lang','en')
|
|
33
|
-
self.tree = etree.ElementTree(element=root)
|
|
34
|
-
|
|
35
|
-
def __textTokenization(self):
|
|
36
|
-
for wf in self.tree.findall('text/wf'):
|
|
37
|
-
wid = wf.get('wid')
|
|
38
|
-
self.__pathForToken[wid] = self.tree.getpath(wf)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def getToken(self,tid):
|
|
42
|
-
if tid in self.__pathForToken:
|
|
43
|
-
path = self.__pathForToken[tid]
|
|
44
|
-
return self.tree.xpath(self.__pathForToken[tid])[0]
|
|
45
|
-
return None
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def getLanguage(self):
|
|
49
|
-
lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
|
|
50
|
-
return lang
|
|
51
|
-
|
|
52
|
-
## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
|
|
53
|
-
## [(s_id1, T1), (sent_id2, T2)....]
|
|
54
|
-
## T1 --> [(tokenid, token), (tokenid2,token2)....]
|
|
55
|
-
def get_tokens_in_sentences(self):
|
|
56
|
-
sents = []
|
|
57
|
-
current = []
|
|
58
|
-
previous_sent = None
|
|
59
|
-
for element in self.tree.findall('text/wf'):
|
|
60
|
-
w_id = element.get('wid')
|
|
61
|
-
s_id = element.get('sent')
|
|
62
|
-
word = element.text
|
|
63
|
-
|
|
64
|
-
if previous_sent is not None and s_id != previous_sent:
|
|
65
|
-
sents.append((previous_sent,current))
|
|
66
|
-
current = []
|
|
67
|
-
current.append((w_id,word))
|
|
68
|
-
previous_sent = s_id
|
|
69
|
-
####
|
|
70
|
-
sents.append((s_id,current))
|
|
71
|
-
return sents
|
|
72
|
-
|
|
73
|
-
def get_term_ids_for_token_id(self,tok_id):
|
|
74
|
-
if self.__term_ids_for_token_id is None:
|
|
75
|
-
self.__term_ids_for_token_id = {}
|
|
76
|
-
for element in self.tree.findall('terms/term'):
|
|
77
|
-
term_id = element.get('tid')
|
|
78
|
-
for target in element.findall('span/target'):
|
|
79
|
-
token_id = target.get('id')
|
|
80
|
-
if token_id not in self.__term_ids_for_token_id:
|
|
81
|
-
self.__term_ids_for_token_id[token_id] = [term_id]
|
|
82
|
-
else:
|
|
83
|
-
self.__term_ids_for_token_id[token_id].append(term_id)
|
|
84
|
-
return self.__term_ids_for_token_id.get(tok_id,[])
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def getTokens(self):
|
|
89
|
-
for element in self.tree.findall('text/wf'):
|
|
90
|
-
w_id = element.get('wid')
|
|
91
|
-
s_id = element.get('sent','0')
|
|
92
|
-
word = element.text
|
|
93
|
-
yield (word, s_id, w_id)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def getTerms(self):
|
|
98
|
-
if self.tree:
|
|
99
|
-
for element in self.tree.findall('terms/term'):
|
|
100
|
-
kafTermObj = KafTerm()
|
|
101
|
-
kafTermObj.setId(element.get('tid'))
|
|
102
|
-
kafTermObj.setLemma(element.get('lemma'))
|
|
103
|
-
kafTermObj.setPos(element.get('pos'))
|
|
104
|
-
kafTermObj.morphofeat = element.get('morphofeat')
|
|
105
|
-
|
|
106
|
-
## Parsing sentiment
|
|
107
|
-
sentiment = element.find('sentiment')
|
|
108
|
-
if sentiment is not None:
|
|
109
|
-
resource = sentiment.get('resource','')
|
|
110
|
-
polarity = sentiment.get('polarity',None)
|
|
111
|
-
strength = sentiment.get('strength','')
|
|
112
|
-
subjectivity = sentiment.get('subjectivity','')
|
|
113
|
-
sentiment_modifier = sentiment.get('sentiment_modifier')
|
|
114
|
-
|
|
115
|
-
my_sent = KafTermSentiment()
|
|
116
|
-
my_sent.simpleInit(resource,polarity,strength,subjectivity,sentiment_modifier)
|
|
117
|
-
kafTermObj.setSentiment(my_sent)
|
|
118
|
-
|
|
119
|
-
## Parsing the span
|
|
120
|
-
span = element.find('span')
|
|
121
|
-
if span is not None:
|
|
122
|
-
list_ids = [target.get('id') for target in span.findall('target')]
|
|
123
|
-
kafTermObj.set_list_span_id(list_ids)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
yield kafTermObj
|
|
127
|
-
else:
|
|
128
|
-
return
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def getSentimentTriples(self):
|
|
132
|
-
data = []
|
|
133
|
-
if self.tree:
|
|
134
|
-
for term_element in self.tree.findall('terms/term'):
|
|
135
|
-
lemma = term_element.get('lemma')
|
|
136
|
-
polarity = None
|
|
137
|
-
sentiment_modifier = None
|
|
138
|
-
|
|
139
|
-
sentiment_element = term_element.find('sentiment')
|
|
140
|
-
if sentiment_element is not None:
|
|
141
|
-
polarity = sentiment_element.get('polarity',None)
|
|
142
|
-
sentiment_modifier = sentiment_element.get('sentiment_modifier')
|
|
143
|
-
data.append( (lemma,polarity,sentiment_modifier))
|
|
144
|
-
return data
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
|
|
149
|
-
if self.tree:
|
|
150
|
-
for element in self.tree.find('terms'):
|
|
151
|
-
if element.get('tid','')==termid:
|
|
152
|
-
|
|
153
|
-
#In case there is no pos info, we use the polarityPos
|
|
154
|
-
if not element.get('pos') and polarity_pos is not None:
|
|
155
|
-
element.set('pos',polarity_pos)
|
|
156
|
-
sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
|
|
157
|
-
element.append(sentEle)
|
|
158
|
-
|
|
159
|
-
def saveToFile(self,filename,myencoding='UTF-8'):
|
|
160
|
-
if self.tree:
|
|
161
|
-
self.tree.write(filename,encoding=myencoding,pretty_print=True,xml_declaration=True)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
|
|
165
|
-
aux = self.tree.findall('kafHeader')
|
|
166
|
-
if len(aux)!=0:
|
|
167
|
-
kaf_header = aux[0]
|
|
168
|
-
else:
|
|
169
|
-
kaf_header = etree.Element('kafHeader')
|
|
170
|
-
self.tree.getroot().insert(0,kaf_header)
|
|
171
|
-
|
|
172
|
-
aux2= kaf_header.findall('linguisticProcessors')
|
|
173
|
-
if len(aux2) == 0:
|
|
174
|
-
new_lp = etree.Element('linguisticProcessors')
|
|
175
|
-
new_lp.set('layer',layer)
|
|
176
|
-
kaf_header.append(new_lp)
|
|
177
|
-
|
|
178
|
-
## Check if there is already element for the layer
|
|
179
|
-
my_lp_ele = None
|
|
180
|
-
|
|
181
|
-
for element in kaf_header.findall('linguisticProcessors'):
|
|
182
|
-
if element.get('layer','')==layer:
|
|
183
|
-
my_lp_ele = element
|
|
184
|
-
break
|
|
185
|
-
|
|
186
|
-
if time_stamp:
|
|
187
|
-
my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
|
|
188
|
-
else:
|
|
189
|
-
my_time = '*'
|
|
190
|
-
|
|
191
|
-
my_lp = etree.Element('lp')
|
|
192
|
-
my_lp.set('timestamp',my_time)
|
|
193
|
-
my_lp.set('version',version)
|
|
194
|
-
my_lp.set('name',name)
|
|
195
|
-
|
|
196
|
-
if my_lp_ele is not None: #Already an element for linguisticProcessor with the layer
|
|
197
|
-
my_lp_ele.append(my_lp)
|
|
198
|
-
else:
|
|
199
|
-
# Create a new element for the LP layer
|
|
200
|
-
my_lp_ele = etree.Element('linguisticProcessors')
|
|
201
|
-
my_lp_ele.set('layer',layer)
|
|
202
|
-
my_lp_ele.append(my_lp)
|
|
203
|
-
#my_lp_ele.tail=my_lp_ele.text='\n'
|
|
204
|
-
## Should be inserted after the last linguisticProcessor element (stored in variable element)
|
|
205
|
-
idx = kaf_header.index(element)
|
|
206
|
-
kaf_header.insert(idx+1,my_lp_ele)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
def addLayer(self,type,element,first_char_id=None):
|
|
210
|
-
if first_char_id is None:
|
|
211
|
-
first_char_id = type[0]
|
|
212
|
-
|
|
213
|
-
## Check if there is already layer for the type
|
|
214
|
-
layer_element = self.tree.find(type)
|
|
215
|
-
|
|
216
|
-
if layer_element is None:
|
|
217
|
-
layer_element = etree.Element(type)
|
|
218
|
-
self.tree.getroot().append(layer_element)
|
|
219
|
-
## The id is going to be the first one
|
|
220
|
-
new_id = first_char_id+'1'
|
|
221
|
-
else:
|
|
222
|
-
## We need to know how many elements there are in the layer
|
|
223
|
-
current_n = len(layer_element.getchildren())
|
|
224
|
-
new_id = first_char_id+''+str(current_n+1)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
## In this point layer_element points to the correct element, existing or created
|
|
228
|
-
|
|
229
|
-
element.set(first_char_id+'id',new_id)
|
|
230
|
-
layer_element.append(element)
|
|
231
|
-
return new_id
|
|
232
|
-
|
|
233
|
-
def addElementToLayer(self,layer, element,first_char_id=None):
|
|
234
|
-
return self.addLayer(layer,element,first_char_id)
|
|
235
|
-
|
|
236
|
-
def add_attrs_to_layer(self,layer,attrs):
|
|
237
|
-
layer_element = self.tree.find(layer)
|
|
238
|
-
if layer_element is not None:
|
|
239
|
-
for att, val in attrs.items():
|
|
240
|
-
layer_element.set(att,val)
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
def addAttributeToElement(self,path,str_id, id, attribute, value,sub_path=None):
|
|
244
|
-
for element in self.tree.findall(path):
|
|
245
|
-
if id is not None and element.get(str_id,None) == id:
|
|
246
|
-
if sub_path is not None:
|
|
247
|
-
elements = element.findall(sub_path)
|
|
248
|
-
if len(elements)!=0: element = elements[0]
|
|
249
|
-
element.set(attribute,value)
|
|
250
|
-
return
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
## This works with the original definition of the property layer
|
|
254
|
-
## KAF -> properties -> property* -> span* -> target*
|
|
255
|
-
def getSingleProperties_old(self):
|
|
256
|
-
for element in self.tree.findall('properties/property'):
|
|
257
|
-
my_id = element.get('pid')
|
|
258
|
-
my_type = element.get('type')
|
|
259
|
-
ref = element.find('references')
|
|
260
|
-
if ref is not None:
|
|
261
|
-
element = ref
|
|
262
|
-
for span_element in element.findall('span'):
|
|
263
|
-
target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
|
|
264
|
-
my_prop = KafSingleProperty(my_id,my_type,target_ids)
|
|
265
|
-
yield my_prop
|
|
266
|
-
|
|
267
|
-
## 18-June-2013
|
|
268
|
-
def getSingleProperties(self):
    """Yield a KafSingleProperty for every span under 'features/properties/property'.

    The identifier is the property's 'pid' attribute, or 'fpid' when 'pid'
    is absent; the type is its 'lemma' attribute. One object is produced
    per 'references/span', carrying the ids of that span's <target>
    children.
    """
    for prop_node in self.tree.findall('features/properties/property'):
        pid = prop_node.get('pid')
        if pid is None:
            pid = prop_node.get('fpid')
        lemma = prop_node.get('lemma')
        for span in prop_node.findall('references/span'):
            ids = [target.get('id') for target in span.findall('target')]
            yield KafSingleProperty(pid, lemma, ids)
|
|
278
|
-
|
|
279
|
-
# This function adds a new property of the type given with the list of ids given
|
|
280
|
-
# my_type -> 'sleeping comfort' list_ids = ['id1','id2']
|
|
281
|
-
# It creates the features/properties layers in case they do not exist yet
|
|
282
|
-
# Agglomerates all the properties for the same TYPE under the same property element
|
|
283
|
-
# It calculates automatically the number for the identifier depending on the number
|
|
284
|
-
# of properties existing
|
|
285
|
-
def add_property(self,my_type,list_ids,comment=None):
    """Record a new span of target ids for the property whose lemma is ``my_type``.

    The 'features' layer and its 'properties' child are created when they
    are missing. All spans of one type are grouped under a single
    <property lemma=my_type> element, which is created on first use.

    my_type  -- property type, stored in the 'lemma' attribute
                (e.g. 'sleeping comfort')
    list_ids -- iterable of ids; one <target id=...> is added per id
    comment  -- optional text inserted as an XML comment just before the
                new <span>

    A newly created property gets the id 'p<N>', where N starts at the
    number of existing properties plus one and is increased until the id is
    not already used as a 'pid' or 'fpid'. This keeps the historical
    numbering for contiguous ids and avoids duplicates otherwise.
    """
    # Look for the feature layer, creating it if needed.
    feature_layer = self.tree.find('features')
    if feature_layer is None:
        feature_layer = etree.Element('features')
        self.tree.getroot().append(feature_layer)

    # Look for the properties layer, creating it if needed.
    properties_layer = feature_layer.find('properties')
    if properties_layer is None:
        properties_layer = etree.Element('properties')
        feature_layer.append(properties_layer)

    # Reuse the property element of this type when there is one.
    existing = properties_layer.findall('property')
    property_layer = None
    for candidate in existing:
        if candidate.get('lemma') == my_type:
            property_layer = candidate
            break

    if property_layer is None:
        # No property of this type yet: create one under a fresh id.
        used_ids = set()
        for candidate in existing:
            used_ids.add(candidate.get('pid'))
            used_ids.add(candidate.get('fpid'))
        number = len(existing) + 1
        while 'p' + str(number) in used_ids:
            number += 1
        property_layer = etree.Element('property')
        property_layer.set('pid', 'p' + str(number))
        property_layer.set('lemma', my_type)
        properties_layer.append(property_layer)

    references = property_layer.find('references')
    if references is None:
        references = etree.Element('references')
        property_layer.append(references)

    # Create the new span, preceded by the optional comment.
    if comment is not None:
        references.append(etree.Comment(comment))
    span = etree.Element('span')
    references.append(span)
    for my_id in list_ids:
        span.append(etree.Element('target', attrib={'id': my_id}))
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
def getSingleEntities(self):
    """Yield a KafSingleEntity for every span of every 'entities/entity'.

    Each entity contributes its 'eid' and 'type' attributes. Spans are read
    from 'references/span' when the entity has a <references> child and
    from 'span' directly below the entity otherwise. The ids of a span's
    <target> children form the third constructor argument.
    """
    for entity in self.tree.findall('entities/entity'):
        eid = entity.get('eid')
        entity_type = entity.get('type')
        has_references = entity.find('references') is not None
        span_path = 'references/span' if has_references else 'span'
        for span in entity.findall(span_path):
            ids = [target.get('id') for target in span.findall('target')]
            yield KafSingleEntity(eid, entity_type, ids)
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
def getOpinions(self):
    """Yield one KafOpinion for every 'opinions/opinion' element of the tree.

    For each opinion the first <opinion_holder>, <opinion_target> and
    <opinion_expression> children (if present) are inspected:

    * holder / target: the ids of their 'span/target' descendants
    * expression: its 'polarity' and 'strength' attributes plus the ids of
      its 'span/target' descendants

    A missing part contributes an empty id list. Polarity and strength
    default to '' both when the attribute is absent and when the opinion
    has no <opinion_expression> at all.
    """
    def span_target_ids(node):
        # Ids of every <target> found under the node's <span> children.
        return [target.get('id') for target in node.findall('span/target')]

    for opinion in self.tree.findall('opinions/opinion'):
        opinion_id = opinion.get('oid')

        holder_ids = []
        target_ids = []
        expression_ids = []
        # Both names must be bound here: an opinion without an
        # <opinion_expression> child would otherwise reach the yield with
        # 'strength' undefined.
        polarity = strength = ''

        # Holder
        holder = opinion.find('opinion_holder')
        if holder is not None:
            holder_ids = span_target_ids(holder)

        # Target
        target = opinion.find('opinion_target')
        if target is not None:
            target_ids = span_target_ids(target)

        # Opinion expression
        expression = opinion.find('opinion_expression')
        if expression is not None:
            polarity = expression.get('polarity', '')
            strength = expression.get('strength', '')
            expression_ids = span_target_ids(expression)

        yield KafOpinion(opinion_id, holder_ids, target_ids,
                         KafOpinionExpression(polarity, strength, expression_ids))
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
def remove_opinion_layer(self):
    """Detach the 'opinions' layer from the document root, if there is one."""
    layer = self.tree.find('opinions')
    if layer is None:
        # Nothing to remove.
        return
    root = self.tree.getroot()
    root.remove(layer)
|
|
384
|
-
|
|
385
|
-
## This function adds an opinion to the opinion layer, creating the layer if it does not exist
|
|
386
|
-
## The id is calculated automatically according to the number of elements and ensuring there is no repetition
|
|
387
|
-
def add_opinion(self,hol_ids,tar_ids,polarity,strength,exp_ids):
    """Append a new <opinion> to the 'opinions' layer and return its id.

    The 'opinions' layer is created under the root when it does not exist.
    The new opinion gets the first id of the form 'o<N>' (N = 1, 2, ...)
    that no existing child of the layer uses as its 'oid'.

    hol_ids  -- ids for the <opinion_holder> span
    tar_ids  -- ids for the <opinion_target> span
    polarity -- value of the 'polarity' attribute of <opinion_expression>
    strength -- value of the 'strength' attribute; converted with str()
    exp_ids  -- ids for the <opinion_expression> span

    Each of the three parts is always created and holds exactly one <span>
    with one <target id=...> per id, even when the id list is empty.

    Returns the generated opinion id.
    """
    def append_span(parent, target_ids):
        # Build <span><target id=.../>...</span> below parent.
        span = etree.Element('span')
        parent.append(span)
        for target_id in target_ids:
            span.append(etree.Element('target', attrib={'id': target_id}))

    # Look for the opinion layer, creating it if needed.
    opinion_layer = self.tree.find('opinions')
    if opinion_layer is None:
        opinion_layer = etree.Element('opinions')
        self.tree.getroot().append(opinion_layer)

    # Generate a unique id: the lowest 'o<N>' not taken yet.
    used_oids = set(child.get('oid') for child in opinion_layer)
    number = 1
    while 'o' + str(number) in used_oids:
        number += 1
    opinion_id = 'o' + str(number)

    opinion = etree.Element('opinion')
    opinion_layer.append(opinion)
    opinion.set('oid', opinion_id)

    # Holder
    holder = etree.Element('opinion_holder')
    opinion.append(holder)
    append_span(holder, hol_ids)

    # Target
    target = etree.Element('opinion_target')
    opinion.append(target)
    append_span(target, tar_ids)

    # Expression
    expression = etree.Element('opinion_expression',
                               attrib={'polarity': polarity,
                                       'strength': str(strength)})
    opinion.append(expression)
    append_span(expression, exp_ids)

    return opinion_id
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
## version = 0.2
|
|
2
|
-
## Added timestamp to function addLinguisitcProcessor
|
|
3
|
-
## 24-april-2013 --> getSingleEntities and getSingleProperties read both entities/props in format
|
|
4
|
-
## entities -> entity -> span -> target and entities -> entity -> references -> span
|
|
5
|
-
####
|
|
6
|
-
|
|
7
|
-
from KafParserMod import KafParser
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
../VUKafParserPy/__init__.py
|
|
2
|
-
../VUKafParserPy/KafDataObjectsMod.py
|
|
3
|
-
../VUKafParserPy/KafParserMod.py
|
|
4
|
-
../VUKafParserPy/__init__.pyc
|
|
5
|
-
../VUKafParserPy/KafDataObjectsMod.pyc
|
|
6
|
-
../VUKafParserPy/KafParserMod.pyc
|
|
7
|
-
./
|
|
8
|
-
dependency_links.txt
|
|
9
|
-
PKG-INFO
|
|
10
|
-
SOURCES.txt
|
|
11
|
-
top_level.txt
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
VUKafParserPy
|