opener-polarity-tagger 2.4.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/LICENSE.txt +13 -0
- data/README.md +115 -93
- data/bin/console +13 -0
- data/bin/polarity-tagger +8 -2
- data/bin/polarity-tagger-daemon +9 -5
- data/bin/polarity-tagger-server +8 -48
- data/core/LexiconMod.py +0 -13
- data/exec/polarity-tagger.rb +1 -11
- data/lib/opener/polarity_tagger.rb +13 -86
- data/lib/opener/polarity_tagger/cli.rb +43 -52
- data/lib/opener/polarity_tagger/external.rb +105 -0
- data/lib/opener/polarity_tagger/internal.rb +57 -0
- data/lib/opener/polarity_tagger/kaf/document.rb +53 -0
- data/lib/opener/polarity_tagger/kaf/term.rb +35 -0
- data/lib/opener/polarity_tagger/lexicon_map.rb +82 -0
- data/lib/opener/polarity_tagger/lexicons_cache.rb +67 -0
- data/lib/opener/polarity_tagger/server.rb +4 -5
- data/lib/opener/polarity_tagger/version.rb +5 -3
- data/opener-polarity-tagger.gemspec +10 -6
- data/task/requirements.rake +1 -1
- metadata +85 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1407a3f9dab798e58f92e033a6255151545d225c03a5e056a2f2dbb6878832ed
|
4
|
+
data.tar.gz: 9c79bb5e40a5882effff686d11f180f7d0ababbd5e2c9d11c0a4bab1a835eb22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53b7b5649087d0ab9728df51ccf67dd7c6ff48eeaa0c6f4c3f74a257f07b610f29dc05b0919b1d2c379becb5b31f36c5609bca49583a3eb1d0b085df256ad9e7
|
7
|
+
data.tar.gz: f510909ce50427bb126773dedb60b21f3998fb80f49b9e602254e3adf1de1a658ce5d5a3b8ba7a452a78d764fa3960e455ce89d3d495c82147440e17780237ea
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2014 OpeNER Project Consortium
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
-
Introduction
|
2
|
-
------------
|
1
|
+
# Introduction
|
3
2
|
|
4
|
-
This repository contains the code for the OpeNER polarity tagger. This tool tags
|
3
|
+
This repository contains the code for the OpeNER polarity tagger. This tool tags
|
4
|
+
words in a KAF file with polarity information, which basically is:
|
5
5
|
|
6
|
-
* Polarity information, which represents positive or negative facts in a certain
|
7
|
-
|
6
|
+
* Polarity information, which represents positive or negative facts in a certain
|
7
|
+
domain. Good, cheap and clean can be positive words in a hotel domain, while
|
8
|
+
bad, expensive and dirty could be negative ones.
|
9
|
+
* Sentiment modifiers, which modify the polarity of a surrounding polarity word.
|
10
|
+
For instance very or no are sentiment modifiers
|
8
11
|
|
9
12
|
The polarity tagger supports the following languages:
|
10
13
|
|
@@ -15,72 +18,82 @@ The polarity tagger supports the following languages:
|
|
15
18
|
* Italian
|
16
19
|
* Spanish
|
17
20
|
|
18
|
-
##How-
|
19
|
-
|
20
|
-
The main script of this tool is a python file, which accepts a set of parameters
|
21
|
-
or options we want to use. The language is read from
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
21
|
+
## How-To
|
22
|
+
|
23
|
+
The main script of this tool is a python file, which accepts a set of parameters
|
24
|
+
to determine which features or options we want to use. The language is read from
|
25
|
+
the KAF file, so it doesn't need to be specified as a parameter. The program
|
26
|
+
reads a KAF file from the standard input and writes the resulting KAF to the
|
27
|
+
standard output. To see the options you can call to the main script with the -h
|
28
|
+
or `--help` option.
|
29
|
+
|
30
|
+
$ python core/poltagger-basic-multi.py -h
|
31
|
+
usage: poltagger-basic-multi.py [-h] [--no-time] [--ignore-pos]
|
32
|
+
[--show-lexicons {nl,en,de,es,it,fr}]
|
33
|
+
[--lexicon LEXICON] [--silent] [--version]
|
34
|
+
|
35
|
+
Tags a text with polarities at lemma level
|
36
|
+
|
37
|
+
optional arguments:
|
38
|
+
-h, --help show this help message and exit
|
39
|
+
--no-time For not including timestamp in header
|
40
|
+
--ignore-pos Ignore the pos labels
|
41
|
+
--show-lexicons {nl,en,de,es,it,fr}
|
42
|
+
Show lexicons for the given language and exit
|
43
|
+
--lexicon LEXICON Lexicon identifier, check with --show-lexicons LANG
|
44
|
+
for options
|
45
|
+
--lexicon-path LEXICON The path of the lexicons
|
46
|
+
--silent Turn off debug info
|
47
|
+
--version show program's version number and exit
|
48
|
+
|
49
|
+
The `--ignore-pos` parameter must be used when we want to ignore the part-of-speech
|
50
|
+
information assigned to the lemmas, and we want to assign polarities just to the
|
51
|
+
lemmas, not considering the POS tag. This could be useful when the information
|
52
|
+
provided by the pos-tagger is not accurate or the pos-tagging has not been
|
53
|
+
processed.
|
54
|
+
|
55
|
+
The main options are those concerned with the usage of different lexicons. The
|
56
|
+
lexicons are provided by the VU-sentiment-lexicon library
|
57
|
+
(https://github.com/opener-project/VU-sentiment-lexicon), which needs to be
|
58
|
+
installed. You can see what the lexicons available for a given language are by
|
59
|
+
calling to the program with the option --show-lexicons LANG, for instance:
|
60
|
+
|
61
|
+
core/poltagger-basic-multi.py --show-lexicons nl
|
62
|
+
|
63
|
+
##############################
|
64
|
+
Available lexicons for nl
|
65
|
+
Identifier: "hotel" (Default)
|
66
|
+
Desc: Hotel domain lexicon for Dutch
|
67
|
+
Res: VUA_olery_lexicon_nl_lmf
|
68
|
+
File: /Users/ruben/python_envs/python2.7/lib/python2.7/VUSentimentLexicon/NL-lexicon/Sentiment-Dutch-HotelDomain.xml
|
69
|
+
|
70
|
+
Identifier:"general"
|
71
|
+
Desc: General lexicon for Dutch
|
72
|
+
Res: VUA_olery_lexicon_nl_lmf
|
73
|
+
File: /Users/ruben/python_envs/python2.7/lib/python2.7/VUSentimentLexicon/NL-lexicon/Sentiment-Dutch-general.xml
|
74
|
+
|
75
|
+
##############################
|
76
|
+
|
77
|
+
Then you can use the lexicon identifiers to select the proper lexicon, with the
|
78
|
+
option --lexicon
|
79
|
+
|
80
|
+
cat my_input.nl.kaf | core/poltagger-basic-multi.py --lexicon general
|
81
|
+
|
82
|
+
This command will call to the polarity tagger using the general lexicon for
|
83
|
+
Dutch. The lexicon identifiers are unique only per language. If the lexicon id
|
84
|
+
is not specified (you skip the --lexicon option), or you provide a wrong
|
85
|
+
identifier, the default lexicon will be loaded. If there is no lexicon marked
|
86
|
+
as default in the --show-lexicons output, the first one in the list will be
|
87
|
+
used. Check the VU-sentiment-lexicon for further information about how to manage
|
88
|
+
lexicons and add new ones.
|
80
89
|
|
81
90
|
### Confused by some terminology?
|
82
91
|
|
83
|
-
This software is part of a larger collection of natural language processing
|
92
|
+
This software is part of a larger collection of natural language processing
|
93
|
+
tools known as "the OpeNER project". You can find more information about the
|
94
|
+
project at [the OpeNER portal](http://opener-project.github.io). There you can
|
95
|
+
also find references to terms like KAF (an XML standard to represent linguistic
|
96
|
+
annotations in texts), component, cores, scenarios and pipelines.
|
84
97
|
|
85
98
|
Quick Use Example
|
86
99
|
-----------------
|
@@ -89,32 +102,33 @@ Installing the polarity-tagger can be done by executing:
|
|
89
102
|
|
90
103
|
gem install opener-polarity-tagger
|
91
104
|
|
92
|
-
The polarity tagger uses python. So it is advised to run a virtualenv before
|
105
|
+
The polarity tagger uses python. So it is advised to run a virtualenv before
|
106
|
+
installing the gem.
|
93
107
|
|
94
|
-
Please bare in mind that all components in OpeNER take KAF as an input and
|
108
|
+
Please bear in mind that all components in OpeNER take KAF as an input and
|
109
|
+
output KAF by default.
|
95
110
|
|
96
111
|
### Command line interface
|
97
112
|
|
98
|
-
You should now be able to call the polarity tagger as a regular shell command:
|
113
|
+
You should now be able to call the polarity tagger as a regular shell command
|
114
|
+
by its name. Once installed the gem normally sits in your path so you can call
|
115
|
+
it directly from anywhere.
|
99
116
|
|
100
117
|
This application reads a text from standard input in order to process it.
|
101
118
|
|
102
119
|
cat some_kind_of_kaf_file.kaf | polarity-tagger
|
103
120
|
|
104
|
-
|
105
121
|
This will output:
|
106
122
|
|
107
|
-
|
108
|
-
<
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
</term>
|
115
|
-
```
|
123
|
+
<term lemma="donner" morphofeat="VP3s" pos="V" tid="t119" type="open">
|
124
|
+
<span>
|
125
|
+
<!--donne-->
|
126
|
+
<target id="w119"/>
|
127
|
+
</span>
|
128
|
+
<sentiment polarity="neutral" resource="General domain lexicon for French . Vicomtech_general_lexicon_french"/>
|
129
|
+
</term>
|
116
130
|
|
117
|
-
##Requirements
|
131
|
+
## Requirements
|
118
132
|
|
119
133
|
* Python 2.7.0 or newer
|
120
134
|
* Ruby 1.9.2 or newer
|
@@ -127,7 +141,8 @@ You can launch a webservice by executing:
|
|
127
141
|
|
128
142
|
polarity-tagger-server
|
129
143
|
|
130
|
-
This will launch a mini webserver with the webservice. It defaults to port 9292,
|
144
|
+
This will launch a mini webserver with the webservice. It defaults to port 9292,
|
145
|
+
so you can access it at <http://localhost:9292>.
|
131
146
|
|
132
147
|
To launch it on a different port provide the `-p [port-number]` option like this:
|
133
148
|
|
@@ -135,20 +150,24 @@ To launch it on a different port provide the `-p [port-number]` option like this
|
|
135
150
|
|
136
151
|
It then launches at <http://localhost:1234>
|
137
152
|
|
138
|
-
Documentation on the Webservice is provided by surfing to the urls provided
|
139
|
-
|
153
|
+
Documentation on the Webservice is provided by surfing to the urls provided
|
154
|
+
above. For more information on how to launch a webservice run the command with
|
155
|
+
the `--help` option.
|
140
156
|
|
141
157
|
### Daemon
|
142
158
|
|
143
|
-
Last but not least the polarity tagger comes shipped with a daemon that can read
|
159
|
+
Last but not least the polarity tagger comes shipped with a daemon that can read
|
160
|
+
(and write) jobs to and from Amazon SQS queues. For more information type:
|
144
161
|
|
145
162
|
polarity-tagger-daemon -h
|
146
163
|
|
147
|
-
|
148
164
|
Description of dependencies
|
149
165
|
---------------------------
|
150
166
|
|
151
|
-
This component runs best if you run it in an environment suited for OpeNER
|
167
|
+
This component runs best if you run it in an environment suited for OpeNER
|
168
|
+
components. You can find an installation guide and helper tools in the
|
169
|
+
[OpeNER installer](https://github.com/opener-project/opener-installer) and an
|
170
|
+
[installation guide on the Opener Website](http://opener-project.github.io/getting-started/how-to/local-installation.html)
|
152
171
|
|
153
172
|
At least you need the following system setup:
|
154
173
|
|
@@ -156,7 +175,10 @@ At least you need the following system setup:
|
|
156
175
|
|
157
176
|
* Ruby 1.9.3 or newer
|
158
177
|
* Python 2.6 or newer
|
159
|
-
*
|
178
|
+
* lxml installed
|
179
|
+
* libarchive, on Debian/Ubuntu based systems this can be installed using
|
180
|
+
`sudo apt-get install libarchive-dev`
|
181
|
+
* VUKafParserPy, install with sudo pip install 'https://github.com/opener-project/VU-kaf-parser/archive/v1.1.zip#egg=VUKafParserPy'
|
160
182
|
|
161
183
|
Domain Adaption
|
162
184
|
---------------
|
@@ -171,7 +193,8 @@ Language Extension
|
|
171
193
|
The Core
|
172
194
|
--------
|
173
195
|
|
174
|
-
The component is a
|
196
|
+
The component is a wrapper around the actual language technology core. You
|
197
|
+
can find the core technologies in the `core/` folder.
|
175
198
|
|
176
199
|
Where to go from here
|
177
200
|
---------------------
|
@@ -182,10 +205,10 @@ Where to go from here
|
|
182
205
|
Report problem/Get help
|
183
206
|
-----------------------
|
184
207
|
|
185
|
-
If you encounter problems, please email <support@opener-project.eu> or leave an
|
208
|
+
If you encounter problems, please email <support@opener-project.eu> or leave an
|
209
|
+
issue in the
|
186
210
|
[issue tracker](https://github.com/opener-project/polarity-tagger/issues).
|
187
211
|
|
188
|
-
|
189
212
|
Contributing
|
190
213
|
------------
|
191
214
|
|
@@ -194,4 +217,3 @@ Contributing
|
|
194
217
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
195
218
|
4. Push to the branch (`git push origin my-new-feature`)
|
196
219
|
5. Create new Pull Request
|
197
|
-
|
data/bin/console
ADDED
data/bin/polarity-tagger
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'opener/core'
|
4
|
+
|
3
5
|
require_relative '../lib/opener/polarity_tagger'
|
4
6
|
|
5
|
-
|
6
|
-
cli
|
7
|
+
switcher = Opener::Core::ResourceSwitcher.new
|
8
|
+
cli = Opener::PolarityTagger::CLI.new
|
9
|
+
|
10
|
+
switcher.bind(cli.parser)
|
11
|
+
|
12
|
+
cli.run
|
data/bin/polarity-tagger-daemon
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'opener/daemons'
|
4
|
+
require 'opener/core'
|
4
5
|
|
5
|
-
|
6
|
+
switcher = Opener::Core::ResourceSwitcher.new
|
7
|
+
controller = Opener::Daemons::Controller.new(
|
8
|
+
:name => 'opener-polarity-tagger',
|
9
|
+
:exec_path => File.expand_path('../../exec/polarity-tagger.rb', __FILE__)
|
10
|
+
)
|
6
11
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
)
|
12
|
+
switcher.bind(controller.parser.parser)
|
13
|
+
|
14
|
+
controller.run
|
data/bin/polarity-tagger-server
CHANGED
@@ -1,54 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require '
|
4
|
-
require 'optparse'
|
3
|
+
require 'opener/webservice'
|
5
4
|
require 'opener/core'
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
switcher = Opener::Core::ResourceSwitcher.new
|
7
|
+
parser = Opener::Webservice::OptionParser.new(
|
8
|
+
'opener-polarity-tagger',
|
9
|
+
File.expand_path('../../config.ru', __FILE__)
|
10
|
+
)
|
11
11
|
|
12
|
-
|
13
|
-
opts.banner = "Usage: #{File.basename($0)} [OPTIONS]"
|
12
|
+
switcher.bind(parser.parser)
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
opts.on('-h', '--help', 'Shows this help message') do
|
18
|
-
abort option_parser.to_s
|
19
|
-
end
|
20
|
-
|
21
|
-
opts.on('-S', '--state PATH', 'Where to store the state details') do |val|
|
22
|
-
puma_args += ['--state', val]
|
23
|
-
end
|
24
|
-
|
25
|
-
opts.on('-b', '--bind URI', 'URI to bind to (tcp://, unix://, ssl://)') do |val|
|
26
|
-
puma_args += ['--bind', val]
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on('--pidfile PATH', 'Use PATH as a pidfile') do |val|
|
30
|
-
puma_args += ['--pidfile', val]
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on('--daemon', 'Daemonize the server into the background') do |val|
|
34
|
-
puma_args << '--daemon'
|
35
|
-
end
|
36
|
-
|
37
|
-
opts.on('-e', '--environment ENVIRONMENT', 'The environment to use') do |val|
|
38
|
-
puma_args += ['--environment', val]
|
39
|
-
end
|
40
|
-
|
41
|
-
opts.separator "\nResource Options:\n\n"
|
42
|
-
|
43
|
-
switcher.bind(opts, switcher_opts)
|
44
|
-
end
|
45
|
-
|
46
|
-
option_parser.parse!(ARGV)
|
47
|
-
|
48
|
-
switcher.install(switcher_opts)
|
49
|
-
|
50
|
-
# Puma on JRuby does some weird stuff with forking/exec. As a result of this we
|
51
|
-
# *have to* update ARGV as otherwise running Puma as a daemon does not work.
|
52
|
-
ARGV.replace(puma_args)
|
53
|
-
|
54
|
-
Puma::CLI.new(ARGV).run
|
14
|
+
parser.run
|
data/core/LexiconMod.py
CHANGED
@@ -61,7 +61,6 @@ def show_lexicons(language, path=None):
|
|
61
61
|
print '#'*30
|
62
62
|
print
|
63
63
|
|
64
|
-
|
65
64
|
class LexiconSent:
|
66
65
|
|
67
66
|
def __init__(self,language='nl',lexicon_id=None, path=None):
|
@@ -79,10 +78,8 @@ class LexiconSent:
|
|
79
78
|
|
80
79
|
self.load_resources(language,lexicon_id, path)
|
81
80
|
|
82
|
-
|
83
81
|
self.__load_lexicon_xml()
|
84
82
|
|
85
|
-
|
86
83
|
def load_resources(self,language,my_id=None, path=None):
|
87
84
|
if path is None:
|
88
85
|
path = os.path.dirname(__file__)
|
@@ -100,14 +97,9 @@ class LexiconSent:
|
|
100
97
|
self.filename = os.path.join(this_folder,folder_per_lang[language],lexicons[id_to_load][0])
|
101
98
|
self.resource = lexicons[id_to_load][1]+" . "+lexicons[id_to_load][2]
|
102
99
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
100
|
def getResource(self):
|
108
101
|
return self.resource
|
109
102
|
|
110
|
-
|
111
103
|
def convert_pos_to_kaf(self,pos):
|
112
104
|
my_map = {}
|
113
105
|
my_map['adj'] = 'G'
|
@@ -119,7 +111,6 @@ class LexiconSent:
|
|
119
111
|
my_map['verb']= 'V'
|
120
112
|
return my_map.get(pos.lower(),'O')
|
121
113
|
|
122
|
-
|
123
114
|
def __load_lexicon_xml(self):
|
124
115
|
logging.debug('Loading lexicon from the file'+self.filename)
|
125
116
|
from collections import defaultdict
|
@@ -160,16 +151,12 @@ class LexiconSent:
|
|
160
151
|
logging.debug('Loaded: '+str(len(self.intensifiers))+' intensifiers')
|
161
152
|
logging.debug('Loaded: '+str(len(self.sentLex))+' elements with polarity')
|
162
153
|
|
163
|
-
|
164
|
-
|
165
154
|
def isIntensifier(self,lemma):
|
166
155
|
return lemma in self.intensifiers
|
167
156
|
|
168
|
-
|
169
157
|
def isNegator(self,lemma):
|
170
158
|
return lemma in self.negators
|
171
159
|
|
172
|
-
|
173
160
|
def getPolarity(self,lemma,pos):
|
174
161
|
if pos:
|
175
162
|
return self.sentLex.get((lemma,pos),'unknown'),pos
|