opener-polarity-tagger 3.0.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +1 -0
- data/bin/console +13 -0
- data/core/LexiconMod.py +0 -13
- data/lib/opener/polarity_tagger.rb +12 -91
- data/lib/opener/polarity_tagger/cli.rb +5 -4
- data/lib/opener/polarity_tagger/external.rb +105 -0
- data/lib/opener/polarity_tagger/internal.rb +57 -0
- data/lib/opener/polarity_tagger/kaf/document.rb +53 -0
- data/lib/opener/polarity_tagger/kaf/term.rb +35 -0
- data/lib/opener/polarity_tagger/lexicon_map.rb +82 -0
- data/lib/opener/polarity_tagger/lexicons_cache.rb +67 -0
- data/lib/opener/polarity_tagger/version.rb +5 -3
- data/opener-polarity-tagger.gemspec +2 -0
- metadata +46 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1407a3f9dab798e58f92e033a6255151545d225c03a5e056a2f2dbb6878832ed
|
4
|
+
data.tar.gz: 9c79bb5e40a5882effff686d11f180f7d0ababbd5e2c9d11c0a4bab1a835eb22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53b7b5649087d0ab9728df51ccf67dd7c6ff48eeaa0c6f4c3f74a257f07b610f29dc05b0919b1d2c379becb5b31f36c5609bca49583a3eb1d0b085df256ad9e7
|
7
|
+
data.tar.gz: f510909ce50427bb126773dedb60b21f3998fb80f49b9e602254e3adf1de1a658ce5d5a3b8ba7a452a78d764fa3960e455ce89d3d495c82147440e17780237ea
|
data/README.md
CHANGED
@@ -178,6 +178,7 @@ At least you need the following system setup:
|
|
178
178
|
* lxml installed
|
179
179
|
* libarchive, on Debian/Ubuntu based systems this can be installed using
|
180
180
|
`sudo apt-get install libarchive-dev`
|
181
|
+
* VUKafParserPy, install with sudo pip install 'https://github.com/opener-project/VU-kaf-parser/archive/v1.1.zip#egg=VUKafParserPy'
|
181
182
|
|
182
183
|
Domain Adaption
|
183
184
|
---------------
|
data/bin/console
ADDED
data/core/LexiconMod.py
CHANGED
@@ -61,7 +61,6 @@ def show_lexicons(language, path=None):
|
|
61
61
|
print '#'*30
|
62
62
|
print
|
63
63
|
|
64
|
-
|
65
64
|
class LexiconSent:
|
66
65
|
|
67
66
|
def __init__(self,language='nl',lexicon_id=None, path=None):
|
@@ -79,10 +78,8 @@ class LexiconSent:
|
|
79
78
|
|
80
79
|
self.load_resources(language,lexicon_id, path)
|
81
80
|
|
82
|
-
|
83
81
|
self.__load_lexicon_xml()
|
84
82
|
|
85
|
-
|
86
83
|
def load_resources(self,language,my_id=None, path=None):
|
87
84
|
if path is None:
|
88
85
|
path = os.path.dirname(__file__)
|
@@ -100,14 +97,9 @@ class LexiconSent:
|
|
100
97
|
self.filename = os.path.join(this_folder,folder_per_lang[language],lexicons[id_to_load][0])
|
101
98
|
self.resource = lexicons[id_to_load][1]+" . "+lexicons[id_to_load][2]
|
102
99
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
100
|
def getResource(self):
|
108
101
|
return self.resource
|
109
102
|
|
110
|
-
|
111
103
|
def convert_pos_to_kaf(self,pos):
|
112
104
|
my_map = {}
|
113
105
|
my_map['adj'] = 'G'
|
@@ -119,7 +111,6 @@ class LexiconSent:
|
|
119
111
|
my_map['verb']= 'V'
|
120
112
|
return my_map.get(pos.lower(),'O')
|
121
113
|
|
122
|
-
|
123
114
|
def __load_lexicon_xml(self):
|
124
115
|
logging.debug('Loading lexicon from the file'+self.filename)
|
125
116
|
from collections import defaultdict
|
@@ -160,16 +151,12 @@ class LexiconSent:
|
|
160
151
|
logging.debug('Loaded: '+str(len(self.intensifiers))+' intensifiers')
|
161
152
|
logging.debug('Loaded: '+str(len(self.sentLex))+' elements with polarity')
|
162
153
|
|
163
|
-
|
164
|
-
|
165
154
|
def isIntensifier(self,lemma):
|
166
155
|
return lemma in self.intensifiers
|
167
156
|
|
168
|
-
|
169
157
|
def isNegator(self,lemma):
|
170
158
|
return lemma in self.negators
|
171
159
|
|
172
|
-
|
173
160
|
def getPolarity(self,lemma,pos):
|
174
161
|
if pos:
|
175
162
|
return self.sentLex.get((lemma,pos),'unknown'),pos
|
@@ -1,106 +1,27 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'opener/core'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'hashie'
|
3
5
|
|
4
6
|
require_relative 'polarity_tagger/version'
|
5
7
|
require_relative 'polarity_tagger/cli'
|
8
|
+
require_relative 'polarity_tagger/external'
|
9
|
+
|
10
|
+
require_relative 'polarity_tagger/internal'
|
6
11
|
|
7
12
|
module Opener
|
8
|
-
##
|
9
|
-
# Ruby wrapper around the Python based polarity tagger.
|
10
|
-
#
|
11
|
-
# @!attribute [r] options
|
12
|
-
# @return [Hash]
|
13
|
-
#
|
14
|
-
# @!attribute [r] args
|
15
|
-
# @return [Array]
|
16
|
-
#
|
17
13
|
class PolarityTagger
|
18
|
-
attr_reader :options, :args
|
19
14
|
|
20
|
-
|
21
|
-
# @param [Hash] options
|
22
|
-
#
|
23
|
-
# @option options [Array] :args Collection of arbitrary arguments to pass
|
24
|
-
# to the underlying kernel.
|
25
|
-
#
|
26
|
-
# @option options [String] :resource_path Path to the lexicons to use.
|
27
|
-
#
|
28
|
-
def initialize(options = {})
|
15
|
+
def initialize options = {}
|
29
16
|
@args = options.delete(:args) || []
|
30
17
|
@options = options
|
18
|
+
@klass = if ENV['LEGACY'] then External else Internal end
|
19
|
+
@proc = @klass.new args: @args
|
31
20
|
end
|
32
21
|
|
33
|
-
|
34
|
-
|
35
|
-
#
|
36
|
-
# @return [String]
|
37
|
-
#
|
38
|
-
def command
|
39
|
-
return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
|
22
|
+
def run input
|
23
|
+
@proc.run input
|
40
24
|
end
|
41
25
|
|
42
|
-
|
43
|
-
|
44
|
-
#
|
45
|
-
def lexicon_path
|
46
|
-
path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
47
|
-
ENV['POLARITY_LEXICON_PATH']
|
48
|
-
|
49
|
-
return path ? "--lexicon-path #{path}" : nil
|
50
|
-
end
|
51
|
-
|
52
|
-
##
|
53
|
-
# Processes the input and returns an Array containing the output of STDOUT,
|
54
|
-
# STDERR and an object containing process information.
|
55
|
-
#
|
56
|
-
# @param [String] input The text of which to detect the language.
|
57
|
-
# @return [Array]
|
58
|
-
#
|
59
|
-
def run(input)
|
60
|
-
stdout, stderr, process = capture(input)
|
61
|
-
|
62
|
-
raise stderr unless process.success?
|
63
|
-
|
64
|
-
return stdout
|
65
|
-
end
|
66
|
-
|
67
|
-
protected
|
68
|
-
|
69
|
-
##
|
70
|
-
# @return [String]
|
71
|
-
#
|
72
|
-
def adjust_python_path
|
73
|
-
site_packages = File.join(core_dir, 'site-packages')
|
74
|
-
|
75
|
-
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
76
|
-
end
|
77
|
-
|
78
|
-
##
|
79
|
-
# capture3 method doesn't work properly with Jruby, so
|
80
|
-
# this is a workaround
|
81
|
-
#
|
82
|
-
def capture(input)
|
83
|
-
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
84
|
-
out_reader = Thread.new { o.read }
|
85
|
-
err_reader = Thread.new { e.read }
|
86
|
-
i.write input
|
87
|
-
i.close
|
88
|
-
[out_reader.value, err_reader.value, t.value]
|
89
|
-
}
|
90
|
-
end
|
91
|
-
|
92
|
-
##
|
93
|
-
# @return [String]
|
94
|
-
#
|
95
|
-
def core_dir
|
96
|
-
return File.expand_path('../../../core', __FILE__)
|
97
|
-
end
|
98
|
-
|
99
|
-
##
|
100
|
-
# @return [String]
|
101
|
-
#
|
102
|
-
def kernel
|
103
|
-
return File.join(core_dir, 'poltagger-basic-multi.py')
|
104
|
-
end
|
105
|
-
end # PolarityTagger
|
106
|
-
end # Opener
|
26
|
+
end
|
27
|
+
end
|
@@ -26,7 +26,7 @@ module Opener
|
|
26
26
|
# @return [Slop]
|
27
27
|
#
|
28
28
|
def configure_slop
|
29
|
-
|
29
|
+
Slop.new strict: false, indent: 2, help: true do
|
30
30
|
banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
|
31
31
|
|
32
32
|
separator <<-EOF.chomp
|
@@ -62,6 +62,7 @@ Examples:
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
end
|
65
|
-
|
66
|
-
|
67
|
-
end
|
65
|
+
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Opener
|
2
|
+
class PolarityTagger
|
3
|
+
##
|
4
|
+
# Ruby wrapper around the Python based polarity tagger.
|
5
|
+
#
|
6
|
+
# @!attribute [r] options
|
7
|
+
# @return [Hash]
|
8
|
+
#
|
9
|
+
# @!attribute [r] args
|
10
|
+
# @return [Array]
|
11
|
+
#
|
12
|
+
class External
|
13
|
+
|
14
|
+
attr_reader :options, :args
|
15
|
+
|
16
|
+
##
|
17
|
+
# @param [Hash] options
|
18
|
+
#
|
19
|
+
# @option options [Array] :args Collection of arbitrary arguments to pass
|
20
|
+
# to the underlying kernel.
|
21
|
+
#
|
22
|
+
# @option options [String] :resource_path Path to the lexicons to use.
|
23
|
+
#
|
24
|
+
def initialize options = {}
|
25
|
+
@args = options.delete(:args) || []
|
26
|
+
@options = options
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Returns a String containing the command to use for executing the kernel.
|
31
|
+
#
|
32
|
+
# @return [String]
|
33
|
+
#
|
34
|
+
def command
|
35
|
+
return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
|
36
|
+
end
|
37
|
+
|
38
|
+
##
|
39
|
+
# @return [String]
|
40
|
+
#
|
41
|
+
def lexicon_path
|
42
|
+
path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
43
|
+
ENV['POLARITY_LEXICON_PATH']
|
44
|
+
|
45
|
+
return path ? "--lexicon-path #{path}" : nil
|
46
|
+
end
|
47
|
+
|
48
|
+
##
|
49
|
+
# Processes the input and returns an Array containing the output of STDOUT,
|
50
|
+
# STDERR and an object containing process information.
|
51
|
+
#
|
52
|
+
# @param [String] input The text of which to detect the language.
|
53
|
+
# @return [Array]
|
54
|
+
#
|
55
|
+
def run(input)
|
56
|
+
stdout, stderr, process = capture(input)
|
57
|
+
|
58
|
+
raise stderr unless process.success?
|
59
|
+
puts stderr if ENV['DEBUG']
|
60
|
+
|
61
|
+
return stdout
|
62
|
+
end
|
63
|
+
|
64
|
+
protected
|
65
|
+
|
66
|
+
##
|
67
|
+
# @return [String]
|
68
|
+
#
|
69
|
+
def adjust_python_path
|
70
|
+
site_packages = File.join(core_dir, 'site-packages')
|
71
|
+
|
72
|
+
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# capture3 method doesn't work properly with Jruby, so
|
77
|
+
# this is a workaround
|
78
|
+
#
|
79
|
+
def capture(input)
|
80
|
+
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
81
|
+
out_reader = Thread.new { o.read }
|
82
|
+
err_reader = Thread.new { e.read }
|
83
|
+
i.write input
|
84
|
+
i.close
|
85
|
+
[out_reader.value, err_reader.value, t.value]
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
##
|
90
|
+
# @return [String]
|
91
|
+
#
|
92
|
+
def core_dir
|
93
|
+
File.expand_path '../../../../core', __FILE__
|
94
|
+
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# @return [String]
|
98
|
+
#
|
99
|
+
def kernel
|
100
|
+
File.join core_dir, 'poltagger-basic-multi.py'
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative 'lexicons_cache'
|
2
|
+
require_relative 'lexicon_map'
|
3
|
+
require_relative 'kaf/document'
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class PolarityTagger
|
7
|
+
class Internal
|
8
|
+
|
9
|
+
DESC = 'VUA polarity tagger multilanguage'
|
10
|
+
LAST_EDITED = '21may2014'
|
11
|
+
VERSION = '1.2'
|
12
|
+
|
13
|
+
def initialize ignore_pos: false, **params
|
14
|
+
@cache = LexiconsCache.new
|
15
|
+
|
16
|
+
@ignore_pos = ignore_pos
|
17
|
+
end
|
18
|
+
|
19
|
+
def run input
|
20
|
+
@kaf = KAF::Document.from_xml input
|
21
|
+
@map = @kaf.map = @cache[@kaf.language]
|
22
|
+
|
23
|
+
negators = 0
|
24
|
+
@kaf.terms.each do |t|
|
25
|
+
lemma = t.lemma&.downcase
|
26
|
+
pos = if @ignore_pos then nil else t.pos end
|
27
|
+
attrs = Hashie::Mash.new
|
28
|
+
|
29
|
+
lexicon, polarity_pos = @map.by_polarity lemma, pos
|
30
|
+
|
31
|
+
if lexicon.polarity != 'unknown'
|
32
|
+
attrs.polarity = lexicon.polarity
|
33
|
+
end
|
34
|
+
if l = @map.by_negator(lemma)
|
35
|
+
negators += 1
|
36
|
+
lexicon, polarity = l, nil
|
37
|
+
attrs.sentiment_modifier = 'shifter'
|
38
|
+
end
|
39
|
+
if l = @map.by_intensifier(lemma)
|
40
|
+
lexicon, polarity = l, nil
|
41
|
+
attrs.sentiment_modifier = 'intensifier'
|
42
|
+
end
|
43
|
+
|
44
|
+
if attrs.size > 0
|
45
|
+
attrs.resource = lexicon.resource if lexicon.resource
|
46
|
+
t.setPolarity attrs, polarity_pos
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
@kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
|
51
|
+
|
52
|
+
@kaf.to_xml
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require_relative 'term'
|
2
|
+
|
3
|
+
module Opener
|
4
|
+
module KAF
|
5
|
+
class Document
|
6
|
+
|
7
|
+
attr_reader :document
|
8
|
+
attr_reader :lexicons
|
9
|
+
|
10
|
+
attr_accessor :map
|
11
|
+
|
12
|
+
def initialize xml
|
13
|
+
@document = xml
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.from_xml xml
|
17
|
+
new Nokogiri::XML xml
|
18
|
+
end
|
19
|
+
|
20
|
+
def language
|
21
|
+
@language ||= @document.at_xpath('KAF').attr 'xml:lang'
|
22
|
+
end
|
23
|
+
|
24
|
+
def terms
|
25
|
+
@terms ||= collection 'KAF/terms/term', Term
|
26
|
+
end
|
27
|
+
|
28
|
+
def add_linguistic_processor name, version, layer, timestamp: false
|
29
|
+
header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>')
|
30
|
+
procs = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
|
31
|
+
procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>")
|
32
|
+
lp = procs.add_child('<lp/>')
|
33
|
+
lp.attr(
|
34
|
+
timestamp: if timestamp then Time.now.iso8601 else '*' end,
|
35
|
+
version: version,
|
36
|
+
name: name,
|
37
|
+
)
|
38
|
+
lp
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_xml
|
42
|
+
@document.to_xml indent: 2
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
|
47
|
+
def collection query, wrapper
|
48
|
+
@document.xpath(query).map{ |node| wrapper.new self, node }
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Opener
|
2
|
+
module KAF
|
3
|
+
class Term
|
4
|
+
|
5
|
+
attr_reader :document
|
6
|
+
attr_reader :node
|
7
|
+
|
8
|
+
def initialize document, node
|
9
|
+
@document = document
|
10
|
+
@node = node
|
11
|
+
end
|
12
|
+
|
13
|
+
def id
|
14
|
+
@id ||= @node.attr :tid
|
15
|
+
end
|
16
|
+
|
17
|
+
def lemma
|
18
|
+
@node.attr :lemma
|
19
|
+
end
|
20
|
+
|
21
|
+
def pos
|
22
|
+
@node.attr :pos
|
23
|
+
end
|
24
|
+
|
25
|
+
def setPolarity attrs, polarity_pos
|
26
|
+
#In case there is no pos info, we use the polarityPos
|
27
|
+
@node[:pos] = polarity_pos if !pos and polarity_pos
|
28
|
+
|
29
|
+
sentiment = @node.add_child('<sentiment/>')
|
30
|
+
sentiment.attr attrs
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Opener
|
2
|
+
class PolarityTagger
|
3
|
+
class LexiconMap
|
4
|
+
|
5
|
+
attr_reader :resource
|
6
|
+
attr_reader :negators
|
7
|
+
attr_reader :intensifiers
|
8
|
+
attr_reader :with_polarity
|
9
|
+
|
10
|
+
POS_ORDER = 'NRVGAO'
|
11
|
+
UNKNOWN = Hashie::Mash.new polarity: 'unknown'
|
12
|
+
|
13
|
+
def initialize lang:, lexicons:
|
14
|
+
@lang = lang
|
15
|
+
@lexicons = lexicons
|
16
|
+
|
17
|
+
@negators = {}
|
18
|
+
@intensifiers = {}
|
19
|
+
@with_polarity = {}
|
20
|
+
map lexicons
|
21
|
+
end
|
22
|
+
|
23
|
+
DEFAULT_POS = 'O'
|
24
|
+
|
25
|
+
POS_SHORT_MAP = {
|
26
|
+
adj: 'G',
|
27
|
+
adv: 'A',
|
28
|
+
noun: 'N',
|
29
|
+
propernoun: 'N',
|
30
|
+
other: 'O',
|
31
|
+
prep: 'P',
|
32
|
+
verb: 'V',
|
33
|
+
nil => DEFAULT_POS,
|
34
|
+
multi_word_expression: 'O',
|
35
|
+
}
|
36
|
+
|
37
|
+
def by_negator lemma
|
38
|
+
@negators[lemma]
|
39
|
+
end
|
40
|
+
|
41
|
+
def by_intensifier lemma
|
42
|
+
@intensifiers[lemma]
|
43
|
+
end
|
44
|
+
|
45
|
+
def by_polarity lemma, short_pos
|
46
|
+
return [@with_polarity[lemma+short_pos] || UNKNOWN, short_pos] if short_pos
|
47
|
+
|
48
|
+
POS_ORDER.chars.each do |short_pos|
|
49
|
+
if l = @with_polarity[lemma+short_pos]
|
50
|
+
puts "Found polarify #{l.polarity} for #{lemma} with PoS #{short_pos}"
|
51
|
+
return [l, short_pos]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
[UNKNOWN, 'unknown']
|
56
|
+
end
|
57
|
+
|
58
|
+
protected
|
59
|
+
|
60
|
+
def map lexicons
|
61
|
+
lexicons.each do |l|
|
62
|
+
next if l.lemma.nil?
|
63
|
+
|
64
|
+
case l.type
|
65
|
+
when 'polarityShifter' then @negators[l.lemma] = l
|
66
|
+
when 'intensifier' then @intensifiers[l.lemma] = l
|
67
|
+
else
|
68
|
+
if l.polarity
|
69
|
+
short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
|
70
|
+
@with_polarity[l.lemma+short_pos] = l
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
puts "#{@lang}: loaded #{@negators.size} negators"
|
76
|
+
puts "#{@lang}: loaded #{@intensifiers.size} intensifiers"
|
77
|
+
puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Opener
|
2
|
+
class PolarityTagger
|
3
|
+
class LexiconsCache
|
4
|
+
|
5
|
+
def initialize
|
6
|
+
extend MonitorMixin
|
7
|
+
|
8
|
+
@url = ENV['POLARITY_LEXICON_URL']
|
9
|
+
@path = ENV['POLARITY_LEXICON_PATH']
|
10
|
+
@cache = {}
|
11
|
+
end
|
12
|
+
|
13
|
+
def [] lang
|
14
|
+
synchronize do
|
15
|
+
@cache[lang] ||= load_lexicons lang
|
16
|
+
end
|
17
|
+
end
|
18
|
+
alias_method :get, :[]
|
19
|
+
|
20
|
+
def load_lexicons lang
|
21
|
+
lexicons = if @url then load_from_url lang else load_from_path lang end
|
22
|
+
|
23
|
+
LexiconMap.new lang: lang, lexicons: lexicons
|
24
|
+
end
|
25
|
+
|
26
|
+
def load_from_url lang
|
27
|
+
url = "#{@url}&language_code=#{lang}"
|
28
|
+
puts "#{lang}: loading lexicons from url #{url}"
|
29
|
+
lexicons = JSON.parse HTTPClient.new.get(url).body
|
30
|
+
lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
|
31
|
+
lexicons
|
32
|
+
end
|
33
|
+
|
34
|
+
def load_from_path lang
|
35
|
+
@path ||= 'core/general-lexicons'
|
36
|
+
dir = "#{@path}/#{lang.upcase}-lexicon"
|
37
|
+
config = Nokogiri::XML File.read "#{dir}/config.xml"
|
38
|
+
lexicons = []
|
39
|
+
|
40
|
+
config.css(:lexicon).each do |cl|
|
41
|
+
filename = cl.at(:filename).text
|
42
|
+
resource = cl.at(:resource).text
|
43
|
+
xml = Nokogiri::XML File.read "#{dir}/#{filename}"
|
44
|
+
puts "#{lang}: loading lexicons from the file #{filename}"
|
45
|
+
|
46
|
+
lexicons.concat(xml.css(:LexicalEntry).map do |le|
|
47
|
+
Hashie::Mash.new(
|
48
|
+
resource: resource,
|
49
|
+
identifier: le.attr(:id),
|
50
|
+
type: le.attr(:type),
|
51
|
+
lemma: le.at(:Lemma).attr(:writtenForm).downcase,
|
52
|
+
pos: le.attr(:partOfSpeech)&.downcase,
|
53
|
+
aspect: le.at(:Domain)&.attr(:aspect)&.downcase,
|
54
|
+
polarity: le.at(:Sentiment).attr(:polarity),
|
55
|
+
strength: le.at(:Sentiment).attr(:strength),
|
56
|
+
confidence_level: le.at(:Confidence)&.attr(:level),
|
57
|
+
domain_conditional: le.at(:Domain)&.attr(:conditional) == 'yes',
|
58
|
+
)
|
59
|
+
end)
|
60
|
+
end
|
61
|
+
|
62
|
+
lexicons
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -33,11 +33,13 @@ Gem::Specification.new do |gem|
|
|
33
33
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
34
34
|
gem.add_dependency 'opener-core', '~> 2.2'
|
35
35
|
|
36
|
+
gem.add_dependency 'hashie'
|
36
37
|
gem.add_dependency 'rake'
|
37
38
|
gem.add_dependency 'nokogiri'
|
38
39
|
gem.add_dependency 'cliver'
|
39
40
|
gem.add_dependency 'slop', '~> 3.5'
|
40
41
|
|
42
|
+
gem.add_development_dependency 'pry'
|
41
43
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
42
44
|
gem.add_development_dependency 'cucumber'
|
43
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: hashie
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rake
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,20 @@ dependencies:
|
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '3.5'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: rspec
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,17 +165,19 @@ dependencies:
|
|
137
165
|
- !ruby/object:Gem::Version
|
138
166
|
version: '0'
|
139
167
|
description: Polarity tagger for various languages.
|
140
|
-
email:
|
168
|
+
email:
|
141
169
|
executables:
|
142
|
-
- polarity-tagger-server
|
143
|
-
- polarity-tagger-daemon
|
144
170
|
- polarity-tagger
|
171
|
+
- polarity-tagger-daemon
|
172
|
+
- polarity-tagger-server
|
173
|
+
- console
|
145
174
|
extensions:
|
146
175
|
- ext/hack/Rakefile
|
147
176
|
extra_rdoc_files: []
|
148
177
|
files:
|
149
178
|
- LICENSE.txt
|
150
179
|
- README.md
|
180
|
+
- bin/console
|
151
181
|
- bin/polarity-tagger
|
152
182
|
- bin/polarity-tagger-daemon
|
153
183
|
- bin/polarity-tagger-server
|
@@ -170,6 +200,12 @@ files:
|
|
170
200
|
- ext/hack/Rakefile
|
171
201
|
- lib/opener/polarity_tagger.rb
|
172
202
|
- lib/opener/polarity_tagger/cli.rb
|
203
|
+
- lib/opener/polarity_tagger/external.rb
|
204
|
+
- lib/opener/polarity_tagger/internal.rb
|
205
|
+
- lib/opener/polarity_tagger/kaf/document.rb
|
206
|
+
- lib/opener/polarity_tagger/kaf/term.rb
|
207
|
+
- lib/opener/polarity_tagger/lexicon_map.rb
|
208
|
+
- lib/opener/polarity_tagger/lexicons_cache.rb
|
173
209
|
- lib/opener/polarity_tagger/public/markdown.css
|
174
210
|
- lib/opener/polarity_tagger/server.rb
|
175
211
|
- lib/opener/polarity_tagger/version.rb
|
@@ -185,7 +221,7 @@ homepage: http://opener-project.github.com/
|
|
185
221
|
licenses:
|
186
222
|
- Apache 2.0
|
187
223
|
metadata: {}
|
188
|
-
post_install_message:
|
224
|
+
post_install_message:
|
189
225
|
rdoc_options: []
|
190
226
|
require_paths:
|
191
227
|
- lib
|
@@ -200,10 +236,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
200
236
|
- !ruby/object:Gem::Version
|
201
237
|
version: '0'
|
202
238
|
requirements: []
|
203
|
-
rubyforge_project:
|
204
|
-
rubygems_version: 2.
|
205
|
-
signing_key:
|
239
|
+
rubyforge_project:
|
240
|
+
rubygems_version: 2.7.8
|
241
|
+
signing_key:
|
206
242
|
specification_version: 4
|
207
243
|
summary: Polarity tagger for various languages.
|
208
244
|
test_files: []
|
209
|
-
has_rdoc:
|