opener-polarity-tagger 3.0.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +1 -0
- data/bin/console +13 -0
- data/core/LexiconMod.py +0 -13
- data/lib/opener/polarity_tagger.rb +12 -91
- data/lib/opener/polarity_tagger/cli.rb +5 -4
- data/lib/opener/polarity_tagger/external.rb +105 -0
- data/lib/opener/polarity_tagger/internal.rb +57 -0
- data/lib/opener/polarity_tagger/kaf/document.rb +53 -0
- data/lib/opener/polarity_tagger/kaf/term.rb +35 -0
- data/lib/opener/polarity_tagger/lexicon_map.rb +82 -0
- data/lib/opener/polarity_tagger/lexicons_cache.rb +67 -0
- data/lib/opener/polarity_tagger/version.rb +5 -3
- data/opener-polarity-tagger.gemspec +2 -0
- metadata +46 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1407a3f9dab798e58f92e033a6255151545d225c03a5e056a2f2dbb6878832ed
|
4
|
+
data.tar.gz: 9c79bb5e40a5882effff686d11f180f7d0ababbd5e2c9d11c0a4bab1a835eb22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53b7b5649087d0ab9728df51ccf67dd7c6ff48eeaa0c6f4c3f74a257f07b610f29dc05b0919b1d2c379becb5b31f36c5609bca49583a3eb1d0b085df256ad9e7
|
7
|
+
data.tar.gz: f510909ce50427bb126773dedb60b21f3998fb80f49b9e602254e3adf1de1a658ce5d5a3b8ba7a452a78d764fa3960e455ce89d3d495c82147440e17780237ea
|
data/README.md
CHANGED
@@ -178,6 +178,7 @@ At least you need the following system setup:
|
|
178
178
|
* lxml installed
|
179
179
|
* libarchive, on Debian/Ubuntu based systems this can be installed using
|
180
180
|
`sudo apt-get install libarchive-dev`
|
181
|
+
* VUKafParserPy, install with sudo pip install 'https://github.com/opener-project/VU-kaf-parser/archive/v1.1.zip#egg=VUKafParserPy'
|
181
182
|
|
182
183
|
Domain Adaption
|
183
184
|
---------------
|
data/bin/console
ADDED
data/core/LexiconMod.py
CHANGED
@@ -61,7 +61,6 @@ def show_lexicons(language, path=None):
|
|
61
61
|
print '#'*30
|
62
62
|
print
|
63
63
|
|
64
|
-
|
65
64
|
class LexiconSent:
|
66
65
|
|
67
66
|
def __init__(self,language='nl',lexicon_id=None, path=None):
|
@@ -79,10 +78,8 @@ class LexiconSent:
|
|
79
78
|
|
80
79
|
self.load_resources(language,lexicon_id, path)
|
81
80
|
|
82
|
-
|
83
81
|
self.__load_lexicon_xml()
|
84
82
|
|
85
|
-
|
86
83
|
def load_resources(self,language,my_id=None, path=None):
|
87
84
|
if path is None:
|
88
85
|
path = os.path.dirname(__file__)
|
@@ -100,14 +97,9 @@ class LexiconSent:
|
|
100
97
|
self.filename = os.path.join(this_folder,folder_per_lang[language],lexicons[id_to_load][0])
|
101
98
|
self.resource = lexicons[id_to_load][1]+" . "+lexicons[id_to_load][2]
|
102
99
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
100
|
def getResource(self):
|
108
101
|
return self.resource
|
109
102
|
|
110
|
-
|
111
103
|
def convert_pos_to_kaf(self,pos):
|
112
104
|
my_map = {}
|
113
105
|
my_map['adj'] = 'G'
|
@@ -119,7 +111,6 @@ class LexiconSent:
|
|
119
111
|
my_map['verb']= 'V'
|
120
112
|
return my_map.get(pos.lower(),'O')
|
121
113
|
|
122
|
-
|
123
114
|
def __load_lexicon_xml(self):
|
124
115
|
logging.debug('Loading lexicon from the file'+self.filename)
|
125
116
|
from collections import defaultdict
|
@@ -160,16 +151,12 @@ class LexiconSent:
|
|
160
151
|
logging.debug('Loaded: '+str(len(self.intensifiers))+' intensifiers')
|
161
152
|
logging.debug('Loaded: '+str(len(self.sentLex))+' elements with polarity')
|
162
153
|
|
163
|
-
|
164
|
-
|
165
154
|
def isIntensifier(self,lemma):
|
166
155
|
return lemma in self.intensifiers
|
167
156
|
|
168
|
-
|
169
157
|
def isNegator(self,lemma):
|
170
158
|
return lemma in self.negators
|
171
159
|
|
172
|
-
|
173
160
|
def getPolarity(self,lemma,pos):
|
174
161
|
if pos:
|
175
162
|
return self.sentLex.get((lemma,pos),'unknown'),pos
|
@@ -1,106 +1,27 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'opener/core'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'hashie'
|
3
5
|
|
4
6
|
require_relative 'polarity_tagger/version'
|
5
7
|
require_relative 'polarity_tagger/cli'
|
8
|
+
require_relative 'polarity_tagger/external'
|
9
|
+
|
10
|
+
require_relative 'polarity_tagger/internal'
|
6
11
|
|
7
12
|
module Opener
|
8
|
-
##
|
9
|
-
# Ruby wrapper around the Python based polarity tagger.
|
10
|
-
#
|
11
|
-
# @!attribute [r] options
|
12
|
-
# @return [Hash]
|
13
|
-
#
|
14
|
-
# @!attribute [r] args
|
15
|
-
# @return [Array]
|
16
|
-
#
|
17
13
|
class PolarityTagger
|
18
|
-
attr_reader :options, :args
|
19
14
|
|
20
|
-
|
21
|
-
# @param [Hash] options
|
22
|
-
#
|
23
|
-
# @option options [Array] :args Collection of arbitrary arguments to pass
|
24
|
-
# to the underlying kernel.
|
25
|
-
#
|
26
|
-
# @option options [String] :resource_path Path to the lexicons to use.
|
27
|
-
#
|
28
|
-
def initialize(options = {})
|
15
|
+
def initialize options = {}
|
29
16
|
@args = options.delete(:args) || []
|
30
17
|
@options = options
|
18
|
+
@klass = if ENV['LEGACY'] then External else Internal end
|
19
|
+
@proc = @klass.new args: @args
|
31
20
|
end
|
32
21
|
|
33
|
-
|
34
|
-
|
35
|
-
#
|
36
|
-
# @return [String]
|
37
|
-
#
|
38
|
-
def command
|
39
|
-
return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
|
22
|
+
def run input
|
23
|
+
@proc.run input
|
40
24
|
end
|
41
25
|
|
42
|
-
|
43
|
-
|
44
|
-
#
|
45
|
-
def lexicon_path
|
46
|
-
path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
47
|
-
ENV['POLARITY_LEXICON_PATH']
|
48
|
-
|
49
|
-
return path ? "--lexicon-path #{path}" : nil
|
50
|
-
end
|
51
|
-
|
52
|
-
##
|
53
|
-
# Processes the input and returns an Array containing the output of STDOUT,
|
54
|
-
# STDERR and an object containing process information.
|
55
|
-
#
|
56
|
-
# @param [String] input The text of which to detect the language.
|
57
|
-
# @return [Array]
|
58
|
-
#
|
59
|
-
def run(input)
|
60
|
-
stdout, stderr, process = capture(input)
|
61
|
-
|
62
|
-
raise stderr unless process.success?
|
63
|
-
|
64
|
-
return stdout
|
65
|
-
end
|
66
|
-
|
67
|
-
protected
|
68
|
-
|
69
|
-
##
|
70
|
-
# @return [String]
|
71
|
-
#
|
72
|
-
def adjust_python_path
|
73
|
-
site_packages = File.join(core_dir, 'site-packages')
|
74
|
-
|
75
|
-
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
76
|
-
end
|
77
|
-
|
78
|
-
##
|
79
|
-
# capture3 method doesn't work properly with Jruby, so
|
80
|
-
# this is a workaround
|
81
|
-
#
|
82
|
-
def capture(input)
|
83
|
-
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
84
|
-
out_reader = Thread.new { o.read }
|
85
|
-
err_reader = Thread.new { e.read }
|
86
|
-
i.write input
|
87
|
-
i.close
|
88
|
-
[out_reader.value, err_reader.value, t.value]
|
89
|
-
}
|
90
|
-
end
|
91
|
-
|
92
|
-
##
|
93
|
-
# @return [String]
|
94
|
-
#
|
95
|
-
def core_dir
|
96
|
-
return File.expand_path('../../../core', __FILE__)
|
97
|
-
end
|
98
|
-
|
99
|
-
##
|
100
|
-
# @return [String]
|
101
|
-
#
|
102
|
-
def kernel
|
103
|
-
return File.join(core_dir, 'poltagger-basic-multi.py')
|
104
|
-
end
|
105
|
-
end # PolarityTagger
|
106
|
-
end # Opener
|
26
|
+
end
|
27
|
+
end
|
@@ -26,7 +26,7 @@ module Opener
|
|
26
26
|
# @return [Slop]
|
27
27
|
#
|
28
28
|
def configure_slop
|
29
|
-
|
29
|
+
Slop.new strict: false, indent: 2, help: true do
|
30
30
|
banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
|
31
31
|
|
32
32
|
separator <<-EOF.chomp
|
@@ -62,6 +62,7 @@ Examples:
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
end
|
65
|
-
|
66
|
-
|
67
|
-
end
|
65
|
+
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Opener
  class PolarityTagger
    ##
    # Legacy backend: a Ruby wrapper that runs the Python based polarity
    # tagger kernel (`poltagger-basic-multi.py` in the gem's `core` directory)
    # as a child process and returns what it writes to STDOUT.
    #
    # @!attribute [r] options
    #  @return [Hash]
    #
    # @!attribute [r] args
    #  @return [Array]
    #
    class External

      attr_reader :options, :args

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args Collection of arbitrary arguments to pass
      #  to the underlying kernel.
      #
      # @option options [String] :resource_path Path to the lexicons to use.
      #
      def initialize options = {}
        # NOTE: :args is removed from the Hash that was passed in.
        @args    = options.delete(:args) || []
        @options = options
      end

      ##
      # Returns a String containing the command to use for executing the kernel.
      #
      # The String is later split on single spaces by {#capture}, so none of
      # its parts (kernel path, lexicon path, arguments) may contain spaces.
      #
      # @return [String]
      #
      def command
        return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
      end

      ##
      # Returns the `--lexicon-path` argument for the kernel. The path is taken
      # from the `:resource_path` option, then `RESOURCE_PATH`, then
      # `POLARITY_LEXICON_PATH`; the first one that is set wins.
      #
      # @return [String, nil] nil when no lexicon path is configured.
      #
      def lexicon_path
        path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
          ENV['POLARITY_LEXICON_PATH']

        return path ? "--lexicon-path #{path}" : nil
      end

      ##
      # Feeds the input to the kernel and returns the kernel's STDOUT.
      #
      # @param [String] input The document to tag, written to the kernel's
      #  STDIN.
      # @return [String] Everything the kernel wrote to STDOUT.
      # @raise [RuntimeError] With the kernel's STDERR as message when the
      #  process does not exit successfully.
      #
      def run(input)
        stdout, stderr, process = capture(input)

        raise stderr unless process.success?

        # Diagnostics go to STDERR so they never end up mixed into the
        # document a caller prints to STDOUT.
        $stderr.puts stderr if ENV['DEBUG']

        return stdout
      end

      protected

      ##
      # Returns the `env` prefix that prepends the bundled site-packages
      # directory to PYTHONPATH.
      #
      # NOTE(review): {#capture} starts the process without a shell, so the
      # literal text `$PYTHONPATH` is passed on unexpanded, and `python -E`
      # ignores PYTHON* environment variables anyway. This prefix therefore
      # looks like it has no effect - confirm before relying on it.
      #
      # @return [String]
      #
      def adjust_python_path
        site_packages = File.join(core_dir, 'site-packages')

        "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
      end

      ##
      # capture3 method doesn't work properly with Jruby, so
      # this is a workaround
      #
      # STDOUT and STDERR are drained on separate threads so that a kernel
      # writing a lot to one of them cannot block while STDIN is being written.
      #
      # @param [String] input
      # @return [Array] STDOUT, STDERR and the process status, in that order.
      #
      def capture(input)
        Open3.popen3(*command.split(" ")) {|i, o, e, t|
          out_reader = Thread.new { o.read }
          err_reader = Thread.new { e.read }
          i.write input
          i.close
          [out_reader.value, err_reader.value, t.value]
        }
      end

      ##
      # Directory holding the Python kernel, resolved relative to this file.
      #
      # @return [String]
      #
      def core_dir
        File.expand_path '../../../../core', __FILE__
      end

      ##
      # Full path of the Python kernel script.
      #
      # @return [String]
      #
      def kernel
        File.join core_dir, 'poltagger-basic-multi.py'
      end

    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative 'lexicons_cache'
|
2
|
+
require_relative 'lexicon_map'
|
3
|
+
require_relative 'kaf/document'
|
4
|
+
|
5
|
+
module Opener
  class PolarityTagger
    ##
    # Pure Ruby backend: parses a KAF document, looks every term up in the
    # lexicons of the document's language and adds a `sentiment` element to
    # each term that has a polarity or is a negator/intensifier.
    #
    class Internal

      # Name, edit date and version recorded in the KAF header by #run.
      DESC        = 'VUA polarity tagger multilanguage'
      LAST_EDITED = '21may2014'
      VERSION     = '1.2'

      ##
      # @param [Boolean] ignore_pos When true the PoS of a term is not used
      #  for the polarity lookup.
      # @param [Hash] params Any other keyword (such as the `args:` passed by
      #  PolarityTagger) is accepted and ignored.
      #
      def initialize ignore_pos: false, **params
        @cache      = LexiconsCache.new
        @ignore_pos = ignore_pos
      end

      ##
      # Tags the terms of a KAF document.
      #
      # @param [String] input KAF XML.
      # @return [String] The KAF XML with sentiment information and an extra
      #  linguistic processor entry for the `terms` layer.
      #
      def run input
        # Per-run state stays in locals so that one instance can be used for
        # several documents without runs overwriting each other's data.
        kaf = KAF::Document.from_xml input
        map = kaf.map = @cache[kaf.language]

        kaf.terms.each do |t|
          lemma = t.lemma&.downcase
          # A term without a lemma cannot match any lexicon entry; looking it
          # up would end in `nil + String` inside LexiconMap#by_polarity.
          next if lemma.nil?

          pos   = @ignore_pos ? nil : t.pos
          attrs = Hashie::Mash.new

          lexicon, polarity_pos = map.by_polarity lemma, pos

          attrs.polarity = lexicon.polarity if lexicon.polarity != 'unknown'

          # Negator and intensifier entries take over as the entry whose
          # resource is reported; an intensifier match wins over a negator.
          if l = map.by_negator(lemma)
            lexicon = l
            attrs.sentiment_modifier = 'shifter'
          end
          if l = map.by_intensifier(lemma)
            lexicon = l
            attrs.sentiment_modifier = 'intensifier'
          end

          # Nothing found for this term: leave it untouched.
          next unless attrs.size > 0

          attrs.resource = lexicon.resource if lexicon.resource
          t.setPolarity attrs, polarity_pos
        end

        kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'

        kaf.to_xml
      end

    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require_relative 'term'
|
2
|
+
|
3
|
+
module Opener
  module KAF
    ##
    # Wrapper around a parsed KAF XML document giving access to its language,
    # its terms and its header.
    #
    class Document

      # The underlying parsed XML document.
      attr_reader :document
      attr_reader :lexicons

      # Lexicon map assigned by the caller for this document's language.
      attr_accessor :map

      ##
      # @param [Nokogiri::XML::Document] xml An already parsed document.
      #
      def initialize xml
        @document = xml
      end

      ##
      # Parses the given XML and wraps it.
      #
      # @param [String] xml
      # @return [Document]
      #
      def self.from_xml xml
        new Nokogiri::XML xml
      end

      ##
      # @return [String] Value of the `xml:lang` attribute of the KAF element.
      #
      def language
        @language ||= @document.at_xpath('KAF').attr 'xml:lang'
      end

      ##
      # @return [Array<Term>] One wrapper per `KAF/terms/term` element.
      #
      def terms
        @terms ||= collection 'KAF/terms/term', Term
      end

      ##
      # Adds an `lp` element for the given layer to the KAF header, creating
      # the header and the layer's `linguisticProcessors` element when they do
      # not exist yet.
      #
      # @param [String] name
      # @param [String] version
      # @param [String] layer
      # @param [Boolean] timestamp When true the current time is recorded,
      #  otherwise `*` is written as timestamp.
      # @return [Nokogiri::XML::NodeSet] The set holding the new `lp` element.
      #
      def add_linguistic_processor name, version, layer, timestamp: false
        # add_child returns a NodeSet when it is given markup; take the
        # element itself so that children can be added to it below.
        header = @document.at('kafHeader') ||
          @document.root.add_child('<kafHeader/>').first
        procs  = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
        procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>").first

        stamp = if timestamp
          require 'time' # Time#iso8601 is provided by the 'time' library
          Time.now.iso8601
        else
          '*'
        end

        # Kept as a NodeSet on purpose: NodeSet#attr accepts a Hash and sets
        # every pair on the new element.
        lp = procs.add_child('<lp/>')
        lp.attr(
          timestamp: stamp,
          version:   version,
          name:      name,
        )
        lp
      end

      ##
      # @return [String] The document serialized as XML.
      #
      def to_xml
        @document.to_xml indent: 2
      end

      protected

      ##
      # Wraps every node matching the XPath query in the given class.
      #
      def collection query, wrapper
        @document.xpath(query).map{ |node| wrapper.new self, node }
      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Opener
  module KAF
    ##
    # Wrapper around a single `term` element of a KAF document.
    #
    class Term

      # The Document this term belongs to.
      attr_reader :document
      # The wrapped XML node.
      attr_reader :node

      def initialize document, node
        @document = document
        @node     = node
      end

      ##
      # Value of the `tid` attribute, remembered after the first lookup.
      #
      def id
        @id || (@id = node.attr(:tid))
      end

      ##
      # Value of the `lemma` attribute.
      #
      def lemma
        node.attr :lemma
      end

      ##
      # Value of the `pos` attribute.
      #
      def pos
        node.attr :pos
      end

      ##
      # Adds a `sentiment` child carrying the given attributes. When the term
      # has no PoS of its own, the PoS under which the polarity was found is
      # stored on the term first.
      #
      def setPolarity attrs, polarity_pos
        pos_missing = !pos
        if pos_missing && polarity_pos
          node[:pos] = polarity_pos
        end

        node.add_child('<sentiment/>').attr(attrs)
      end

    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Opener
  class PolarityTagger
    ##
    # Indexes a flat list of lexicon entries so that negators, intensifiers
    # and entries with a polarity can be looked up by lemma.
    #
    class LexiconMap

      attr_reader :resource
      # Entries of type `polarityShifter`, keyed by lemma.
      attr_reader :negators
      # Entries of type `intensifier`, keyed by lemma.
      attr_reader :intensifiers
      # Entries with a polarity, keyed by lemma followed by the short PoS tag.
      attr_reader :with_polarity

      # Short PoS tags tried, in this order, when a lookup has no PoS.
      POS_ORDER = 'NRVGAO'
      # Placeholder entry returned when no entry with a polarity matches.
      UNKNOWN   = Hashie::Mash.new polarity: 'unknown'

      ##
      # @param [String] lang Language code, used as prefix of the log lines.
      # @param [Array] lexicons Entries responding to `lemma`, `type`,
      #  `polarity` and `pos`.
      #
      def initialize lang:, lexicons:
        @lang     = lang
        @lexicons = lexicons

        @negators      = {}
        @intensifiers  = {}
        @with_polarity = {}
        map lexicons
      end

      # Short PoS tag used when an entry's PoS is missing or not in the map.
      DEFAULT_POS = 'O'

      # Long PoS names as found in the lexicons mapped to short PoS tags.
      POS_SHORT_MAP = {
        adj:                   'G',
        adv:                   'A',
        noun:                  'N',
        propernoun:            'N',
        other:                 'O',
        prep:                  'P',
        verb:                  'V',
        nil =>                 DEFAULT_POS,
        multi_word_expression: 'O',
      }

      ##
      # @return [Object, nil] The negator entry for the lemma, if any.
      #
      def by_negator lemma
        @negators[lemma]
      end

      ##
      # @return [Object, nil] The intensifier entry for the lemma, if any.
      #
      def by_intensifier lemma
        @intensifiers[lemma]
      end

      ##
      # Looks up the entry with a polarity for a lemma.
      #
      # With a PoS only that lemma/PoS pair is tried. Without one, the tags in
      # POS_ORDER are tried in order and the first match wins.
      #
      # @param [String, nil] lemma
      # @param [String, nil] short_pos
      # @return [Array] The entry (UNKNOWN when nothing matches) and the PoS
      #  it was found under (`'unknown'` when no PoS was given and nothing
      #  matched).
      #
      def by_polarity lemma, short_pos
        # No lemma means no possible match; answer the same way as a miss
        # instead of failing on `nil + String` below.
        return [UNKNOWN, short_pos || 'unknown'] if lemma.nil?

        return [@with_polarity[lemma+short_pos] || UNKNOWN, short_pos] if short_pos

        POS_ORDER.chars.each do |candidate|
          if l = @with_polarity[lemma+candidate]
            log "Found polarity #{l.polarity} for #{lemma} with PoS #{candidate}"
            return [l, candidate]
          end
        end

        [UNKNOWN, 'unknown']
      end

      protected

      ##
      # Distributes the entries over the three lookup tables. Entries without
      # a lemma are skipped, as are entries that are neither negator nor
      # intensifier and have no polarity.
      #
      def map lexicons
        lexicons.each do |l|
          next if l.lemma.nil?

          case l.type
          when 'polarityShifter' then @negators[l.lemma] = l
          when 'intensifier'     then @intensifiers[l.lemma] = l
          else
            if l.polarity
              short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
              @with_polarity[l.lemma+short_pos] = l
            end
          end
        end

        log "#{@lang}: loaded #{@negators.size} negators"
        log "#{@lang}: loaded #{@intensifiers.size} intensifiers"
        log "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
      end

      ##
      # Writes a diagnostic line to STDERR, keeping STDOUT free for the
      # tagged document.
      #
      def log message
        $stderr.puts message
      end

    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Standard library pieces used below; required here so this file does not
# depend on another file having loaded them first.
require 'json'
require 'monitor'
require 'uri'

module Opener
  class PolarityTagger
    ##
    # Loads the lexicons of a language once and keeps the resulting
    # LexiconMap for later lookups. Lexicons come from the URL in
    # `POLARITY_LEXICON_URL` when it is set, otherwise from the directory in
    # `POLARITY_LEXICON_PATH` or the lexicons shipped in the gem's `core`
    # directory.
    #
    class LexiconsCache
      include MonitorMixin

      # Lexicons bundled with the gem, resolved relative to this file so the
      # default does not depend on the current working directory.
      DEFAULT_PATH = File.expand_path '../../../../core/general-lexicons', __FILE__

      def initialize
        super() # sets up the monitor behind #synchronize

        @url   = ENV['POLARITY_LEXICON_URL']
        @path  = ENV['POLARITY_LEXICON_PATH']
        @cache = {}
      end

      ##
      # Returns the LexiconMap for a language, loading it on first use. The
      # lookup is wrapped in the monitor so a language is loaded only once.
      #
      # @param [String] lang
      # @return [LexiconMap]
      #
      def [] lang
        synchronize do
          @cache[lang] ||= load_lexicons lang
        end
      end
      alias_method :get, :[]

      ##
      # Loads the entries for a language and indexes them.
      #
      # @param [String] lang
      # @return [LexiconMap]
      #
      def load_lexicons lang
        lexicons = if @url then load_from_url lang else load_from_path lang end

        LexiconMap.new lang: lang, lexicons: lexicons
      end

      ##
      # Fetches the entries from the configured URL. The response is parsed
      # as JSON and the elements of its `data` key become the entries.
      #
      # NOTE(review): the language is appended with `&`, so the configured
      # URL presumably already carries a query string - confirm.
      # NOTE(review): HTTPClient is not required in this file; it has to be
      # loaded elsewhere - confirm.
      #
      # @param [String] lang
      # @return [Array<Hashie::Mash>]
      #
      def load_from_url lang
        # The language code comes from the document, so escape it before it
        # becomes part of the URL.
        url = "#{@url}&language_code=#{URI.encode_www_form_component lang}"
        log "#{lang}: loading lexicons from url #{url}"
        lexicons = JSON.parse HTTPClient.new.get(url).body
        lexicons['data'].map{ |l| Hashie::Mash.new l }
      end

      ##
      # Reads the entries from `<path>/<LANG>-lexicon`: `config.xml` lists
      # the lexicon files, every `LexicalEntry` in those files becomes one
      # entry.
      #
      # @param [String] lang
      # @return [Array<Hashie::Mash>]
      #
      def load_from_path lang
        @path  ||= DEFAULT_PATH
        dir      = "#{@path}/#{lang.upcase}-lexicon"
        config   = Nokogiri::XML File.read "#{dir}/config.xml"
        lexicons = []

        config.css(:lexicon).each do |cl|
          filename = cl.at(:filename).text
          resource = cl.at(:resource).text
          xml      = Nokogiri::XML File.read "#{dir}/#{filename}"
          log "#{lang}: loading lexicons from the file #{filename}"

          lexicons.concat(xml.css(:LexicalEntry).map do |le|
            Hashie::Mash.new(
              resource:           resource,
              identifier:         le.attr(:id),
              type:               le.attr(:type),
              lemma:              le.at(:Lemma).attr(:writtenForm).downcase,
              pos:                le.attr(:partOfSpeech)&.downcase,
              aspect:             le.at(:Domain)&.attr(:aspect)&.downcase,
              polarity:           le.at(:Sentiment).attr(:polarity),
              strength:           le.at(:Sentiment).attr(:strength),
              confidence_level:   le.at(:Confidence)&.attr(:level),
              domain_conditional: le.at(:Domain)&.attr(:conditional) == 'yes',
            )
          end)
        end

        lexicons
      end

      protected

      ##
      # Writes a diagnostic line to STDERR, keeping STDOUT free for the
      # tagged document.
      #
      def log message
        $stderr.puts message
      end

    end
  end
end
|
@@ -33,11 +33,13 @@ Gem::Specification.new do |gem|
|
|
33
33
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
34
34
|
gem.add_dependency 'opener-core', '~> 2.2'
|
35
35
|
|
36
|
+
gem.add_dependency 'hashie'
|
36
37
|
gem.add_dependency 'rake'
|
37
38
|
gem.add_dependency 'nokogiri'
|
38
39
|
gem.add_dependency 'cliver'
|
39
40
|
gem.add_dependency 'slop', '~> 3.5'
|
40
41
|
|
42
|
+
gem.add_development_dependency 'pry'
|
41
43
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
42
44
|
gem.add_development_dependency 'cucumber'
|
43
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: hashie
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rake
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,20 @@ dependencies:
|
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '3.5'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: rspec
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,17 +165,19 @@ dependencies:
|
|
137
165
|
- !ruby/object:Gem::Version
|
138
166
|
version: '0'
|
139
167
|
description: Polarity tagger for various languages.
|
140
|
-
email:
|
168
|
+
email:
|
141
169
|
executables:
|
142
|
-
- polarity-tagger-server
|
143
|
-
- polarity-tagger-daemon
|
144
170
|
- polarity-tagger
|
171
|
+
- polarity-tagger-daemon
|
172
|
+
- polarity-tagger-server
|
173
|
+
- console
|
145
174
|
extensions:
|
146
175
|
- ext/hack/Rakefile
|
147
176
|
extra_rdoc_files: []
|
148
177
|
files:
|
149
178
|
- LICENSE.txt
|
150
179
|
- README.md
|
180
|
+
- bin/console
|
151
181
|
- bin/polarity-tagger
|
152
182
|
- bin/polarity-tagger-daemon
|
153
183
|
- bin/polarity-tagger-server
|
@@ -170,6 +200,12 @@ files:
|
|
170
200
|
- ext/hack/Rakefile
|
171
201
|
- lib/opener/polarity_tagger.rb
|
172
202
|
- lib/opener/polarity_tagger/cli.rb
|
203
|
+
- lib/opener/polarity_tagger/external.rb
|
204
|
+
- lib/opener/polarity_tagger/internal.rb
|
205
|
+
- lib/opener/polarity_tagger/kaf/document.rb
|
206
|
+
- lib/opener/polarity_tagger/kaf/term.rb
|
207
|
+
- lib/opener/polarity_tagger/lexicon_map.rb
|
208
|
+
- lib/opener/polarity_tagger/lexicons_cache.rb
|
173
209
|
- lib/opener/polarity_tagger/public/markdown.css
|
174
210
|
- lib/opener/polarity_tagger/server.rb
|
175
211
|
- lib/opener/polarity_tagger/version.rb
|
@@ -185,7 +221,7 @@ homepage: http://opener-project.github.com/
|
|
185
221
|
licenses:
|
186
222
|
- Apache 2.0
|
187
223
|
metadata: {}
|
188
|
-
post_install_message:
|
224
|
+
post_install_message:
|
189
225
|
rdoc_options: []
|
190
226
|
require_paths:
|
191
227
|
- lib
|
@@ -200,10 +236,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
200
236
|
- !ruby/object:Gem::Version
|
201
237
|
version: '0'
|
202
238
|
requirements: []
|
203
|
-
rubyforge_project:
|
204
|
-
rubygems_version: 2.
|
205
|
-
signing_key:
|
239
|
+
rubyforge_project:
|
240
|
+
rubygems_version: 2.7.8
|
241
|
+
signing_key:
|
206
242
|
specification_version: 4
|
207
243
|
summary: Polarity tagger for various languages.
|
208
244
|
test_files: []
|
209
|
-
has_rdoc:
|