opener-polarity-tagger 2.4.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/LICENSE.txt +13 -0
- data/README.md +115 -93
- data/bin/console +13 -0
- data/bin/polarity-tagger +8 -2
- data/bin/polarity-tagger-daemon +9 -5
- data/bin/polarity-tagger-server +8 -48
- data/core/LexiconMod.py +0 -13
- data/exec/polarity-tagger.rb +1 -11
- data/lib/opener/polarity_tagger.rb +13 -86
- data/lib/opener/polarity_tagger/cli.rb +43 -52
- data/lib/opener/polarity_tagger/external.rb +105 -0
- data/lib/opener/polarity_tagger/internal.rb +57 -0
- data/lib/opener/polarity_tagger/kaf/document.rb +53 -0
- data/lib/opener/polarity_tagger/kaf/term.rb +35 -0
- data/lib/opener/polarity_tagger/lexicon_map.rb +82 -0
- data/lib/opener/polarity_tagger/lexicons_cache.rb +67 -0
- data/lib/opener/polarity_tagger/server.rb +4 -5
- data/lib/opener/polarity_tagger/version.rb +5 -3
- data/opener-polarity-tagger.gemspec +10 -6
- data/task/requirements.rake +1 -1
- metadata +85 -75
data/exec/polarity-tagger.rb
CHANGED
@@ -1,19 +1,9 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'opener/daemons'
|
4
|
-
require 'opener/core'
|
5
4
|
|
6
5
|
require_relative '../lib/opener/polarity_tagger'
|
7
6
|
|
8
|
-
|
9
|
-
switcher_opts = {}
|
7
|
+
daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger)
|
10
8
|
|
11
|
-
parser = Opener::Daemons::OptParser.new do |opts|
|
12
|
-
switcher.bind(opts, switcher_opts)
|
13
|
-
end
|
14
|
-
|
15
|
-
options = parser.parse!(ARGV)
|
16
|
-
daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger, options)
|
17
|
-
|
18
|
-
switcher.install(switcher_opts)
|
19
9
|
daemon.start
|
data/lib/opener/polarity_tagger.rb
CHANGED
@@ -1,100 +1,27 @@
|
|
1
1
|
require 'open3'
|
2
|
+
require 'opener/core'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'hashie'
|
2
5
|
|
3
6
|
require_relative 'polarity_tagger/version'
|
4
7
|
require_relative 'polarity_tagger/cli'
|
8
|
+
require_relative 'polarity_tagger/external'
|
9
|
+
|
10
|
+
require_relative 'polarity_tagger/internal'
|
5
11
|
|
6
12
|
module Opener
|
7
|
-
##
|
8
|
-
# Ruby wrapper around the Python based polarity tagger.
|
9
|
-
#
|
10
|
-
# @!attribute [r] options
|
11
|
-
# @return [Hash]
|
12
|
-
#
|
13
13
|
class PolarityTagger
|
14
|
-
attr_reader :options, :args
|
15
14
|
|
16
|
-
|
17
|
-
# @param [Hash] options
|
18
|
-
#
|
19
|
-
# @option options [Array] :args Collection of arbitrary arguments to pass
|
20
|
-
# to the underlying kernel.
|
21
|
-
#
|
22
|
-
def initialize(options = {})
|
15
|
+
def initialize options = {}
|
23
16
|
@args = options.delete(:args) || []
|
24
17
|
@options = options
|
18
|
+
@klass = if ENV['LEGACY'] then External else Internal end
|
19
|
+
@proc = @klass.new args: @args
|
25
20
|
end
|
26
21
|
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
# @return [String]
|
31
|
-
#
|
32
|
-
def command
|
33
|
-
return "#{adjust_python_path} python -E -OO #{kernel} #{lexicon_path} #{args.join(" ")}"
|
34
|
-
end
|
35
|
-
|
36
|
-
def lexicon_path
|
37
|
-
if path = options[:resource_path]
|
38
|
-
return "--lexicon-path #{path}"
|
39
|
-
elsif path = ENV['POLARITY_LEXICON_PATH']
|
40
|
-
return "--lexicon-path #{path}"
|
41
|
-
else
|
42
|
-
return nil
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
##
|
47
|
-
# Processes the input and returns an Array containing the output of STDOUT,
|
48
|
-
# STDERR and an object containing process information.
|
49
|
-
#
|
50
|
-
# @param [String] input The text of which to detect the language.
|
51
|
-
# @return [Array]
|
52
|
-
#
|
53
|
-
def run(input)
|
54
|
-
begin
|
55
|
-
stdout, stderr, process = capture(input)
|
56
|
-
raise stderr unless process.success?
|
57
|
-
return stdout
|
58
|
-
rescue Exception => error
|
59
|
-
return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
protected
|
64
|
-
##
|
65
|
-
# @return [String]
|
66
|
-
#
|
67
|
-
def adjust_python_path
|
68
|
-
site_packages = File.join(core_dir, 'site-packages')
|
69
|
-
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
70
|
-
end
|
71
|
-
|
72
|
-
##
|
73
|
-
# capture3 method doesn't work properly with Jruby, so
|
74
|
-
# this is a workaround
|
75
|
-
#
|
76
|
-
def capture(input)
|
77
|
-
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
78
|
-
out_reader = Thread.new { o.read }
|
79
|
-
err_reader = Thread.new { e.read }
|
80
|
-
i.write input
|
81
|
-
i.close
|
82
|
-
[out_reader.value, err_reader.value, t.value]
|
83
|
-
}
|
22
|
+
def run input
|
23
|
+
@proc.run input
|
84
24
|
end
|
85
25
|
|
86
|
-
|
87
|
-
|
88
|
-
#
|
89
|
-
def core_dir
|
90
|
-
return File.expand_path('../../../core', __FILE__)
|
91
|
-
end
|
92
|
-
|
93
|
-
##
|
94
|
-
# @return [String]
|
95
|
-
#
|
96
|
-
def kernel
|
97
|
-
return File.join(core_dir, 'poltagger-basic-multi.py')
|
98
|
-
end
|
99
|
-
end # PolarityTagger
|
100
|
-
end # Opener
|
26
|
+
end
|
27
|
+
end
|
data/lib/opener/polarity_tagger/cli.rb
CHANGED
@@ -3,75 +3,66 @@ require 'opener/core'
|
|
3
3
|
module Opener
|
4
4
|
class PolarityTagger
|
5
5
|
##
|
6
|
-
# CLI wrapper around {Opener::LanguageIdentifier} using
|
6
|
+
# CLI wrapper around {Opener::LanguageIdentifier} using Slop.
|
7
7
|
#
|
8
|
-
# @!attribute [r]
|
9
|
-
# @return [
|
10
|
-
# @!attribute [r] option_parser
|
11
|
-
# @return [OptionParser]
|
8
|
+
# @!attribute [r] parser
|
9
|
+
# @return [Slop]
|
12
10
|
#
|
13
11
|
class CLI
|
14
|
-
attr_reader :
|
12
|
+
attr_reader :parser
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@parser = configure_slop
|
16
|
+
end
|
15
17
|
|
16
18
|
##
|
17
|
-
# @param [
|
19
|
+
# @param [Array] argv
|
18
20
|
#
|
19
|
-
def
|
20
|
-
|
21
|
+
def run(argv = ARGV)
|
22
|
+
parser.parse(argv)
|
23
|
+
end
|
21
24
|
|
22
|
-
|
23
|
-
|
25
|
+
##
|
26
|
+
# @return [Slop]
|
27
|
+
#
|
28
|
+
def configure_slop
|
29
|
+
Slop.new strict: false, indent: 2, help: true do
|
30
|
+
banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
|
24
31
|
|
25
|
-
|
26
|
-
opts.program_name = 'polarity-tagger'
|
27
|
-
opts.summary_indent = ' '
|
32
|
+
separator <<-EOF.chomp
|
28
33
|
|
29
|
-
|
34
|
+
About:
|
30
35
|
|
31
|
-
|
32
|
-
|
33
|
-
end
|
36
|
+
Component for tagging the polarity of elements in a KAF document. This
|
37
|
+
command reads input from STDIN.
|
34
38
|
|
35
|
-
|
36
|
-
show_version
|
37
|
-
end
|
39
|
+
Examples:
|
38
40
|
|
39
|
-
|
40
|
-
@options[:logging] = true
|
41
|
-
end
|
42
|
-
end
|
41
|
+
Processing a KAF file:
|
43
42
|
|
44
|
-
|
45
|
-
force = false
|
46
|
-
resource_switcher.install(@options, force)
|
47
|
-
end
|
43
|
+
cat some_file.kaf | polarity-tagger
|
48
44
|
|
49
|
-
|
50
|
-
# @param [String] input
|
51
|
-
#
|
52
|
-
def run(input)
|
53
|
-
tagger = PolarityTagger.new(options)
|
45
|
+
Displaying the underlying kernel options:
|
54
46
|
|
55
|
-
|
47
|
+
polarity-tagger -- --help
|
56
48
|
|
57
|
-
|
58
|
-
end
|
49
|
+
EOF
|
59
50
|
|
60
|
-
|
51
|
+
separator "\nOptions:\n"
|
61
52
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
def show_help
|
66
|
-
abort option_parser.to_s
|
67
|
-
end
|
53
|
+
on :v, :version, 'Shows the current version' do
|
54
|
+
abort "polarity-tagger v#{VERSION} on #{RUBY_DESCRIPTION}"
|
55
|
+
end
|
68
56
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
57
|
+
run do |opts, args|
|
58
|
+
tagger = PolarityTagger.new(:args => args)
|
59
|
+
input = STDIN.tty? ? nil : STDIN.read
|
60
|
+
|
61
|
+
puts tagger.run(input)
|
62
|
+
end
|
63
|
+
end
|
74
64
|
end
|
75
|
-
|
76
|
-
|
77
|
-
end
|
65
|
+
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
data/lib/opener/polarity_tagger/external.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
module Opener
|
2
|
+
class PolarityTagger
|
3
|
+
##
|
4
|
+
# Ruby wrapper around the Python based polarity tagger.
|
5
|
+
#
|
6
|
+
# @!attribute [r] options
|
7
|
+
# @return [Hash]
|
8
|
+
#
|
9
|
+
# @!attribute [r] args
|
10
|
+
# @return [Array]
|
11
|
+
#
|
12
|
+
class External
|
13
|
+
|
14
|
+
attr_reader :options, :args
|
15
|
+
|
16
|
+
##
|
17
|
+
# @param [Hash] options
|
18
|
+
#
|
19
|
+
# @option options [Array] :args Collection of arbitrary arguments to pass
|
20
|
+
# to the underlying kernel.
|
21
|
+
#
|
22
|
+
# @option options [String] :resource_path Path to the lexicons to use.
|
23
|
+
#
|
24
|
+
def initialize options = {}
|
25
|
+
@args = options.delete(:args) || []
|
26
|
+
@options = options
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Returns a String containing the command to use for executing the kernel.
|
31
|
+
#
|
32
|
+
# @return [String]
|
33
|
+
#
|
34
|
+
def command
|
35
|
+
return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
|
36
|
+
end
|
37
|
+
|
38
|
+
##
|
39
|
+
# @return [String]
|
40
|
+
#
|
41
|
+
def lexicon_path
|
42
|
+
path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
43
|
+
ENV['POLARITY_LEXICON_PATH']
|
44
|
+
|
45
|
+
return path ? "--lexicon-path #{path}" : nil
|
46
|
+
end
|
47
|
+
|
48
|
+
##
|
49
|
+
# Processes the input and returns an Array containing the output of STDOUT,
|
50
|
+
# STDERR and an object containing process information.
|
51
|
+
#
|
52
|
+
# @param [String] input The text of which to detect the language.
|
53
|
+
# @return [Array]
|
54
|
+
#
|
55
|
+
def run(input)
|
56
|
+
stdout, stderr, process = capture(input)
|
57
|
+
|
58
|
+
raise stderr unless process.success?
|
59
|
+
puts stderr if ENV['DEBUG']
|
60
|
+
|
61
|
+
return stdout
|
62
|
+
end
|
63
|
+
|
64
|
+
protected
|
65
|
+
|
66
|
+
##
|
67
|
+
# @return [String]
|
68
|
+
#
|
69
|
+
def adjust_python_path
|
70
|
+
site_packages = File.join(core_dir, 'site-packages')
|
71
|
+
|
72
|
+
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# capture3 method doesn't work properly with Jruby, so
|
77
|
+
# this is a workaround
|
78
|
+
#
|
79
|
+
def capture(input)
|
80
|
+
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
81
|
+
out_reader = Thread.new { o.read }
|
82
|
+
err_reader = Thread.new { e.read }
|
83
|
+
i.write input
|
84
|
+
i.close
|
85
|
+
[out_reader.value, err_reader.value, t.value]
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
##
|
90
|
+
# @return [String]
|
91
|
+
#
|
92
|
+
def core_dir
|
93
|
+
File.expand_path '../../../../core', __FILE__
|
94
|
+
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# @return [String]
|
98
|
+
#
|
99
|
+
def kernel
|
100
|
+
File.join core_dir, 'poltagger-basic-multi.py'
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
data/lib/opener/polarity_tagger/internal.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative 'lexicons_cache'
|
2
|
+
require_relative 'lexicon_map'
|
3
|
+
require_relative 'kaf/document'
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class PolarityTagger
|
7
|
+
class Internal
|
8
|
+
|
9
|
+
DESC = 'VUA polarity tagger multilanguage'
|
10
|
+
LAST_EDITED = '21may2014'
|
11
|
+
VERSION = '1.2'
|
12
|
+
|
13
|
+
def initialize ignore_pos: false, **params
|
14
|
+
@cache = LexiconsCache.new
|
15
|
+
|
16
|
+
@ignore_pos = ignore_pos
|
17
|
+
end
|
18
|
+
|
19
|
+
def run input
|
20
|
+
@kaf = KAF::Document.from_xml input
|
21
|
+
@map = @kaf.map = @cache[@kaf.language]
|
22
|
+
|
23
|
+
negators = 0
|
24
|
+
@kaf.terms.each do |t|
|
25
|
+
lemma = t.lemma&.downcase
|
26
|
+
pos = if @ignore_pos then nil else t.pos end
|
27
|
+
attrs = Hashie::Mash.new
|
28
|
+
|
29
|
+
lexicon, polarity_pos = @map.by_polarity lemma, pos
|
30
|
+
|
31
|
+
if lexicon.polarity != 'unknown'
|
32
|
+
attrs.polarity = lexicon.polarity
|
33
|
+
end
|
34
|
+
if l = @map.by_negator(lemma)
|
35
|
+
negators += 1
|
36
|
+
lexicon, polarity = l, nil
|
37
|
+
attrs.sentiment_modifier = 'shifter'
|
38
|
+
end
|
39
|
+
if l = @map.by_intensifier(lemma)
|
40
|
+
lexicon, polarity = l, nil
|
41
|
+
attrs.sentiment_modifier = 'intensifier'
|
42
|
+
end
|
43
|
+
|
44
|
+
if attrs.size > 0
|
45
|
+
attrs.resource = lexicon.resource if lexicon.resource
|
46
|
+
t.setPolarity attrs, polarity_pos
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
@kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
|
51
|
+
|
52
|
+
@kaf.to_xml
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/opener/polarity_tagger/kaf/document.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require_relative 'term'
|
2
|
+
|
3
|
+
module Opener
|
4
|
+
module KAF
|
5
|
+
class Document
|
6
|
+
|
7
|
+
attr_reader :document
|
8
|
+
attr_reader :lexicons
|
9
|
+
|
10
|
+
attr_accessor :map
|
11
|
+
|
12
|
+
def initialize xml
|
13
|
+
@document = xml
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.from_xml xml
|
17
|
+
new Nokogiri::XML xml
|
18
|
+
end
|
19
|
+
|
20
|
+
def language
|
21
|
+
@language ||= @document.at_xpath('KAF').attr 'xml:lang'
|
22
|
+
end
|
23
|
+
|
24
|
+
def terms
|
25
|
+
@terms ||= collection 'KAF/terms/term', Term
|
26
|
+
end
|
27
|
+
|
28
|
+
def add_linguistic_processor name, version, layer, timestamp: false
|
29
|
+
header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>')
|
30
|
+
procs = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
|
31
|
+
procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>")
|
32
|
+
lp = procs.add_child('<lp/>')
|
33
|
+
lp.attr(
|
34
|
+
timestamp: if timestamp then Time.now.iso8601 else '*' end,
|
35
|
+
version: version,
|
36
|
+
name: name,
|
37
|
+
)
|
38
|
+
lp
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_xml
|
42
|
+
@document.to_xml indent: 2
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
|
47
|
+
def collection query, wrapper
|
48
|
+
@document.xpath(query).map{ |node| wrapper.new self, node }
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|