opener-polarity-tagger 2.4.2 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/LICENSE.txt +13 -0
- data/README.md +115 -93
- data/bin/console +13 -0
- data/bin/polarity-tagger +8 -2
- data/bin/polarity-tagger-daemon +9 -5
- data/bin/polarity-tagger-server +8 -48
- data/core/LexiconMod.py +0 -13
- data/exec/polarity-tagger.rb +1 -11
- data/lib/opener/polarity_tagger.rb +12 -86
- data/lib/opener/polarity_tagger/cli.rb +43 -52
- data/lib/opener/polarity_tagger/external.rb +105 -0
- data/lib/opener/polarity_tagger/internal.rb +57 -0
- data/lib/opener/polarity_tagger/kaf/document.rb +53 -0
- data/lib/opener/polarity_tagger/kaf/term.rb +35 -0
- data/lib/opener/polarity_tagger/lexicon_map.rb +82 -0
- data/lib/opener/polarity_tagger/lexicons_cache.rb +68 -0
- data/lib/opener/polarity_tagger/server.rb +4 -5
- data/lib/opener/polarity_tagger/version.rb +5 -3
- data/opener-polarity-tagger.gemspec +10 -6
- data/task/requirements.rake +1 -1
- metadata +85 -75
data/exec/polarity-tagger.rb
CHANGED
@@ -1,19 +1,9 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'opener/daemons'
|
4
|
-
require 'opener/core'
|
5
4
|
|
6
5
|
require_relative '../lib/opener/polarity_tagger'
|
7
6
|
|
8
|
-
|
9
|
-
switcher_opts = {}
|
7
|
+
daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger)
|
10
8
|
|
11
|
-
parser = Opener::Daemons::OptParser.new do |opts|
|
12
|
-
switcher.bind(opts, switcher_opts)
|
13
|
-
end
|
14
|
-
|
15
|
-
options = parser.parse!(ARGV)
|
16
|
-
daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger, options)
|
17
|
-
|
18
|
-
switcher.install(switcher_opts)
|
19
9
|
daemon.start
|
@@ -1,101 +1,27 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'opener/core'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'hashie'
|
3
5
|
|
4
6
|
require_relative 'polarity_tagger/version'
|
5
7
|
require_relative 'polarity_tagger/cli'
|
8
|
+
require_relative 'polarity_tagger/external'
|
9
|
+
|
10
|
+
require_relative 'polarity_tagger/internal'
|
6
11
|
|
7
12
|
module Opener
|
8
|
-
##
|
9
|
-
# Ruby wrapper around the Python based polarity tagger.
|
10
|
-
#
|
11
|
-
# @!attribute [r] options
|
12
|
-
# @return [Hash]
|
13
|
-
#
|
14
13
|
class PolarityTagger
|
15
|
-
attr_reader :options, :args
|
16
14
|
|
17
|
-
|
18
|
-
# @param [Hash] options
|
19
|
-
#
|
20
|
-
# @option options [Array] :args Collection of arbitrary arguments to pass
|
21
|
-
# to the underlying kernel.
|
22
|
-
#
|
23
|
-
def initialize(options = {})
|
15
|
+
def initialize options = {}
|
24
16
|
@args = options.delete(:args) || []
|
25
17
|
@options = options
|
18
|
+
@klass = if ENV['LEGACY'] then External else Internal end
|
19
|
+
@proc = @klass.new args: @args
|
26
20
|
end
|
27
21
|
|
28
|
-
|
29
|
-
|
30
|
-
#
|
31
|
-
# @return [String]
|
32
|
-
#
|
33
|
-
def command
|
34
|
-
return "#{adjust_python_path} python -E -OO #{kernel} #{lexicon_path} #{args.join(" ")}"
|
35
|
-
end
|
36
|
-
|
37
|
-
def lexicon_path
|
38
|
-
if path = options[:resource_path]
|
39
|
-
return "--lexicon-path #{path}"
|
40
|
-
elsif path = ENV['POLARITY_LEXICON_PATH']
|
41
|
-
return "--lexicon-path #{path}"
|
42
|
-
else
|
43
|
-
return nil
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
##
|
48
|
-
# Processes the input and returns an Array containing the output of STDOUT,
|
49
|
-
# STDERR and an object containing process information.
|
50
|
-
#
|
51
|
-
# @param [String] input The text of which to detect the language.
|
52
|
-
# @return [Array]
|
53
|
-
#
|
54
|
-
def run(input)
|
55
|
-
begin
|
56
|
-
stdout, stderr, process = capture(input)
|
57
|
-
raise stderr unless process.success?
|
58
|
-
return stdout
|
59
|
-
rescue Exception => error
|
60
|
-
return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
protected
|
65
|
-
##
|
66
|
-
# @return [String]
|
67
|
-
#
|
68
|
-
def adjust_python_path
|
69
|
-
site_packages = File.join(core_dir, 'site-packages')
|
70
|
-
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
71
|
-
end
|
72
|
-
|
73
|
-
##
|
74
|
-
# capture3 method doesn't work properly with Jruby, so
|
75
|
-
# this is a workaround
|
76
|
-
#
|
77
|
-
def capture(input)
|
78
|
-
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
79
|
-
out_reader = Thread.new { o.read }
|
80
|
-
err_reader = Thread.new { e.read }
|
81
|
-
i.write input
|
82
|
-
i.close
|
83
|
-
[out_reader.value, err_reader.value, t.value]
|
84
|
-
}
|
22
|
+
def run input
|
23
|
+
@proc.run input
|
85
24
|
end
|
86
25
|
|
87
|
-
|
88
|
-
|
89
|
-
#
|
90
|
-
def core_dir
|
91
|
-
return File.expand_path('../../../core', __FILE__)
|
92
|
-
end
|
93
|
-
|
94
|
-
##
|
95
|
-
# @return [String]
|
96
|
-
#
|
97
|
-
def kernel
|
98
|
-
return File.join(core_dir, 'poltagger-basic-multi.py')
|
99
|
-
end
|
100
|
-
end # PolarityTagger
|
101
|
-
end # Opener
|
26
|
+
end
|
27
|
+
end
|
@@ -3,75 +3,66 @@ require 'opener/core'
|
|
3
3
|
module Opener
|
4
4
|
class PolarityTagger
|
5
5
|
##
|
6
|
-
# CLI wrapper around {Opener::LanguageIdentifier} using
|
6
|
+
# CLI wrapper around {Opener::PolarityTagger} using Slop.
|
7
7
|
#
|
8
|
-
# @!attribute [r]
|
9
|
-
# @return [
|
10
|
-
# @!attribute [r] option_parser
|
11
|
-
# @return [OptionParser]
|
8
|
+
# @!attribute [r] parser
|
9
|
+
# @return [Slop]
|
12
10
|
#
|
13
11
|
class CLI
|
14
|
-
attr_reader :
|
12
|
+
attr_reader :parser
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@parser = configure_slop
|
16
|
+
end
|
15
17
|
|
16
18
|
##
|
17
|
-
# @param [
|
19
|
+
# @param [Array] argv
|
18
20
|
#
|
19
|
-
def
|
20
|
-
|
21
|
+
def run(argv = ARGV)
|
22
|
+
parser.parse(argv)
|
23
|
+
end
|
21
24
|
|
22
|
-
|
23
|
-
|
25
|
+
##
|
26
|
+
# @return [Slop]
|
27
|
+
#
|
28
|
+
def configure_slop
|
29
|
+
Slop.new strict: false, indent: 2, help: true do
|
30
|
+
banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
|
24
31
|
|
25
|
-
|
26
|
-
opts.program_name = 'polarity-tagger'
|
27
|
-
opts.summary_indent = ' '
|
32
|
+
separator <<-EOF.chomp
|
28
33
|
|
29
|
-
|
34
|
+
About:
|
30
35
|
|
31
|
-
|
32
|
-
|
33
|
-
end
|
36
|
+
Component for tagging the polarity of elements in a KAF document. This
|
37
|
+
command reads input from STDIN.
|
34
38
|
|
35
|
-
|
36
|
-
show_version
|
37
|
-
end
|
39
|
+
Examples:
|
38
40
|
|
39
|
-
|
40
|
-
@options[:logging] = true
|
41
|
-
end
|
42
|
-
end
|
41
|
+
Processing a KAF file:
|
43
42
|
|
44
|
-
|
45
|
-
force = false
|
46
|
-
resource_switcher.install(@options, force)
|
47
|
-
end
|
43
|
+
cat some_file.kaf | polarity-tagger
|
48
44
|
|
49
|
-
|
50
|
-
# @param [String] input
|
51
|
-
#
|
52
|
-
def run(input)
|
53
|
-
tagger = PolarityTagger.new(options)
|
45
|
+
Displaying the underlying kernel options:
|
54
46
|
|
55
|
-
|
47
|
+
polarity-tagger -- --help
|
56
48
|
|
57
|
-
|
58
|
-
end
|
49
|
+
EOF
|
59
50
|
|
60
|
-
|
51
|
+
separator "\nOptions:\n"
|
61
52
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
def show_help
|
66
|
-
abort option_parser.to_s
|
67
|
-
end
|
53
|
+
on :v, :version, 'Shows the current version' do
|
54
|
+
abort "polarity-tagger v#{VERSION} on #{RUBY_DESCRIPTION}"
|
55
|
+
end
|
68
56
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
57
|
+
run do |opts, args|
|
58
|
+
tagger = PolarityTagger.new(:args => args)
|
59
|
+
input = STDIN.tty? ? nil : STDIN.read
|
60
|
+
|
61
|
+
puts tagger.run(input)
|
62
|
+
end
|
63
|
+
end
|
74
64
|
end
|
75
|
-
|
76
|
-
|
77
|
-
end
|
65
|
+
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Opener
|
2
|
+
class PolarityTagger
|
3
|
+
##
|
4
|
+
# Ruby wrapper around the Python based polarity tagger.
|
5
|
+
#
|
6
|
+
# @!attribute [r] options
|
7
|
+
# @return [Hash]
|
8
|
+
#
|
9
|
+
# @!attribute [r] args
|
10
|
+
# @return [Array]
|
11
|
+
#
|
12
|
+
class External
|
13
|
+
|
14
|
+
attr_reader :options, :args
|
15
|
+
|
16
|
+
##
|
17
|
+
# @param [Hash] options
|
18
|
+
#
|
19
|
+
# @option options [Array] :args Collection of arbitrary arguments to pass
|
20
|
+
# to the underlying kernel.
|
21
|
+
#
|
22
|
+
# @option options [String] :resource_path Path to the lexicons to use.
|
23
|
+
#
|
24
|
+
def initialize options = {}
|
25
|
+
@args = options.delete(:args) || []
|
26
|
+
@options = options
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Returns a String containing the command to use for executing the kernel.
|
31
|
+
#
|
32
|
+
# @return [String]
|
33
|
+
#
|
34
|
+
def command
|
35
|
+
return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
|
36
|
+
end
|
37
|
+
|
38
|
+
##
|
39
|
+
# @return [String, nil]
|
40
|
+
#
|
41
|
+
def lexicon_path
|
42
|
+
path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
43
|
+
ENV['POLARITY_LEXICON_PATH']
|
44
|
+
|
45
|
+
return path ? "--lexicon-path #{path}" : nil
|
46
|
+
end
|
47
|
+
|
48
|
+
##
|
49
|
+
# Processes the input with the Python kernel and returns what it wrote to
|
50
|
+
# STDOUT; raises with the STDERR contents when the process fails.
|
51
|
+
#
|
52
|
+
# @param [String] input The KAF document to tag.
|
53
|
+
# @return [String]
|
54
|
+
#
|
55
|
+
def run(input)
|
56
|
+
stdout, stderr, process = capture(input)
|
57
|
+
|
58
|
+
raise stderr unless process.success?
|
59
|
+
puts stderr if ENV['DEBUG']
|
60
|
+
|
61
|
+
return stdout
|
62
|
+
end
|
63
|
+
|
64
|
+
protected
|
65
|
+
|
66
|
+
##
|
67
|
+
# @return [String]
|
68
|
+
#
|
69
|
+
def adjust_python_path
|
70
|
+
site_packages = File.join(core_dir, 'site-packages')
|
71
|
+
|
72
|
+
"env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# Open3.capture3 doesn't work properly on JRuby, so this uses popen3 with
|
77
|
+
# reader threads as a workaround.
|
78
|
+
#
|
79
|
+
def capture(input)
|
80
|
+
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
81
|
+
out_reader = Thread.new { o.read }
|
82
|
+
err_reader = Thread.new { e.read }
|
83
|
+
i.write input
|
84
|
+
i.close
|
85
|
+
[out_reader.value, err_reader.value, t.value]
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
##
|
90
|
+
# @return [String]
|
91
|
+
#
|
92
|
+
def core_dir
|
93
|
+
File.expand_path '../../../../core', __FILE__
|
94
|
+
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# @return [String]
|
98
|
+
#
|
99
|
+
def kernel
|
100
|
+
File.join core_dir, 'poltagger-basic-multi.py'
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative 'lexicons_cache'
|
2
|
+
require_relative 'lexicon_map'
|
3
|
+
require_relative 'kaf/document'
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class PolarityTagger
|
7
|
+
class Internal
|
8
|
+
|
9
|
+
DESC = 'VUA polarity tagger multilanguage'
|
10
|
+
LAST_EDITED = '21may2014'
|
11
|
+
VERSION = '1.2'
|
12
|
+
|
13
|
+
def initialize ignore_pos: false, **params
|
14
|
+
@cache = LexiconsCache.new
|
15
|
+
|
16
|
+
@ignore_pos = ignore_pos
|
17
|
+
end
|
18
|
+
|
19
|
+
def run input
|
20
|
+
@kaf = KAF::Document.from_xml input
|
21
|
+
@map = @kaf.map = @cache[@kaf.language]
|
22
|
+
|
23
|
+
negators = 0
|
24
|
+
@kaf.terms.each do |t|
|
25
|
+
lemma = t.lemma&.downcase
|
26
|
+
pos = if @ignore_pos then nil else t.pos end
|
27
|
+
attrs = Hashie::Mash.new
|
28
|
+
|
29
|
+
lexicon, polarity_pos = @map.by_polarity lemma, pos
|
30
|
+
|
31
|
+
if lexicon.polarity != 'unknown'
|
32
|
+
attrs.polarity = lexicon.polarity
|
33
|
+
end
|
34
|
+
if l = @map.by_negator(lemma)
|
35
|
+
negators += 1
|
36
|
+
lexicon, polarity = l, nil
|
37
|
+
attrs.sentiment_modifier = 'shifter'
|
38
|
+
end
|
39
|
+
if l = @map.by_intensifier(lemma)
|
40
|
+
lexicon, polarity = l, nil
|
41
|
+
attrs.sentiment_modifier = 'intensifier'
|
42
|
+
end
|
43
|
+
|
44
|
+
if attrs.size > 0
|
45
|
+
attrs.resource = lexicon.resource if lexicon.resource
|
46
|
+
t.setPolarity attrs, polarity_pos
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
@kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
|
51
|
+
|
52
|
+
@kaf.to_xml
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require_relative 'term'
|
2
|
+
|
3
|
+
module Opener
|
4
|
+
module KAF
|
5
|
+
class Document
|
6
|
+
|
7
|
+
attr_reader :document
|
8
|
+
attr_reader :lexicons
|
9
|
+
|
10
|
+
attr_accessor :map
|
11
|
+
|
12
|
+
def initialize xml
|
13
|
+
@document = xml
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.from_xml xml
|
17
|
+
new Nokogiri::XML xml
|
18
|
+
end
|
19
|
+
|
20
|
+
def language
|
21
|
+
@language ||= @document.at_xpath('KAF').attr 'xml:lang'
|
22
|
+
end
|
23
|
+
|
24
|
+
def terms
|
25
|
+
@terms ||= collection 'KAF/terms/term', Term
|
26
|
+
end
|
27
|
+
|
28
|
+
def add_linguistic_processor name, version, layer, timestamp: false
|
29
|
+
header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>')
|
30
|
+
procs = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
|
31
|
+
procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>")
|
32
|
+
lp = procs.add_child('<lp/>')
|
33
|
+
lp.attr(
|
34
|
+
timestamp: if timestamp then Time.now.iso8601 else '*' end,
|
35
|
+
version: version,
|
36
|
+
name: name,
|
37
|
+
)
|
38
|
+
lp
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_xml
|
42
|
+
@document.to_xml indent: 2
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
|
47
|
+
def collection query, wrapper
|
48
|
+
@document.xpath(query).map{ |node| wrapper.new self, node }
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|