opener-polarity-tagger 2.4.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'opener/daemons'
4
- require 'opener/core'
5
4
 
6
5
  require_relative '../lib/opener/polarity_tagger'
7
6
 
8
- switcher = Opener::Core::ResourceSwitcher.new
9
- switcher_opts = {}
7
+ daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger)
10
8
 
11
- parser = Opener::Daemons::OptParser.new do |opts|
12
- switcher.bind(opts, switcher_opts)
13
- end
14
-
15
- options = parser.parse!(ARGV)
16
- daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger, options)
17
-
18
- switcher.install(switcher_opts)
19
9
  daemon.start
@@ -1,100 +1,27 @@
1
1
  require 'open3'
2
+ require 'opener/core'
3
+ require 'nokogiri'
4
+ require 'hashie'
2
5
 
3
6
  require_relative 'polarity_tagger/version'
4
7
  require_relative 'polarity_tagger/cli'
8
+ require_relative 'polarity_tagger/external'
9
+
10
+ require_relative 'polarity_tagger/internal'
5
11
 
6
12
  module Opener
7
- ##
8
- # Ruby wrapper around the Python based polarity tagger.
9
- #
10
- # @!attribute [r] options
11
- # @return [Hash]
12
- #
13
13
  class PolarityTagger
14
- attr_reader :options, :args
15
14
 
16
- ##
17
- # @param [Hash] options
18
- #
19
- # @option options [Array] :args Collection of arbitrary arguments to pass
20
- # to the underlying kernel.
21
- #
22
- def initialize(options = {})
15
+ def initialize options = {}
23
16
  @args = options.delete(:args) || []
24
17
  @options = options
18
+ @klass = if ENV['LEGACY'] then External else Internal end
19
+ @proc = @klass.new args: @args
25
20
  end
26
21
 
27
- ##
28
- # Returns a String containing the command to use for executing the kernel.
29
- #
30
- # @return [String]
31
- #
32
- def command
33
- return "#{adjust_python_path} python -E -OO #{kernel} #{lexicon_path} #{args.join(" ")}"
34
- end
35
-
36
- def lexicon_path
37
- if path = options[:resource_path]
38
- return "--lexicon-path #{path}"
39
- elsif path = ENV['POLARITY_LEXICON_PATH']
40
- return "--lexicon-path #{path}"
41
- else
42
- return nil
43
- end
44
- end
45
-
46
- ##
47
- # Processes the input and returns an Array containing the output of STDOUT,
48
- # STDERR and an object containing process information.
49
- #
50
- # @param [String] input The text of which to detect the language.
51
- # @return [Array]
52
- #
53
- def run(input)
54
- begin
55
- stdout, stderr, process = capture(input)
56
- raise stderr unless process.success?
57
- return stdout
58
- rescue Exception => error
59
- return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
60
- end
61
- end
62
-
63
- protected
64
- ##
65
- # @return [String]
66
- #
67
- def adjust_python_path
68
- site_packages = File.join(core_dir, 'site-packages')
69
- "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
70
- end
71
-
72
- ##
73
- # capture3 method doesn't work properly with Jruby, so
74
- # this is a workaround
75
- #
76
- def capture(input)
77
- Open3.popen3(*command.split(" ")) {|i, o, e, t|
78
- out_reader = Thread.new { o.read }
79
- err_reader = Thread.new { e.read }
80
- i.write input
81
- i.close
82
- [out_reader.value, err_reader.value, t.value]
83
- }
22
+ def run input
23
+ @proc.run input
84
24
  end
85
25
 
86
- ##
87
- # @return [String]
88
- #
89
- def core_dir
90
- return File.expand_path('../../../core', __FILE__)
91
- end
92
-
93
- ##
94
- # @return [String]
95
- #
96
- def kernel
97
- return File.join(core_dir, 'poltagger-basic-multi.py')
98
- end
99
- end # PolarityTagger
100
- end # Opener
26
+ end
27
+ end
@@ -3,75 +3,66 @@ require 'opener/core'
3
3
  module Opener
4
4
  class PolarityTagger
5
5
  ##
6
- # CLI wrapper around {Opener::LanguageIdentifier} using OptionParser.
6
+ # CLI wrapper around {Opener::PolarityTagger} using Slop.
7
7
  #
8
- # @!attribute [r] options
9
- # @return [Hash]
10
- # @!attribute [r] option_parser
11
- # @return [OptionParser]
8
+ # @!attribute [r] parser
9
+ # @return [Slop]
12
10
  #
13
11
  class CLI
14
- attr_reader :options, :option_parser, :resource_switcher
12
+ attr_reader :parser
13
+
14
+ def initialize
15
+ @parser = configure_slop
16
+ end
15
17
 
16
18
  ##
17
- # @param [Hash] options
19
+ # @param [Array] argv
18
20
  #
19
- def initialize(options = {})
20
- @options = options
21
+ def run(argv = ARGV)
22
+ parser.parse(argv)
23
+ end
21
24
 
22
- @resource_switcher = Opener::Core::ResourceSwitcher.new
23
- component_options, options[:args] = Opener::Core::ArgvSplitter.split(options[:args])
25
+ ##
26
+ # @return [Slop]
27
+ #
28
+ def configure_slop
29
+ Slop.new strict: false, indent: 2, help: true do
30
+ banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
24
31
 
25
- @option_parser = OptionParser.new do |opts|
26
- opts.program_name = 'polarity-tagger'
27
- opts.summary_indent = ' '
32
+ separator <<-EOF.chomp
28
33
 
29
- resource_switcher.bind(opts, @options)
34
+ About:
30
35
 
31
- opts.on('-h', '--help', 'Shows this help message') do
32
- show_help
33
- end
36
+ Component for tagging the polarity of elements in a KAF document. This
37
+ command reads input from STDIN.
34
38
 
35
- opts.on('-v', '--version', 'Shows the current version') do
36
- show_version
37
- end
39
+ Examples:
38
40
 
39
- opts.on('-l', '--log', 'Enable logging to STDERR') do
40
- @options[:logging] = true
41
- end
42
- end
41
+ Processing a KAF file:
43
42
 
44
- option_parser.parse!(component_options)
45
- force = false
46
- resource_switcher.install(@options, force)
47
- end
43
+ cat some_file.kaf | polarity-tagger
48
44
 
49
- ##
50
- # @param [String] input
51
- #
52
- def run(input)
53
- tagger = PolarityTagger.new(options)
45
+ Displaying the underlying kernel options:
54
46
 
55
- stdout, stderr, process = tagger.run(input)
47
+ polarity-tagger -- --help
56
48
 
57
- puts stdout
58
- end
49
+ EOF
59
50
 
60
- private
51
+ separator "\nOptions:\n"
61
52
 
62
- ##
63
- # Shows the help message and exits the program.
64
- #
65
- def show_help
66
- abort option_parser.to_s
67
- end
53
+ on :v, :version, 'Shows the current version' do
54
+ abort "polarity-tagger v#{VERSION} on #{RUBY_DESCRIPTION}"
55
+ end
68
56
 
69
- ##
70
- # Shows the version and exits the program.
71
- #
72
- def show_version
73
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
57
+ run do |opts, args|
58
+ tagger = PolarityTagger.new(:args => args)
59
+ input = STDIN.tty? ? nil : STDIN.read
60
+
61
+ puts tagger.run(input)
62
+ end
63
+ end
74
64
  end
75
- end # CLI
76
- end # PolarityTagger
77
- end # Opener
65
+
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,105 @@
module Opener
  class PolarityTagger
    ##
    # Ruby wrapper around the Python based polarity tagger. The kernel is
    # started as a child process: the input is written to its STDIN and the
    # result is read back from its STDOUT.
    #
    # @!attribute [r] options
    #  @return [Hash]
    #
    # @!attribute [r] args
    #  @return [Array]
    #
    class External
      attr_reader :options, :args

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args Collection of arbitrary arguments to pass
      #  to the underlying kernel.
      #
      # @option options [String] :resource_path Path to the lexicons to use.
      #
      def initialize(options = {})
        # Work on a copy so the caller's Hash is not stripped of its :args key.
        options  = options.dup
        @args    = options.delete(:args) || []
        @options = options
      end

      ##
      # Returns a String containing the command to use for executing the kernel.
      #
      # The String is later split on single spaces by {#capture}, so paths or
      # arguments that contain spaces end up as several separate arguments.
      #
      # @return [String]
      #
      def command
        "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(' ')}"
      end

      ##
      # Returns the `--lexicon-path` flag for the kernel. The path is taken
      # from the `:resource_path` option, then the `RESOURCE_PATH` environment
      # variable, then `POLARITY_LEXICON_PATH`.
      #
      # @return [String, nil] nil when none of the three sources is set.
      #
      def lexicon_path
        path = options[:resource_path] ||
          ENV['RESOURCE_PATH'] ||
          ENV['POLARITY_LEXICON_PATH']

        path ? "--lexicon-path #{path}" : nil
      end

      ##
      # Feeds the input to the kernel and returns whatever the kernel wrote to
      # STDOUT.
      #
      # @param [String] input The document to hand to the kernel on STDIN.
      # @return [String] The STDOUT of the kernel.
      # @raise [RuntimeError] When the kernel exits unsuccessfully; the message
      #  is the STDERR output of the kernel.
      #
      def run(input)
        stdout, stderr, process = capture(input)

        raise stderr unless process.success?

        # Diagnostics go to STDERR so they can never get mixed into the
        # document that callers print on STDOUT.
        $stderr.puts stderr if ENV['DEBUG']

        stdout
      end

      protected

      ##
      # Returns the `env` prefix that points PYTHONPATH at the bundled
      # site-packages directory.
      #
      # NOTE(review): {#capture} passes the command to Open3.popen3 as an
      # argument list, so "$PYTHONPATH" is presumably handed to `env` literally
      # instead of being expanded by a shell. `python -E` is also documented to
      # ignore PYTHON* environment variables. Confirm whether this prefix has
      # any effect.
      #
      # @return [String]
      #
      def adjust_python_path
        site_packages = File.join(core_dir, 'site-packages')

        "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
      end

      ##
      # capture3 method doesn't work properly with Jruby, so
      # this is a workaround
      #
      # @param [String] input Data written to the STDIN of the kernel.
      # @return [Array] STDOUT, STDERR and the value of the wait thread.
      #
      def capture(input)
        Open3.popen3(*command.split(' ')) do |stdin, stdout, stderr, waiter|
          # Both pipes are drained in their own thread while the input is
          # being written.
          out_reader = Thread.new { stdout.read }
          err_reader = Thread.new { stderr.read }

          stdin.write(input)
          stdin.close

          [out_reader.value, err_reader.value, waiter.value]
        end
      end

      ##
      # @return [String] The "core" directory, four levels above this file.
      #
      def core_dir
        File.expand_path('../../../../core', __FILE__)
      end

      ##
      # @return [String] Path of the Python kernel script inside {#core_dir}.
      #
      def kernel
        File.join(core_dir, 'poltagger-basic-multi.py')
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require_relative 'lexicons_cache'
2
+ require_relative 'lexicon_map'
3
+ require_relative 'kaf/document'
4
+
module Opener
  class PolarityTagger
    ##
    # Ruby implementation of the polarity tagger. Every term of a KAF document
    # is looked up in the lexicon map of the document's language and tagged
    # with the polarity and/or sentiment modifier that was found.
    #
    class Internal
      DESC        = 'VUA polarity tagger multilanguage'.freeze
      LAST_EDITED = '21may2014'.freeze
      VERSION     = '1.2'.freeze

      ##
      # @param [TrueClass|FalseClass] ignore_pos When true the polarity lookup
      #  is done with a nil part of speech instead of the term's own.
      # @param [Hash] params Any other keyword arguments (such as `:args`);
      #  accepted and ignored.
      #
      def initialize(ignore_pos: false, **params)
        @cache      = LexiconsCache.new
        @ignore_pos = ignore_pos
      end

      ##
      # Tags the terms of the given document and registers this tagger as a
      # linguistic processor of the 'terms' layer.
      #
      # @param [String] input The document handed to KAF::Document.from_xml.
      # @return [String] The result of calling `to_xml` on the document.
      #
      def run(input)
        @kaf = KAF::Document.from_xml(input)
        @map = @kaf.map = @cache[@kaf.language]

        @kaf.terms.each { |term| tag_term(term) }

        @kaf.add_linguistic_processor(DESC, "#{LAST_EDITED}_#{VERSION}", 'terms')

        @kaf.to_xml
      end

      private

      ##
      # Looks up a single term and calls `setPolarity` on it when the lookup
      # produced at least one attribute.
      #
      # @param [Object] term Must respond to `lemma`, `pos` and `setPolarity`.
      #
      def tag_term(term)
        lemma = term.lemma&.downcase
        pos   = @ignore_pos ? nil : term.pos
        attrs = Hashie::Mash.new

        # by_polarity is destructured as a [lexicon, polarity_pos] pair.
        lexicon, polarity_pos = @map.by_polarity(lemma, pos)
        attrs.polarity = lexicon.polarity if lexicon.polarity != 'unknown'

        # A negator or intensifier entry replaces the lexicon that supplies
        # :resource below. When both match, the intensifier wins because it is
        # checked last.
        # NOTE(review): polarity_pos from the polarity lookup is kept as is;
        # confirm it should not be reset for modifiers.
        if (negator = @map.by_negator(lemma))
          lexicon = negator
          attrs.sentiment_modifier = 'shifter'
        end

        if (intensifier = @map.by_intensifier(lemma))
          lexicon = intensifier
          attrs.sentiment_modifier = 'intensifier'
        end

        # Terms without polarity or modifier are left untouched.
        return if attrs.empty?

        attrs.resource = lexicon.resource if lexicon.resource
        term.setPolarity(attrs, polarity_pos)
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require_relative 'term'
2
+
module Opener
  module KAF
    ##
    # Wrapper around a parsed KAF XML document.
    #
    # @!attribute [r] document
    #  @return [Object] The parsed XML document given to the constructor.
    #
    # @!attribute [r] lexicons
    #  @return [Object] Never assigned in this class, so nil unless set
    #   elsewhere.
    #
    # @!attribute [rw] map
    #  @return [Object] Free slot for the lexicon map that belongs to this
    #   document.
    #
    class Document
      attr_reader :document
      attr_reader :lexicons

      attr_accessor :map

      ##
      # @param [Object] xml An already parsed XML document.
      #
      def initialize(xml)
        @document = xml
      end

      ##
      # Parses the given XML with Nokogiri and wraps the result.
      #
      # @param [String] xml
      # @return [Opener::KAF::Document]
      #
      def self.from_xml(xml)
        new(Nokogiri::XML(xml))
      end

      ##
      # @return [String] The `xml:lang` attribute of the KAF element (memoized).
      #
      def language
        @language ||= @document.at_xpath('KAF').attr('xml:lang')
      end

      ##
      # @return [Array] One Term per `KAF/terms/term` node (memoized).
      #
      def terms
        @terms ||= collection('KAF/terms/term', Term)
      end

      ##
      # Appends an `lp` element to the `linguisticProcessors` element of the
      # given layer. The `kafHeader` and `linguisticProcessors` elements are
      # created when they do not exist yet.
      #
      # @param [String] name
      # @param [String] version
      # @param [String] layer
      # @param [TrueClass|FalseClass] timestamp When true the current time in
      #  ISO 8601 format is stored, otherwise the placeholder '*'.
      # @return [Object] Whatever `add_child('<lp/>')` returned.
      #
      def add_linguistic_processor(name, version, layer, timestamp: false)
        header = @document.at('kafHeader') ||
          append_element(@document.root, '<kafHeader/>')

        procs = header.css('linguisticProcessors').find do |candidate|
          candidate.attr(:layer) == layer
        end
        procs ||= append_element(header, "<linguisticProcessors layer='#{layer}'/>")

        lp = procs.add_child('<lp/>')
        lp.attr(
          timestamp: timestamp ? current_timestamp : '*',
          version:   version,
          name:      name
        )
        lp
      end

      ##
      # @return [String] The document serialized with an indent of 2.
      #
      def to_xml
        @document.to_xml(indent: 2)
      end

      protected

      ##
      # Wraps every node matching the XPath query in the given class.
      #
      # @param [String] query
      # @param [Class] wrapper Instantiated with this document and the node.
      # @return [Array]
      #
      def collection(query, wrapper)
        @document.xpath(query).map { |node| wrapper.new(self, node) }
      end

      ##
      # Adds markup to a parent and returns the element that was created.
      #
      # Nokogiri documents `add_child` as returning a NodeSet when it is given
      # a String. A NodeSet cannot take children itself, so the new element is
      # unwrapped unless the return value already accepts children.
      #
      # @param [Object] parent
      # @param [String] markup
      # @return [Object]
      #
      def append_element(parent, markup)
        added = parent.add_child(markup)

        added.respond_to?(:add_child) ? added : added.first
      end

      ##
      # @return [String] The current time in ISO 8601 format.
      #
      def current_timestamp
        # Time#iso8601 lives in the "time" standard library, which this file
        # does not load at the top; load it on first use.
        require 'time'

        Time.now.iso8601
      end
    end
  end
end