opener-polarity-tagger 2.5.0 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'opener/daemons'
4
- require 'opener/core'
5
4
 
6
5
  require_relative '../lib/opener/polarity_tagger'
7
6
 
8
- switcher = Opener::Core::ResourceSwitcher.new
9
- switcher_opts = {}
7
+ daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger)
10
8
 
11
- parser = Opener::Daemons::OptParser.new do |opts|
12
- switcher.bind(opts, switcher_opts)
13
- end
14
-
15
- options = parser.parse!(ARGV)
16
- daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger, options)
17
-
18
- switcher.install(switcher_opts)
19
9
  daemon.start
@@ -1,101 +1,27 @@
1
1
  require 'open3'
2
2
  require 'opener/core'
3
+ require 'nokogiri'
4
+ require 'hashie'
3
5
 
4
6
  require_relative 'polarity_tagger/version'
5
7
  require_relative 'polarity_tagger/cli'
8
+ require_relative 'polarity_tagger/external'
9
+
10
+ require_relative 'polarity_tagger/internal'
6
11
 
7
12
  module Opener
8
- ##
9
- # Ruby wrapper around the Python based polarity tagger.
10
- #
11
- # @!attribute [r] options
12
- # @return [Hash]
13
- #
14
13
  class PolarityTagger
15
- attr_reader :options, :args
16
14
 
17
- ##
18
- # @param [Hash] options
19
- #
20
- # @option options [Array] :args Collection of arbitrary arguments to pass
21
- # to the underlying kernel.
22
- #
23
- def initialize(options = {})
15
+ def initialize options = {}
24
16
  @args = options.delete(:args) || []
25
17
  @options = options
18
+ @klass = if ENV['LEGACY'] then External else Internal end
19
+ @proc = @klass.new args: @args
26
20
  end
27
21
 
28
- ##
29
- # Returns a String containing the command to use for executing the kernel.
30
- #
31
- # @return [String]
32
- #
33
- def command
34
- return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
35
- end
36
-
37
- def lexicon_path
38
- if path = options[:resource_path]
39
- return "--lexicon-path #{path}"
40
- elsif path = ENV['POLARITY_LEXICON_PATH']
41
- return "--lexicon-path #{path}"
42
- else
43
- return nil
44
- end
45
- end
46
-
47
- ##
48
- # Processes the input and returns an Array containing the output of STDOUT,
49
- # STDERR and an object containing process information.
50
- #
51
- # @param [String] input The text of which to detect the language.
52
- # @return [Array]
53
- #
54
- def run(input)
55
- begin
56
- stdout, stderr, process = capture(input)
57
- raise stderr unless process.success?
58
- return stdout
59
- rescue Exception => error
60
- return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
61
- end
62
- end
63
-
64
- protected
65
- ##
66
- # @return [String]
67
- #
68
- def adjust_python_path
69
- site_packages = File.join(core_dir, 'site-packages')
70
- "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
71
- end
72
-
73
- ##
74
- # capture3 method doesn't work properly with Jruby, so
75
- # this is a workaround
76
- #
77
- def capture(input)
78
- Open3.popen3(*command.split(" ")) {|i, o, e, t|
79
- out_reader = Thread.new { o.read }
80
- err_reader = Thread.new { e.read }
81
- i.write input
82
- i.close
83
- [out_reader.value, err_reader.value, t.value]
84
- }
22
+ def run input
23
+ @proc.run input
85
24
  end
86
25
 
87
- ##
88
- # @return [String]
89
- #
90
- def core_dir
91
- return File.expand_path('../../../core', __FILE__)
92
- end
93
-
94
- ##
95
- # @return [String]
96
- #
97
- def kernel
98
- return File.join(core_dir, 'poltagger-basic-multi.py')
99
- end
100
- end # PolarityTagger
101
- end # Opener
26
+ end
27
+ end
@@ -3,75 +3,66 @@ require 'opener/core'
3
3
  module Opener
4
4
  class PolarityTagger
5
5
  ##
6
- # CLI wrapper around {Opener::LanguageIdentifier} using OptionParser.
6
+ # CLI wrapper around {Opener::PolarityTagger} using Slop.
7
7
  #
8
- # @!attribute [r] options
9
- # @return [Hash]
10
- # @!attribute [r] option_parser
11
- # @return [OptionParser]
8
+ # @!attribute [r] parser
9
+ # @return [Slop]
12
10
  #
13
11
  class CLI
14
- attr_reader :options, :option_parser, :resource_switcher
12
+ attr_reader :parser
13
+
14
+ def initialize
15
+ @parser = configure_slop
16
+ end
15
17
 
16
18
  ##
17
- # @param [Hash] options
19
+ # @param [Array] argv
18
20
  #
19
- def initialize(options = {})
20
- @options = options
21
+ def run(argv = ARGV)
22
+ parser.parse(argv)
23
+ end
21
24
 
22
- @resource_switcher = Opener::Core::ResourceSwitcher.new
23
- component_options, options[:args] = Opener::Core::ArgvSplitter.split(options[:args])
25
+ ##
26
+ # @return [Slop]
27
+ #
28
+ def configure_slop
29
+ Slop.new strict: false, indent: 2, help: true do
30
+ banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
24
31
 
25
- @option_parser = OptionParser.new do |opts|
26
- opts.program_name = 'polarity-tagger'
27
- opts.summary_indent = ' '
32
+ separator <<-EOF.chomp
28
33
 
29
- resource_switcher.bind(opts, @options)
34
+ About:
30
35
 
31
- opts.on('-h', '--help', 'Shows this help message') do
32
- show_help
33
- end
36
+ Component for tagging the polarity of elements in a KAF document. This
37
+ command reads input from STDIN.
34
38
 
35
- opts.on('-v', '--version', 'Shows the current version') do
36
- show_version
37
- end
39
+ Examples:
38
40
 
39
- opts.on('-l', '--log', 'Enable logging to STDERR') do
40
- @options[:logging] = true
41
- end
42
- end
41
+ Processing a KAF file:
43
42
 
44
- option_parser.parse!(component_options)
45
- force = false
46
- resource_switcher.install(@options, force)
47
- end
43
+ cat some_file.kaf | polarity-tagger
48
44
 
49
- ##
50
- # @param [String] input
51
- #
52
- def run(input)
53
- tagger = PolarityTagger.new(options)
45
+ Displaying the underlying kernel options:
54
46
 
55
- stdout, stderr, process = tagger.run(input)
47
+ polarity-tagger -- --help
56
48
 
57
- puts stdout
58
- end
49
+ EOF
59
50
 
60
- private
51
+ separator "\nOptions:\n"
61
52
 
62
- ##
63
- # Shows the help message and exits the program.
64
- #
65
- def show_help
66
- abort option_parser.to_s
67
- end
53
+ on :v, :version, 'Shows the current version' do
54
+ abort "polarity-tagger v#{VERSION} on #{RUBY_DESCRIPTION}"
55
+ end
68
56
 
69
- ##
70
- # Shows the version and exits the program.
71
- #
72
- def show_version
73
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
57
+ run do |opts, args|
58
+ tagger = PolarityTagger.new(:args => args)
59
+ input = STDIN.tty? ? nil : STDIN.read
60
+
61
+ puts tagger.run(input)
62
+ end
63
+ end
74
64
  end
75
- end # CLI
76
- end # PolarityTagger
77
- end # Opener
65
+
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,105 @@
1
+ module Opener
2
+ class PolarityTagger
3
+ ##
4
+ # Ruby wrapper around the Python based polarity tagger.
5
+ #
6
+ # @!attribute [r] options
7
+ # @return [Hash]
8
+ #
9
+ # @!attribute [r] args
10
+ # @return [Array]
11
+ #
12
+ class External
13
+
14
+ attr_reader :options, :args
15
+
16
+ ##
17
+ # @param [Hash] options
18
+ #
19
+ # @option options [Array] :args Collection of arbitrary arguments to pass
20
+ # to the underlying kernel.
21
+ #
22
+ # @option options [String] :resource_path Path to the lexicons to use.
23
+ #
24
+ def initialize options = {}
25
+ @args = options.delete(:args) || []
26
+ @options = options
27
+ end
28
+
29
+ ##
30
+ # Returns a String containing the command to use for executing the kernel.
31
+ #
32
+ # @return [String]
33
+ #
34
+ def command
35
+ return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
36
+ end
37
+
38
+ ##
39
+ # @return [String]
40
+ #
41
+ def lexicon_path
42
+ path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
43
+ ENV['POLARITY_LEXICON_PATH']
44
+
45
+ return path ? "--lexicon-path #{path}" : nil
46
+ end
47
+
48
+ ##
49
+ # Processes the input and returns the STDOUT output of the kernel. Raises
50
+ # the STDERR output as an error when the kernel process fails.
51
+ #
52
+ # @param [String] input The KAF document to tag.
53
+ # @return [String]
54
+ #
55
+ def run(input)
56
+ stdout, stderr, process = capture(input)
57
+
58
+ raise stderr unless process.success?
59
+ puts stderr if ENV['DEBUG']
60
+
61
+ return stdout
62
+ end
63
+
64
+ protected
65
+
66
+ ##
67
+ # @return [String]
68
+ #
69
+ def adjust_python_path
70
+ site_packages = File.join(core_dir, 'site-packages')
71
+
72
+ "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
73
+ end
74
+
75
+ ##
76
+ # Open3.capture3 doesn't work properly with JRuby, so
77
+ # this is a workaround
78
+ #
79
+ def capture(input)
80
+ Open3.popen3(*command.split(" ")) {|i, o, e, t|
81
+ out_reader = Thread.new { o.read }
82
+ err_reader = Thread.new { e.read }
83
+ i.write input
84
+ i.close
85
+ [out_reader.value, err_reader.value, t.value]
86
+ }
87
+ end
88
+
89
+ ##
90
+ # @return [String]
91
+ #
92
+ def core_dir
93
+ File.expand_path '../../../../core', __FILE__
94
+ end
95
+
96
+ ##
97
+ # @return [String]
98
+ #
99
+ def kernel
100
+ File.join core_dir, 'poltagger-basic-multi.py'
101
+ end
102
+
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,57 @@
1
+ require_relative 'lexicons_cache'
2
+ require_relative 'lexicon_map'
3
+ require_relative 'kaf/document'
4
+
5
+ module Opener
6
+ class PolarityTagger
7
+ class Internal
8
+
9
+ DESC = 'VUA polarity tagger multilanguage'
10
+ LAST_EDITED = '21may2014'
11
+ VERSION = '1.2'
12
+
13
+ CACHE = LexiconsCache.new
14
+
15
+ def initialize ignore_pos: false, **params
16
+ @ignore_pos = ignore_pos
17
+ end
18
+
19
+ def run input
20
+ @kaf = KAF::Document.from_xml input
21
+ @map = @kaf.map = CACHE[@kaf.language]
22
+
23
+ negators = 0
24
+ @kaf.terms.each do |t|
25
+ lemma = t.lemma&.downcase
26
+ pos = if @ignore_pos then nil else t.pos end
27
+ attrs = Hashie::Mash.new
28
+
29
+ lexicon, polarity_pos = @map.by_polarity lemma, pos
30
+
31
+ if lexicon.polarity != 'unknown'
32
+ attrs.polarity = lexicon.polarity
33
+ end
34
+ if l = @map.by_negator(lemma)
35
+ negators += 1
36
+ lexicon, polarity_pos = l, nil
37
+ attrs.sentiment_modifier = 'shifter'
38
+ end
39
+ if l = @map.by_intensifier(lemma)
40
+ lexicon, polarity_pos = l, nil
41
+ attrs.sentiment_modifier = 'intensifier'
42
+ end
43
+
44
+ if attrs.size > 0
45
+ attrs.resource = lexicon.resource if lexicon.resource
46
+ t.setPolarity attrs, polarity_pos
47
+ end
48
+ end
49
+
50
+ @kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
51
+
52
+ @kaf.to_xml
53
+ end
54
+
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,53 @@
1
+ require_relative 'term'
2
+
3
+ module Opener
4
+ module KAF
5
+ class Document
6
+
7
+ attr_reader :document
8
+ attr_reader :lexicons
9
+
10
+ attr_accessor :map
11
+
12
+ def initialize xml
13
+ @document = xml
14
+ end
15
+
16
+ def self.from_xml xml
17
+ new Nokogiri::XML xml
18
+ end
19
+
20
+ def language
21
+ @language ||= @document.at_xpath('KAF').attr 'xml:lang'
22
+ end
23
+
24
+ def terms
25
+ @terms ||= collection 'KAF/terms/term', Term
26
+ end
27
+
28
+ def add_linguistic_processor name, version, layer, timestamp: false
29
+ header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>')
30
+ procs = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
31
+ procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>")
32
+ lp = procs.add_child('<lp/>')
33
+ lp.attr(
34
+ timestamp: if timestamp then Time.now.iso8601 else '*' end,
35
+ version: version,
36
+ name: name,
37
+ )
38
+ lp
39
+ end
40
+
41
+ def to_xml
42
+ @document.to_xml indent: 2
43
+ end
44
+
45
+ protected
46
+
47
+ def collection query, wrapper
48
+ @document.xpath(query).map{ |node| wrapper.new self, node }
49
+ end
50
+
51
+ end
52
+ end
53
+ end