opener-polarity-tagger 2.4.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,19 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'opener/daemons'
4
- require 'opener/core'
5
4
 
6
5
  require_relative '../lib/opener/polarity_tagger'
7
6
 
8
- switcher = Opener::Core::ResourceSwitcher.new
9
- switcher_opts = {}
7
+ daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger)
10
8
 
11
- parser = Opener::Daemons::OptParser.new do |opts|
12
- switcher.bind(opts, switcher_opts)
13
- end
14
-
15
- options = parser.parse!(ARGV)
16
- daemon = Opener::Daemons::Daemon.new(Opener::PolarityTagger, options)
17
-
18
- switcher.install(switcher_opts)
19
9
  daemon.start
@@ -1,100 +1,27 @@
1
1
  require 'open3'
2
+ require 'opener/core'
3
+ require 'nokogiri'
4
+ require 'hashie'
2
5
 
3
6
  require_relative 'polarity_tagger/version'
4
7
  require_relative 'polarity_tagger/cli'
8
+ require_relative 'polarity_tagger/external'
9
+
10
+ require_relative 'polarity_tagger/internal'
5
11
 
6
12
  module Opener
7
- ##
8
- # Ruby wrapper around the Python based polarity tagger.
9
- #
10
- # @!attribute [r] options
11
- # @return [Hash]
12
- #
13
13
  class PolarityTagger
14
- attr_reader :options, :args
15
14
 
16
- ##
17
- # @param [Hash] options
18
- #
19
- # @option options [Array] :args Collection of arbitrary arguments to pass
20
- # to the underlying kernel.
21
- #
22
- def initialize(options = {})
15
+ def initialize options = {}
23
16
  @args = options.delete(:args) || []
24
17
  @options = options
18
+ @klass = if ENV['LEGACY'] then External else Internal end
19
+ @proc = @klass.new args: @args
25
20
  end
26
21
 
27
- ##
28
- # Returns a String containing the command to use for executing the kernel.
29
- #
30
- # @return [String]
31
- #
32
- def command
33
- return "#{adjust_python_path} python -E -OO #{kernel} #{lexicon_path} #{args.join(" ")}"
34
- end
35
-
36
- def lexicon_path
37
- if path = options[:resource_path]
38
- return "--lexicon-path #{path}"
39
- elsif path = ENV['POLARITY_LEXICON_PATH']
40
- return "--lexicon-path #{path}"
41
- else
42
- return nil
43
- end
44
- end
45
-
46
- ##
47
- # Processes the input and returns an Array containing the output of STDOUT,
48
- # STDERR and an object containing process information.
49
- #
50
- # @param [String] input The text of which to detect the language.
51
- # @return [Array]
52
- #
53
- def run(input)
54
- begin
55
- stdout, stderr, process = capture(input)
56
- raise stderr unless process.success?
57
- return stdout
58
- rescue Exception => error
59
- return Opener::Core::ErrorLayer.new(input, error.message, self.class).add
60
- end
61
- end
62
-
63
- protected
64
- ##
65
- # @return [String]
66
- #
67
- def adjust_python_path
68
- site_packages = File.join(core_dir, 'site-packages')
69
- "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
70
- end
71
-
72
- ##
73
- # capture3 method doesn't work properly with Jruby, so
74
- # this is a workaround
75
- #
76
- def capture(input)
77
- Open3.popen3(*command.split(" ")) {|i, o, e, t|
78
- out_reader = Thread.new { o.read }
79
- err_reader = Thread.new { e.read }
80
- i.write input
81
- i.close
82
- [out_reader.value, err_reader.value, t.value]
83
- }
22
+ def run input
23
+ @proc.run input
84
24
  end
85
25
 
86
- ##
87
- # @return [String]
88
- #
89
- def core_dir
90
- return File.expand_path('../../../core', __FILE__)
91
- end
92
-
93
- ##
94
- # @return [String]
95
- #
96
- def kernel
97
- return File.join(core_dir, 'poltagger-basic-multi.py')
98
- end
99
- end # PolarityTagger
100
- end # Opener
26
+ end
27
+ end
@@ -3,75 +3,66 @@ require 'opener/core'
3
3
  module Opener
4
4
  class PolarityTagger
5
5
  ##
6
- # CLI wrapper around {Opener::LanguageIdentifier} using OptionParser.
6
+ # CLI wrapper around {Opener::PolarityTagger} using Slop.
7
7
  #
8
- # @!attribute [r] options
9
- # @return [Hash]
10
- # @!attribute [r] option_parser
11
- # @return [OptionParser]
8
+ # @!attribute [r] parser
9
+ # @return [Slop]
12
10
  #
13
11
  class CLI
14
- attr_reader :options, :option_parser, :resource_switcher
12
+ attr_reader :parser
13
+
14
+ def initialize
15
+ @parser = configure_slop
16
+ end
15
17
 
16
18
  ##
17
- # @param [Hash] options
19
+ # @param [Array] argv
18
20
  #
19
- def initialize(options = {})
20
- @options = options
21
+ def run(argv = ARGV)
22
+ parser.parse(argv)
23
+ end
21
24
 
22
- @resource_switcher = Opener::Core::ResourceSwitcher.new
23
- component_options, options[:args] = Opener::Core::ArgvSplitter.split(options[:args])
25
+ ##
26
+ # @return [Slop]
27
+ #
28
+ def configure_slop
29
+ Slop.new strict: false, indent: 2, help: true do
30
+ banner 'Usage: polarity-tagger [OPTIONS] -- [PYTHON OPTIONS]'
24
31
 
25
- @option_parser = OptionParser.new do |opts|
26
- opts.program_name = 'polarity-tagger'
27
- opts.summary_indent = ' '
32
+ separator <<-EOF.chomp
28
33
 
29
- resource_switcher.bind(opts, @options)
34
+ About:
30
35
 
31
- opts.on('-h', '--help', 'Shows this help message') do
32
- show_help
33
- end
36
+ Component for tagging the polarity of elements in a KAF document. This
37
+ command reads input from STDIN.
34
38
 
35
- opts.on('-v', '--version', 'Shows the current version') do
36
- show_version
37
- end
39
+ Examples:
38
40
 
39
- opts.on('-l', '--log', 'Enable logging to STDERR') do
40
- @options[:logging] = true
41
- end
42
- end
41
+ Processing a KAF file:
43
42
 
44
- option_parser.parse!(component_options)
45
- force = false
46
- resource_switcher.install(@options, force)
47
- end
43
+ cat some_file.kaf | polarity-tagger
48
44
 
49
- ##
50
- # @param [String] input
51
- #
52
- def run(input)
53
- tagger = PolarityTagger.new(options)
45
+ Displaying the underlying kernel options:
54
46
 
55
- stdout, stderr, process = tagger.run(input)
47
+ polarity-tagger -- --help
56
48
 
57
- puts stdout
58
- end
49
+ EOF
59
50
 
60
- private
51
+ separator "\nOptions:\n"
61
52
 
62
- ##
63
- # Shows the help message and exits the program.
64
- #
65
- def show_help
66
- abort option_parser.to_s
67
- end
53
+ on :v, :version, 'Shows the current version' do
54
+ abort "polarity-tagger v#{VERSION} on #{RUBY_DESCRIPTION}"
55
+ end
68
56
 
69
- ##
70
- # Shows the version and exits the program.
71
- #
72
- def show_version
73
- abort "#{option_parser.program_name} v#{VERSION} on #{RUBY_DESCRIPTION}"
57
+ run do |opts, args|
58
+ tagger = PolarityTagger.new(:args => args)
59
+ input = STDIN.tty? ? nil : STDIN.read
60
+
61
+ puts tagger.run(input)
62
+ end
63
+ end
74
64
  end
75
- end # CLI
76
- end # PolarityTagger
77
- end # Opener
65
+
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,105 @@
1
+ module Opener
2
+ class PolarityTagger
3
+ ##
4
+ # Ruby wrapper around the Python based polarity tagger.
5
+ #
6
+ # @!attribute [r] options
7
+ # @return [Hash]
8
+ #
9
+ # @!attribute [r] args
10
+ # @return [Array]
11
+ #
12
+ class External
13
+
14
+ attr_reader :options, :args
15
+
16
+ ##
17
+ # @param [Hash] options
18
+ #
19
+ # @option options [Array] :args Collection of arbitrary arguments to pass
20
+ # to the underlying kernel.
21
+ #
22
+ # @option options [String] :resource_path Path to the lexicons to use.
23
+ #
24
+ def initialize options = {}
25
+ @args = options.delete(:args) || []
26
+ @options = options
27
+ end
28
+
29
+ ##
30
+ # Returns a String containing the command to use for executing the kernel.
31
+ #
32
+ # @return [String]
33
+ #
34
+ def command
35
+ return "#{adjust_python_path} python -E #{kernel} #{lexicon_path} #{args.join(" ")}"
36
+ end
37
+
38
+ ##
39
+ # @return [String]
40
+ #
41
+ def lexicon_path
42
+ path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
43
+ ENV['POLARITY_LEXICON_PATH']
44
+
45
+ return path ? "--lexicon-path #{path}" : nil
46
+ end
47
+
48
+ ##
49
+ # Runs the kernel on the input and returns what it wrote to STDOUT. Raises
50
+ # the kernel's STDERR output as an error when the process fails.
51
+ #
52
+ # @param [String] input The text written to the kernel's STDIN.
53
+ # @return [String]
54
+ #
55
+ def run(input)
56
+ stdout, stderr, process = capture(input)
57
+
58
+ raise stderr unless process.success?
59
+ puts stderr if ENV['DEBUG']
60
+
61
+ return stdout
62
+ end
63
+
64
+ protected
65
+
66
+ ##
67
+ # @return [String]
68
+ #
69
+ def adjust_python_path
70
+ site_packages = File.join(core_dir, 'site-packages')
71
+
72
+ "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
73
+ end
74
+
75
+ ##
76
+ # capture3 method doesn't work properly with JRuby, so
77
+ # this is a workaround
78
+ #
79
+ def capture(input)
80
+ Open3.popen3(*command.split(" ")) {|i, o, e, t|
81
+ out_reader = Thread.new { o.read }
82
+ err_reader = Thread.new { e.read }
83
+ i.write input
84
+ i.close
85
+ [out_reader.value, err_reader.value, t.value]
86
+ }
87
+ end
88
+
89
+ ##
90
+ # @return [String]
91
+ #
92
+ def core_dir
93
+ File.expand_path '../../../../core', __FILE__
94
+ end
95
+
96
+ ##
97
+ # @return [String]
98
+ #
99
+ def kernel
100
+ File.join core_dir, 'poltagger-basic-multi.py'
101
+ end
102
+
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,57 @@
1
+ require_relative 'lexicons_cache'
2
+ require_relative 'lexicon_map'
3
+ require_relative 'kaf/document'
4
+
5
+ module Opener
6
+ class PolarityTagger
7
+ class Internal
8
+
9
+ DESC = 'VUA polarity tagger multilanguage'
10
+ LAST_EDITED = '21may2014'
11
+ VERSION = '1.2'
12
+
13
+ def initialize ignore_pos: false, **params
14
+ @cache = LexiconsCache.new
15
+
16
+ @ignore_pos = ignore_pos
17
+ end
18
+
19
+ def run input
20
+ @kaf = KAF::Document.from_xml input
21
+ @map = @kaf.map = @cache[@kaf.language]
22
+
23
+ negators = 0
24
+ @kaf.terms.each do |t|
25
+ lemma = t.lemma&.downcase
26
+ pos = if @ignore_pos then nil else t.pos end
27
+ attrs = Hashie::Mash.new
28
+
29
+ lexicon, polarity_pos = @map.by_polarity lemma, pos
30
+
31
+ if lexicon.polarity != 'unknown'
32
+ attrs.polarity = lexicon.polarity
33
+ end
34
+ if l = @map.by_negator(lemma)
35
+ negators += 1
36
+ lexicon, polarity = l, nil
37
+ attrs.sentiment_modifier = 'shifter'
38
+ end
39
+ if l = @map.by_intensifier(lemma)
40
+ lexicon, polarity = l, nil
41
+ attrs.sentiment_modifier = 'intensifier'
42
+ end
43
+
44
+ if attrs.size > 0
45
+ attrs.resource = lexicon.resource if lexicon.resource
46
+ t.setPolarity attrs, polarity_pos
47
+ end
48
+ end
49
+
50
+ @kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
51
+
52
+ @kaf.to_xml
53
+ end
54
+
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,53 @@
1
+ require_relative 'term'
2
+
3
+ module Opener
4
+ module KAF
5
+ class Document
6
+
7
+ attr_reader :document
8
+ attr_reader :lexicons
9
+
10
+ attr_accessor :map
11
+
12
+ def initialize xml
13
+ @document = xml
14
+ end
15
+
16
+ def self.from_xml xml
17
+ new Nokogiri::XML xml
18
+ end
19
+
20
+ def language
21
+ @language ||= @document.at_xpath('KAF').attr 'xml:lang'
22
+ end
23
+
24
+ def terms
25
+ @terms ||= collection 'KAF/terms/term', Term
26
+ end
27
+
28
+ def add_linguistic_processor name, version, layer, timestamp: false
29
+ header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>')
30
+ procs = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
31
+ procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>")
32
+ lp = procs.add_child('<lp/>')
33
+ lp.attr(
34
+ timestamp: if timestamp then Time.now.iso8601 else '*' end,
35
+ version: version,
36
+ name: name,
37
+ )
38
+ lp
39
+ end
40
+
41
+ def to_xml
42
+ @document.to_xml indent: 2
43
+ end
44
+
45
+ protected
46
+
47
+ def collection query, wrapper
48
+ @document.xpath(query).map{ |node| wrapper.new self, node }
49
+ end
50
+
51
+ end
52
+ end
53
+ end