RubyGems - opener-opinion-detector-basic - Versions diffs - 3.0.1 → 3.0.2 - Mend

opener-opinion-detector-basic 3.0.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/exec/opinion-detector-basic.rb +38 -0
data/lib/opener/opinion_detector_basic.rb +3 -4
data/lib/opener/opinion_detector_basic/cli.rb +9 -2
data/lib/opener/opinion_detector_basic/opinion.rb +32 -33
data/lib/opener/opinion_detector_basic/processor.rb +42 -45
data/lib/opener/opinion_detector_basic/term.rb +39 -40
data/lib/opener/opinion_detector_basic/version.rb +1 -1
metadata +4 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3345970ea78e554cf48e14b5293043bc5548d4b3
-  data.tar.gz: 4a1628365168b79760beb9c01d052b81b13c4bc2
+  metadata.gz: fa83717460841775dcf28e9f640208bd16e2cc5d
+  data.tar.gz: 8c8f56b11a38b57c07945bffd666b646ef041f80
 SHA512:
-  metadata.gz: b946c3cce88a034b2ed5952377004f2b5e2122f0d54c9996aa8d71bb6840d752f2e67ce0a059a346c5efcab3072c799a1c09fdc0608b77329fa61df2b3dd1615
-  data.tar.gz: 8dc95b702adee1279a10820f746c99ecd0225308420c48c01ef954bea001650819e05d614bdb20bf4b10b8a13c6f25a839ddca6167e3eae05270eb5e3917476f
+  metadata.gz: 957e43027ccae0ab98becec3641eb82c10a9531d74f278d3557e3f1f8aa19e8ec59260bebd31a105e5e283e55bc2a04d67b70f9a582c00b93b771210e491fbe8
+  data.tar.gz: 02a6df21160370204fb1a72084d36663f974ff816a615028567e701370bd0a6cfabe96b16f7281f0576d7f656bbd21de010f129a05d1f2d3126e71ed4b1706ff

data/exec/opinion-detector-basic.rb CHANGED Viewed

@@ -4,6 +4,44 @@ require 'opener/daemons'
 require_relative '../lib/opener/opinion_detector_basic'
+Oga::XML::Parser.class_eval do
+  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
+  include NewRelic::Agent::MethodTracer
+  add_method_tracer(:parse)
+end
+Oga::XPath::Parser.class_eval do
+  class << self
+    include NewRelic::Agent::Instrumentation::ControllerInstrumentation
+    include NewRelic::Agent::MethodTracer
+    add_method_tracer(:parse_with_cache, 'Oga::XPath::Parser/parse_with_cache')
+  end
+end
+Oga::XPath::Evaluator.class_eval do
+  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
+  include NewRelic::Agent::MethodTracer
+  add_method_tracer(:evaluate)
+  add_method_tracer(:evaluate_ast)
+end
+Opener::OpinionDetectorBasic::Processor.class_eval do
+  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
+  include NewRelic::Agent::MethodTracer
+  add_method_tracer(:process)
+  add_method_tracer(:terms)
+  add_method_tracer(:opinions)
+  add_method_tracer(:add_opinion_element)
+  add_method_tracer(:pretty_print)
+  add_method_tracer(:set_accumulated_strength)
+  add_method_tracer(:apply_modifiers)
+  add_method_tracer(:apply_conjunctions)
+end
 daemon = Opener::Daemons::Daemon.new(Opener::OpinionDetectorBasic)
 daemon.start

data/lib/opener/opinion_detector_basic.rb CHANGED Viewed

@@ -1,6 +1,7 @@
+gem 'slop', '~> 3.0'
 require 'slop'
 require 'oga'
-require 'monitor'
 require 'rexml/document'
 require 'rexml/formatters/pretty'
@@ -32,7 +33,7 @@ module Opener
       @args    = options.delete(:args) || []
       @options = options
     end
     ##
     # Processes the input KAF document.
     #
@@ -40,8 +41,6 @@ module Opener
     # @return [String]
     #
     def run(input)
-      options[:timestamp] = !options.delete(:no_time)
       return Processor.new(input, options).process
     end
   end # OpinionDetectorBasic

data/lib/opener/opinion_detector_basic/cli.rb CHANGED Viewed

@@ -45,10 +45,17 @@ Example:
             abort "opinion-detector-basic v#{VERSION} on #{RUBY_DESCRIPTION}"
           end
+          on :p, :pretty, 'Enables pretty-printing of XML',
+            :default => false
+          on :'no-timestamp', 'Disables timestamps in XML output'
           run do |opts, args|
             detector = OpinionDetectorBasic.new(
-              :args   => args,
-              :domain => opts[:domain]
+              :args      => args,
+              :domain    => opts[:domain],
+              :pretty    => opts[:pretty],
+              :timestamp => !opts[:'no-timestamp']
             )
             input = STDIN.tty? ? nil : STDIN.read

data/lib/opener/opinion_detector_basic/opinion.rb CHANGED Viewed

@@ -3,7 +3,22 @@ module Opener
     class Opinion
       attr_reader :term
       attr_accessor :left_candidates, :right_candidates, :target_ids, :holders
+      # Opinion holders for each language code.
+      OPINION_HOLDERS = {
+        'nl' => [
+          'ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij',
+          'me','hem','haar','ons','hen','hun'
+        ],
+        'en' => ['i','we','he','she','they','it','you'],
+        'es' => [
+          'yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'
+        ],
+        'it' => ['io','tu','noi','voi','loro','lei','lui'],
+        'de' => ['ich','du','wir','ihr','sie','er'],
+        'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
+      }
       def initialize(term)
         @term = term
         @left_candidates = []
@@ -11,7 +26,7 @@ module Opener
         @holders = []
         @target_ids = []
       end
       ##
       # Returns the term ids of the opinion expression.
       #
@@ -20,7 +35,7 @@ module Opener
       def ids
         @ids ||= term.list_ids.sort
       end
       ##
       # Returns the sentence id of the opinion.
       #
@@ -29,7 +44,7 @@ module Opener
       def sentence
         @sentence ||= term.sentence
       end
       ##
       # Returns the strength of the opinion.
       #
@@ -38,7 +53,7 @@ module Opener
       def strength
         @strength ||= term.accumulated_strength
       end
       ##
       # Returns the polarity of the opinion.
       #
@@ -53,7 +68,7 @@ module Opener
           "neutral"
         end
       end
       ##
       # Obtain the opinion holders from the terms that belong to the same
       # sentence.
@@ -61,13 +76,13 @@ module Opener
       def obtain_holders(sentences, language)
         sentence_terms = sentences[sentence]
         sentence_terms.each do |term|
-          if opinion_holders[language].include?(term.lemma)
+          if OPINION_HOLDERS[language].include?(term.lemma)
             @holders << term.id
             break
           end
         end
       end
       ##
       # Get the potential right and left candidates of the sentence and
       # decide which ones are the actual targets of the opinion
@@ -76,20 +91,20 @@ module Opener
         sentence_terms = sentences[sentence]
         max_distance = 3
         terms_count = sentence_terms.count
         index = -1
         sentence_terms.each_with_index do |term, i|
           if ids.include?(term.id)
             index = i
           end
         end
         unless index+1 >= terms_count
           min = index+1
           max = [index+1+max_distance,terms_count].min
           @right_candidates = filter_candidates(sentence_terms[min..max])
         end
         index = 0
         sentence_terms.each_with_index do |term, i|
           if ids.include?(term.id)
@@ -97,7 +112,7 @@ module Opener
             break # needed for left_candidates
           end
         end
         unless index == 0
           min = [0, index-1-max_distance].max
           max = index
@@ -108,7 +123,7 @@ module Opener
           candidate = right_candidates.first
           @target_ids << candidate.id
         end
         if target_ids.empty?
           list = mix_lists(right_candidates, left_candidates)
           list.each do |l|
@@ -117,9 +132,9 @@ module Opener
           end
         end
       end
       protected
       ##
       # If there are no opinion targets, right and left candidates
       # are mixed into one list and the first one is picked as the target.
@@ -140,7 +155,7 @@ module Opener
         end
         return list.compact
       end
       ##
       # Filters candidate terms depending on their part of speech and if
       # they are already part of the expression.
@@ -150,22 +165,6 @@ module Opener
       def filter_candidates(sentence_terms)
         sentence_terms.select{|t| (t.pos == "N" || t.pos == "R") && !ids.include?(t.id)}
       end
-      ##
-      # Opinion holders for each language code.
-      #
-      # @return [Hash]
-      #
-      def opinion_holders
-        {
-          'nl' => ['ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij','me','hem','haar','ons','hen','hun'],
-          'en' => ['i','we','he','she','they','it','you'],
-          'es' => ['yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'],
-          'it' => ['io','tu','noi','voi','loro','lei','lui'],
-          'de' => ['ich','du','wir','ihr','sie','er'],
-          'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
-        }
-      end
     end # Opinion
   end # OpinionDetectorBasic
-end # Opener
+end # Opener

data/lib/opener/opinion_detector_basic/processor.rb CHANGED Viewed

@@ -17,11 +17,11 @@ module Opener
       #  by default due to the performance overhead.
       #
       def initialize(file, options = {})
-        @document            = Oga.parse_xml(file)
+        @document = Oga.parse_xml(file)
-        @timestamp           = !!options[:timestamp]
-        @opinion_strength    = !!options[:opinion_strength]
-        @pretty              = options[:pretty] || false
+        @timestamp        = options[:timestamp]
+        @opinion_strength = options[:opinion_strength]
+        @pretty           = options[:pretty] || false
         raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
       end
@@ -30,18 +30,18 @@ module Opener
       # Processes the input and returns the new KAF output.
       # @return [String]
       #
-      def process
+      def process
         add_opinions_layer
         index = 1
-        opinions.each do |opinion|
+        opinions.each do |opinion|
           add_opinion(opinion, index)
           index += 1
         end
         add_linguistic_processor
-        return pretty ? pretty_print(document) : document.to_xml
+        pretty ? pretty_print(document) : document.to_xml
       end
       ##
@@ -50,25 +50,19 @@ module Opener
       # @return [String]
       #
       def language
-        return @language ||= document.at_xpath('KAF').get('xml:lang')
+        @language ||= document.at_xpath('KAF').get('xml:lang')
       end
       ##
       # Get the terms from the input file
       # @return [Array<Term>]
       #
       def terms
-        unless @terms
-          @terms = []
-          document.xpath('KAF/terms/term').each do |term|
-            @terms << Term.new(term, document, language)
-          end
+        @terms ||= document.xpath('KAF/terms/term').map do |term|
+          Term.new(term, document, language)
         end
-        return @terms
       end
       ##
       # Get the opinions.
       #
@@ -79,7 +73,7 @@ module Opener
           set_accumulated_strength
           apply_modifiers
           apply_conjunctions
           ##
           # Initialize opinions with their expressions.
           #
@@ -88,14 +82,14 @@ module Opener
               o = Opinion.new(term)
             end
           end.compact
           ##
           # Obtain targets for each opinion.
           #
           @opinions.each do |opinion|
             opinion.obtain_targets(sentences)
           end
           ##
           # Obtain holders for each opinion.
           #
@@ -103,10 +97,10 @@ module Opener
             opinion.obtain_holders(sentences, language)
           end
         end
-        return @opinions
+        @opinions
       end
       ##
       # Remove the opinions layer from the KAF file if it exists and add a new
       # one.
@@ -117,30 +111,32 @@ module Opener
         new_node('opinions', 'KAF')
       end
       ##
       # Adds the entire opinion in the KAF file.
       #
       def add_opinion(opinion, index)
         opinion_node = new_node("opinion", "KAF/opinions")
         opinion_node.set('oid', "o#{index.to_s}")
         unless opinion.holders.empty?
           opinion_holder_node = new_node("opinion_holder", opinion_node)
           add_opinion_element(opinion_holder_node, opinion.holders)
         end
         opinion_target_node = new_node("opinion_target", opinion_node)
         unless opinion.target_ids.empty?
           add_opinion_element(opinion_target_node, opinion.target_ids)
         end
         expression_node = new_node("opinion_expression", opinion_node)
         expression_node.set('polarity', opinion.polarity)
         expression_node.set('strength', opinion.strength.to_s)
         add_opinion_element(expression_node, opinion.ids)
       end
       ##
       # Method for adding opinion holders, targets and expressions.
       #
@@ -149,12 +145,13 @@ module Opener
         comment = Oga::XML::Comment.new(:text => "#{lemmas}")
         node.children << comment
         span_node = new_node("span", node)
         ids.each do |id|
           target_node = new_node("target", span_node)
           target_node.set('id', id.to_s)
         end
       end
       ##
       # Add linguistic processor layer with basic information
       # (version, timestamp, description etc) in the KAF file.
@@ -171,7 +168,7 @@ module Opener
         lp_node.set('version', "#{last_edited}-#{version}")
         lp_node.set('name', description)
         if timestamp
           format = '%Y-%m-%dT%H:%M:%S%Z'
@@ -180,7 +177,7 @@ module Opener
           lp_node.set('timestamp', '*')
         end
       end
       ##
       # Format the output document properly.
       #
@@ -196,18 +193,18 @@ module Opener
         formatter.compact = true
         formatter.write(doc, out)
-        return out.strip
+        out.strip
       end
       ##
       # Get terms grouped by sentence.
       #
       def sentences
         @sentences ||= terms.group_by{|t| t.sentence}
       end
       protected
       ##
       # The strength of a term depends heavily on the type of the previous
       # one. For example if the previous one is a shifter, it needs
@@ -228,7 +225,7 @@ module Opener
                 symbol = terms[i+1].accumulated_strength > 0 ? :+ : :-
               else
                 symbol = :*
-              end
+              end
             elsif terms[i+1].is_intensifier?
               terms[i+1].accumulated_strength = term.accumulated_strength.send(symbol, terms[i+1].accumulated_strength)
               term.use = false
@@ -242,7 +239,7 @@ module Opener
           end
         end
       end
       ##
       # Apply strength to the next term after a shifter or intensifier.
       #
@@ -258,7 +255,7 @@ module Opener
           end
         end
       end
       ##
       # Ignore conjunctions when applying strength.
       #
@@ -276,7 +273,7 @@ module Opener
               if j >= terms_count
                 break
               end
               if terms[j].is_conjunction
                 terms[j].use = false
                 j += 1
@@ -297,9 +294,9 @@ module Opener
             i = j
           end
           i += 1
-        end
+        end
       end
       ##
       # Creates a new node in the KAF file.
       #
@@ -314,7 +311,7 @@ module Opener
         parent_node.children << node
-        return node
+        node
       end
       ##
@@ -322,7 +319,7 @@ module Opener
       # @return [Boolean]
       #
       def is_kaf?
-        return !!document.at_xpath('KAF')
+        !!document.at_xpath('KAF')
       end
     end # Processor
   end # OpinionDetectorBasic

data/lib/opener/opinion_detector_basic/term.rb CHANGED Viewed

@@ -3,7 +3,17 @@ module Opener
     class Term
       attr_reader :node, :sentence, :is_conjunction
       attr_accessor :use, :accumulated_strength, :list_ids
+      # Map of conjunctions per language code
+      CONJUNCTIONS = {
+        'nl' => %w{, en},
+        'en' => %w{, and},
+        'es' => %w{, y e},
+        'it' => %w{, e ed},
+        'de' => %w{, und},
+        'fr' => %w{, et}
+      }
       def initialize(node, document, language)
         @node                 = node
         @sentence             = get_sentence(document)
@@ -12,7 +22,7 @@ module Opener
         @list_ids             = [id]
         @is_conjunction       = is_conjunction?(language)
       end
       ##
       # Returns the term id.
       #
@@ -21,16 +31,16 @@ module Opener
       def id
         @id ||= node.get('tid')
       end
       ##
       # Returns the lemma of the term.
-      #
+      #
       # @return [String]
       #
       def lemma
         @lemma ||= node.get('lemma')
       end
       ##
       # Returns the part of speech of the term.
       #
@@ -39,38 +49,36 @@ module Opener
       def pos
         @pos ||= node.get('pos')
       end
       ##
       # Returns the sentiment modifier type if it exists.
       #
       # @return [String|NilClass]
       #
       def sentiment_modifier
-        @sentiment_modifier ||= if sentiment = node.xpath('sentiment').first
-          sentiment.get('sentiment_modifier')
-        end
+        @sentiment_modifier ||=
+          first_sentiment ? first_sentiment.get('sentiment_modifier') : nil
       end
       ##
       # Returns the polarity of the term if it exists.
       #
       # @return [String|NilClass]
       #
       def polarity
-        @polarity ||= if sentiment = node.xpath('sentiment').first
-          sentiment.get('polarity')
-        end
+        @polarity ||= first_sentiment ? first_sentiment.get('polarity') : nil
       end
       ##
       # Returns the actual word ids that construct the lemma.
       #
       # @return [Array]
       #
       def target_ids
-        @target_ids ||= node.xpath('span/target').map {|target| target.get('id')}
+        @target_ids ||= node.xpath('span/target')
+          .map { |target| target.get('id') }
       end
       ##
       # Returns the strength of the term depending on its type.
       #
@@ -82,16 +90,16 @@ module Opener
         elsif polarity == "negative"
           return -1
         end
         if is_intensifier?
           return 2
         elsif is_shifter?
           return -1
         end
         return 0
       end
       ##
       # Returns the sentence id that the term belongs to in the document.
       #
@@ -103,7 +111,7 @@ module Opener
         .first
         .get('sent')
       end
       ##
       # Checks if a term is an intensifier.
       #
@@ -112,7 +120,7 @@ module Opener
       def is_intensifier?
         sentiment_modifier == "intensifier"
       end
       ##
       # Checks if a term is a shifter.
       #
@@ -121,7 +129,7 @@ module Opener
       def is_shifter?
         sentiment_modifier == "shifter"
       end
       ##
       # Checks if a term is an expression.
       #
@@ -130,31 +138,22 @@ module Opener
       def is_expression?
         use && !!polarity
       end
       ##
       # Checks if a term is a conjunction.
       #
       # @return [TrueClass|FalseClass]
       #
       def is_conjunction?(language)
-        conjunctions[language].include?(lemma)
+        CONJUNCTIONS[language].include?(lemma)
       end
-      ##
-      # Map of conjunctions per language code
-      #
-      # @return [Hash]
-      #
-      def conjunctions
-        {
-          'nl' => [',','en'],
-          'en' => [',','and'],
-          'es' => [',','y','e'],
-          'it' => [',','e','ed'],
-          'de' => [',','und'],
-          'fr' => [',','et']
-        }
+      private
+      # @return [Oga::XML::Element]
+      def first_sentiment
+        @first_sentiment ||= node.xpath('sentiment').first
       end
     end # Term
   end # OpinionDetectorBasic
-end # Opener
+end # Opener

data/lib/opener/opinion_detector_basic/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Opener
   class OpinionDetectorBasic
-    VERSION = '3.0.1'
+    VERSION = '3.0.2'
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: opener-opinion-detector-basic
 version: !ruby/object:Gem::Version
-  version: 3.0.1
+  version: 3.0.2
 platform: ruby
 authors:
 - development@olery.com
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-06-15 00:00:00.000000000 Z
+date: 2015-08-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: opener-daemons
@@ -125,9 +125,9 @@ dependencies:
 description: Basic Opinion Detector.
 email:
 executables:
+- opinion-detector-basic-server
 - opinion-detector-basic
 - opinion-detector-basic-daemon
-- opinion-detector-basic-server
 extensions: []
 extra_rdoc_files: []
 files:
@@ -169,7 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.4.8
 signing_key:
 specification_version: 4
 summary: Basic Opinion Detector.