RubyGems - evoc - Versions diffs - 3.7.0 → 3.8.0 - Mend

evoc 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/.gitignore +2 -0
data/lib/evoc/algorithm.rb +52 -31
data/lib/evoc/evaluate.rb +26 -0
data/lib/evoc/experiment.rb +36 -29
data/lib/evoc/recommendation_cache.rb +6 -14
data/lib/evoc/rule.rb +2 -2
data/lib/evoc/rule_store.rb +13 -8
data/lib/evoc/scenario.rb +0 -38
data/lib/evoc/version.rb +1 -1
data/lib/evoc_cli/experiment.rb +2 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
-  data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
+  metadata.gz: d6583ca91c4e4987cd4816958b375795d5adfc9a
+  data.tar.gz: 8569042becd54ef9ebd4f2d94aac3fb106b9fb6d
 SHA512:
-  metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
-  data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
+  metadata.gz: 845bccb16af58856641c86224d03dd677c2d10cc69c545db2a5f9695765df3db1717a8b1d76e02c59d060eaa0a2290c1abe18bbd40980908d4db1faa4997306b
+  data.tar.gz: cb229b3e0cfea199d63929131258acc75a40d7ca872067740641bf324ec8c4d466fb16569131983a8f472e9962b5898c65ab731dfe4082be318645d56dba77f8

data/.gitignore CHANGED Viewed

@@ -15,3 +15,5 @@ evoc.log
 *TAGS
 *~
 *#
+/spec/test_data/temp/*
+/GPATH

data/lib/evoc/algorithm.rb CHANGED Viewed

@@ -105,36 +105,6 @@ module Evoc
         end
-        def self.not_subsumed(tx_store:, query:)
-          #initial filter, we consider all txes where something in the query changed
-          query_changed_in = tx_store.transactions_of_list(query)
-          # now find what subsets of the query changed in each tx
-          trie = Containers::Trie.new
-          query_changed_in.each do |tx_id|
-            tx = tx_store.get_tx(id:tx_id,id_type: :index)
-            antecedent = (query & tx.items)
-            consequents = (tx.items - antecedent)
-            if consequents.size != 0
-              consequents.each do |consequent|
-                entry = "#{consequent.to_s}#{antecedent.join('')}"
-                if trie.get(entry).nil?
-                  puts "ADDED #{entry}"
-                  trie.push(entry,consequent.to_s)
-                end
-              end
-            end
-          end
-          return trie
-          # now generate rules
-          # rule_store = Evoc::RuleStore.new(query: query)
-          # rules.each do |consequent,antecedents|
-          #   antecedents.each do |antecedent|
-          #     rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
-          #   end
-          # end
-          # return rule_store
-        end
         ##
         # Find the largest rules for each unique consequent
         def self.largest_rules(tx_store:,query:)
@@ -168,6 +138,57 @@ module Evoc
           return rule_store
         end
+        def self.hybrid(tx_store:,query:)
+          overlaps = self.largest_overlaps(tx_store: tx_store, query: query)
+          if overlaps.empty?
+            return Evoc::RuleStore.new(query: query)                             # no rules
+          else
+            if overlaps.size == 1
+              if overlaps.first.size == query.size                               # execute rose
+                return self.rose(tx_store: tx_store,query: query)
+              else
+                return self.tarmaq(0,tx_store: tx_store,query: overlaps.first)   # execute tarmaq
+              end
+            else
+              store = Evoc::RuleStore.new(query: query)
+              overlaps.each do |overlap|
+                if overlap.size == 1                                             # execute co change
+                  part_store = Evoc::Algorithm.co_change(tx_store: tx_store, query: overlap)
+                  part_store.each {|r| store << r}
+                else                                                             # execute tarmaq
+                  part_store = Evoc::Algorithm.tarmaq(0,tx_store: tx_store, query: overlap)
+                  part_store.each {|r| store << r}
+                end
+              end
+              store.rules = store.select {|r| !query.include?(r.rhs.first)}
+              return store
+            end
+          end
+        end
+        def self.largest_overlaps(tx_store:,query:)
+            largest_match = []
+            #initial filter, we consider all txes where something in the query changed
+            query_changed_in = tx_store.transactions_of_list(query)
+            # now find what subsets of the query changed in each tx
+            query_changed_in.each do |tx_id|
+                tx = tx_store.get_tx(id:tx_id,id_type: :index)
+                largest_match_in_query = (query & tx.items)
+                match_size = largest_match_in_query.size
+                remainder_in_tx = tx.items - largest_match_in_query
+                if remainder_in_tx.size > 0
+                  if match_size > largest_match.size
+                    largest_match = largest_match_in_query
+                  end
+                end
+            end
+            if largest_match.empty? #no more matches
+              return []
+            else
+              query_remainder = query - largest_match
+              return [largest_match] + self.largest_overlaps(tx_store: tx_store,query: query_remainder)
+            end
+        end
         ##
         # TARMAQ
@@ -206,7 +227,7 @@ module Evoc
           self.cached_rule_range(1,1,tx_store: tx_store, query: query)
         end
-        def self.closed_rules(tx_store:, query:)
+        def self.tcharm(tx_store:, query:)
           Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
         end

data/lib/evoc/evaluate.rb CHANGED Viewed

@@ -11,11 +11,16 @@ module Evoc
       end
     end
     def self.mean_confidence(rules:)
       if rules.empty? then return nil end
       return (rules.inject(0) {|sum,r| sum + r.m_confidence.value}/rules.size).to_f
     end
+    def self.mean_confidence10(rules:)
+      return self.mean_confidence(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_confidence']).take(10).flatten.take(10))
+    end
     def self.discernibility(rec:)
       # AP is 0 for the empty list
       if rec.is_a?(Array) && rec.empty? # array and empty
@@ -65,6 +70,27 @@ module Evoc
       return (2*rec_correct/(rec_size + exp)).to_f
     end
+    ##
+    # @return an array containg the rank of each consequtive expected outcome
+    def self.relevant_ranks(rec:)
+      # AP is 0 for the empty list
+      if rec.is_a?(Array) && rec.empty? # array and empty
+        return []
+      end
+      self.validateInput(rec)
+      ranks = []
+      last_checked = 1
+      rec.each do |c|
+        c.each do |e|
+          if e == 1
+            ranks << last_checked
+          end
+          last_checked = last_checked + 1
+        end
+      end
+      return ranks
+    end
     ##
     # @return the rank of the first relevant itemjk

data/lib/evoc/experiment.rb CHANGED Viewed

@@ -34,36 +34,42 @@ module Evoc
         end
         sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
       end
-      # group the txes by size
-      groups = sampling_history.group_by {|tx| tx.size}
-      # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
-      # i.e., random sampling is done first, then the sampled txes are removed from the sampling
-      tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
-      tx_sizes_to_sample_from.each do |group_size|
-        if group_size == '*'
-          # TODO: > 2 should be generalized to > X
-          txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
-          sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
-          sample << sampled_ids
-          STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
-          # remove sampled txes from sampling_history
-          filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
-          sampling_history.clear
-          filtered_hist.each {|tx| sampling_history << tx}
-        elsif group_size.to_i
-          # check if there were any txes of this size
-          if group = groups[group_size.to_i]
-            if group.size < self.opts[:sample_size]
-              logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
-            end
-            sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
+      if self.opts[:recent]
+        STDERR.puts "Taking the #{self.opts[:sample_size]} most recent transactions, this overrides any other sampling params apart from maximum_commit_size"
+        txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}
+        sample = txes_larger_than_one.sort_by {|tx| -tx.index}.take(self.opts[:sample_size]).map(&:id)
+      else
+        # group the txes by size
+        groups = sampling_history.group_by {|tx| tx.size}
+        # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
+        # i.e., random sampling is done first, then the sampled txes are removed from the sampling
+        tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
+        tx_sizes_to_sample_from.each do |group_size|
+          if group_size == '*'
+            # TODO: > 2 should be generalized to > X
+            txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
+            sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
             sample << sampled_ids
-            STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
+            STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
+            # remove sampled txes from sampling_history
+            filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
+            sampling_history.clear
+            filtered_hist.each {|tx| sampling_history << tx}
+          elsif group_size.to_i
+            # check if there were any txes of this size
+            if group = groups[group_size.to_i]
+              if group.size < self.opts[:sample_size]
+                logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
+              end
+              sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
+              sample << sampled_ids
+              STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
+            else
+              logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
+            end
           else
-            logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
+            raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
           end
-        else
-          raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
         end
       end
       sample.flatten.uniq
@@ -238,6 +244,7 @@ module Evoc
         # convert query string to array of items
         query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
         # verify query before executing
+        tx = nil
         if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
           if !(query_hash['query'] - tx.items).empty?
             raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
@@ -273,10 +280,10 @@ module Evoc
                                                          max_size: s.max_size,
                                                          aggregator: s.aggregator,
                                                          measures: s.measures)
-            Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
+            Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], topk: self.opts[:topk], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
             result = Evoc::RecommendationCache.to_h(measures: s.measures)
             # merge scenario params with result hash and dump as json
-            $stdout.puts s.to_h.merge(result).to_json
+            $stdout.puts s.to_h.merge(result).merge({topk: self.opts[:topk],date: tx.date}).to_json
           rescue ArgumentError => e
             invalid_configuration += 1
             last_error = e.message

data/lib/evoc/recommendation_cache.rb CHANGED Viewed

@@ -12,22 +12,14 @@ module Evoc
             attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
         end
-        def self.recommendation_cached?(algorithm:,
-                                    query:,
-                                    model_start:,
-                                    model_end:,
-                                    max_size: nil)
+        def self.recommendation_cached?(algorithm:, query:, model_start:, model_end:, max_size: nil)
             return self.tag == [algorithm,query,model_start,model_end,max_size].hash
         end
-        def self.get_recommendation(algorithm:,
-                                    query:,
-                                    model_start:,
-                                    model_end:,
-                                    max_size: nil,
-                                    aggregator: nil,
-                                    measures: [])
+        ##
+        # @param scenario <Evoc::Scenario> the scenario to cache a new recommendation for
+        def self.get_recommendation(algorithm:, query:, model_start:, model_end:, max_size: nil, aggregator: nil, measures: [])
           # check if a new base recommendation needs to be generated
             tag = [algorithm,query,model_start,model_end,max_size].hash
             if self.tag != tag
@@ -76,10 +68,10 @@ module Evoc
         # @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
         #
         # @return [Hash[aggregator][evaluator][result]] the hash of results
-        def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
+        def self.evaluate_last(evaluators: ,expected_outcome:,measure_combination:,topk: nil, unique_consequents: nil)
           if !self.last_recommendation.nil?
               self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
-                                                            top_k: top_k,
+                                                            topk: topk,
                                                             unique_consequents: unique_consequents,
                                                             expected_outcome: expected_outcome,
                                                             measure_combination: measure_combination)

data/lib/evoc/rule.rb CHANGED Viewed

@@ -26,13 +26,13 @@ module Evoc
     end
     def human_lhs
-      if !self.tx_store.nil? # & self.lhs.all? {|i| i.is_a?(Numeric)}
+      if !self.tx_store.nil?
         self.tx_store.ints2names(self.lhs.map(&:to_i)).join(',')
       end
     end
     def human_rhs
-      if !self.tx_store.nil? #& self.rhs.all? {|i| i.is_a?(Numeric)}
+      if !self.tx_store.nil?
         self.tx_store.ints2names(self.rhs.map(&:to_i)).join(',')
       end
     end

data/lib/evoc/rule_store.rb CHANGED Viewed

@@ -117,12 +117,13 @@ module Evoc
     # Needed by Evaluate mixin
-    def evaluation_format(measures:, expected_outcome:)
+    def evaluation_format(measures:, expected_outcome:,topk: nil)
       current_weight = nil
       current_group = []
       recommendation = []
+      topk = (topk.nil? ? self.size : topk)
       # sort and filter out duplicate consequents
-      self.sort_on(measures: measures, rules: self.unique_by(measures.first)).each do |r|
+      self.sort_on(measures: measures, rules: self.unique_by(measures.first)).take(topk).each do |r|
         expected = ((r.rhs - expected_outcome).empty? ? 1 : 0)
         weight_tag = measures.map {|m| r.get_measure(m).value.nil? ? "INF" : r.get_measure(m).to_s}.join('_')
         if current_weight.nil?
@@ -153,19 +154,19 @@ module Evoc
     # @param [String] evaluator the method to use for evaluating
     # @param [Array] expected_outcome the list of items to evaluate against
     # @param [Array] measure_combination the list of measures used to first sort the recommendation
-    def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
+    def evaluate_with(evaluators:,expected_outcome:,measure_combination:,topk: nil,unique_consequents: nil)
       if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
       logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
       # sort the rules on each combination and evaluate
-      # if !top_k.nil?
-      #   raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
-      #   sorted_rules = sorted_rules.take(top_k)
+      # if !topk.nil?
+      #   raise ArgumentError, "Top K must be a number" unless topk.is_a?(Numeric)
+      #   sorted_rules = sorted_rules.take(topk)
       # end
       # convert rules into format used in evaluation
       # map to 0/1 list where 1 is a correct item and 0 is not
       # second item in each tuple gives the weight of the rule
       # evaluate the sorted list against the expected outcome
-      recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
+      recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome, topk: topk)
       potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
       results = Hash.new
       evaluators.each do |evaluator|
@@ -193,6 +194,10 @@ module Evoc
       rules.sort_by {|r| measures.map {|m| r.get_measure(m).value.nil? ? Float::INFINITY : -r.get_measure(m)}}
     end
+    def self.sort_on(rules:, measures:)
+      rules.sort_by {|r| measures.map {|m| r.get_measure(m).value.nil? ? Float::INFINITY : -r.get_measure(m)}}
+    end
     ##
     # returns the set of unique consequents
     # where each consequent is the strongest given by the input measure
@@ -258,7 +263,7 @@ module Evoc
         csv << ['rule'] + defined_measures
         self.each do |rule|
           row = CSV::Row.new([],[],false)
-          row << rule.human_name
+          row << rule.name
           defined_measures.each do |m|
             row << rule.get_measure(m).value
           end

data/lib/evoc/scenario.rb CHANGED Viewed

@@ -228,43 +228,5 @@ module Evoc
     def tx_size
       self.tx.size
     end
-    ##
-    #
-    def instance_values_for_csv
-      dont_include = ['opts', 'logger','time','filtered_model_size']
-      self.instance_values.delete_if {|k,v| dont_include.include?(k)}
-    end
-    ##
-    # generate an array suitable for a csv header
-    def csv_header
-      query = self.instance_values_for_csv.keys
-      rule_store = !self.recommendation? ? [] : self.recommendation.csv_header
-      rule_store + query
-    end
-    ##
-    # generate an array of the current values of <self>
-    # converts any array values to a comma separated string representation
-    def to_csv_row
-      query = self.instance_values_for_csv.values.map {|val| val.is_a?(Array) ? val.join(',') : val}
-      rule_store = !self.recommendation? ? [] : self.recommendation.to_csv_row
-      rule_store + query
-    end
-  	##
-  	# Prints the rules to standard out
-  	# sorted by strength
-  	def print
-      if !self.recommendation?
-          $stdout.puts ""
-      else
-        self.recommendation.print(measures)
-      end
-  	end
   end
 end

data/lib/evoc/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Evoc
-  VERSION = "3.7.0"
+  VERSION = "3.8.0"
 end

data/lib/evoc_cli/experiment.rb CHANGED Viewed

@@ -23,6 +23,7 @@ module EvocCLI
     method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
     method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
     method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
+    method_option :recent, type: :boolean, desc: "If transactions should be the most recent"
     method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
     method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
     desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
@@ -71,7 +72,7 @@ module EvocCLI
     method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
     method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
     method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
-    method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
+    method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
     desc "execute_scenarios [options]",""
     long_desc <<-LONGDESC
   keyword                  description

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: evoc
 version: !ruby/object:Gem::Version
-  version: 3.7.0
+  version: 3.8.0
 platform: ruby
 authors:
 - Thomas Rolfsnes
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-01-23 00:00:00.000000000 Z
+date: 2017-02-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler