RubyGems - evoc - Versions diffs - 3.9.1 → 3.10.0 - Mend

evoc 3.9.1 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/lib/evoc/algorithms/closed_rules.rb +49 -56
data/lib/evoc/analyze.rb +29 -0
data/lib/evoc/array.rb +45 -33
data/lib/evoc/evaluate.rb +8 -0
data/lib/evoc/experiment.rb +56 -35
data/lib/evoc/recommendation_cache.rb +12 -4
data/lib/evoc/scenario.rb +70 -3
data/lib/evoc/version.rb +1 -1
data/lib/evoc_cli/analyze.rb +11 -0
data/lib/evoc_cli/experiment.rb +4 -0
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d556543c7f271c2c9c6a18ffb508b5f167b64629
-  data.tar.gz: 62a6606b2096cf0d25a522cc27766f9bac752bcf
+  metadata.gz: debd6763073d247f88a9ba0b80f252b8f31c5a30
+  data.tar.gz: b21e8c6dfd037bc87ec631aff4ff998591347a10
 SHA512:
-  metadata.gz: b73b5929392077c66b2b6a21e50abcb76715a8173cee0958a8238e93d8d3c86deb92bded7162158015b0cb1b38485d17194d629d3afa315af99800eff16ae55c
-  data.tar.gz: 8882a45dab133b128ca7901d3924a46b716205af42fd6997bd8482bcc735bd63a869d4a55509f559ff53b69c62acb93ec8a62cebd09350696ce0e6f7aee20c38
+  metadata.gz: 66d8e086a2992bf24a50b786989e7d4a8a1cb1e170f2d94a97f0f8438860f5f31b8abdea7c301422fa83a020009f909e88e181a8391f6b4341c40737ecf7b0ea
+  data.tar.gz: c0dfb6534e75d4663284610e1c3b3198f3be91491203b4238130e23b53f1da00287f0ec9447005710d30f8fcb3dafb79ff1be286f817b7610ec574c70b8a2d3a

data/lib/evoc/algorithms/closed_rules.rb CHANGED

@@ -1,59 +1,51 @@
 module Evoc
   class ClosedRules
-    extend Logging
     def self.closed_rules(tx_store:,query:)
       # @@store = tx_store
       # create initial trees, one tree per consequent
       tree = self.initialize_tree(tx_store,query)
-      logger.debug "INIT TREE:"
-      tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
+      # puts "INIT TREE:"
+      # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
       closed_rules = Evoc::RuleStore.new(query: query)
-      # tree.children.each do |consequent|
-        self.extend_nodes(tree).each do |frequency, closed_sets|
+      tree.children.each do |consequent|
+        self.extend_nodes(consequent).each do |frequency, closed_sets|
           closed_sets.each do |closed_set|
             antecedent = closed_set - consequent.name
             closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
           end
         end
-      # end
+      end
       return closed_rules
     end
     private
     def self.initialize_tree(tx_store, query)
-      rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
       tree = Tree::TreeNode.new([])
-      rules.each do |rule|
-        txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
-        union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
-        tree << Tree::TreeNode.new(union,txes_union)
+      # find all items that changed with something in the query
+      query_changed_in = tx_store.transactions_of_list(query)
+      # store all items from the query that have changed with each consequent
+      query_changed_in.each do |tx_id|
+        tx = tx_store.get_tx(id:tx_id,id_type: :index)
+        antecedent = (query & tx.items)
+        consequents = (tx.items - antecedent)
+        if consequents.size != 0
+          consequents.each do |consequent|
+            consequent_key = [consequent.to_s]
+            if tree[consequent_key].nil?
+              # initialize candidates
+              tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
+            end
+            txes_consequent = tree[consequent_key].content
+            antecedent.each do |item|
+              union = [item.to_s,consequent.to_s]
+              if tree[consequent_key][union].nil?
+                txes_union = tx_store.transactions_of(item) & txes_consequent
+                tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
+              end
+            end
+          end
+        end
       end
-      # # find all items that changed with something in the query
-      # query_changed_in = tx_store.transactions_of_list(query)
-      # # store all items from the query that have changed with each consequent
-      # query_changed_in.each do |tx_id|
-      #   tx = tx_store.get_tx(id:tx_id,id_type: :index)
-      #   antecedent = (query & tx.items)
-      #   consequents = (tx.items - antecedent)
-      #   if consequents.size != 0
-      #     consequents.each do |consequent|
-      #       consequent_key = [consequent.to_s]
-      #       if tree[consequent_key].nil?
-      #         # initialize candidates
-      #         tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
-      #       end
-      #       txes_consequent = tree[consequent_key].content
-      #       antecedent.each do |item|
-      #         union = [item.to_s,consequent.to_s]
-      #         if tree[consequent_key][union].nil?
-      #           txes_union = tx_store.transactions_of(item) & txes_consequent
-      #           tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
-      #         end
-      #       end
-      #     end
-      #   end
-      # end
       return(tree)
     end
@@ -63,7 +55,7 @@ module Evoc
         a = current_node
         b = a.next_sibling
         while(!b.nil?) do
-          logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
+          # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
           ab = a.name | b.name
           a_txes = a.content
           b_txes = b.content
@@ -75,53 +67,54 @@ module Evoc
           if ab_txes.size > 0
             case self.compare(a_txes,b_txes)
             when 'EQUAL'
-              logger.debug "  EQUAL"
-              logger.debug "    removing #{b.name}"
-              logger.debug "    renaming #{a.name} to #{ab}"
+              # puts "  EQUAL"
+              # puts "    removing #{@@store.ints2names(b.name.map(&:to_i))}"
+              # puts "    renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
               temp = b.previous_sibling
               root.remove!(b)
               b = temp
               a.each {|n| n.rename(ab | n.name)}
             when 'A_IN_B'
-              logger.debug "  A in B"
-              logger.debug "    renaming #{a.name} to #{ab}"
+              # puts "  A in B"
+              # puts "    renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
               a.each {|n| n.rename(ab | n.name)}
             when 'B_IN_A'
-              logger.debug "  B in A"
-              logger.debug "    removing #{b.name}"
-              logger.debug "    adding child #{ab} to #{a.name}"
+              # puts "  B in A"
+              # puts "    removing #{@@store.ints2names(b.name.map(&:to_i))}"
+              # puts "    adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
               temp = b.previous_sibling
               root.remove!(b)
               b = temp
               a << Tree::TreeNode.new(ab,ab_txes)
             when 'NOT_EQUAL'
-              logger.debug "  NOT EQUAL"
-              logger.debug "    adding child #{ab} to #{a.name}"
+              # puts "  NOT EQUAL"
+              # puts "    adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
               a << Tree::TreeNode.new(ab,ab_txes)
             end
           end
-          logger.debug "NEW TREE:"
-          root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
+          # puts "NEW TREE:"
+          # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
           b = b.next_sibling
-          logger.debug "A next sibling #{b}}"
-          logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
+          # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
+          # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
+          # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
         end # siblings.each
         if !a.children.empty?
-          logger.debug "TRAVERSING DOWN"
+          # puts "TRAVERSING DOWN"
           self.extend_nodes(a, closed_rules: closed_rules)
         end
         # add node as closed rule if not subsumed by another rule already added
         rule_frequency = a.content.size
         rule = a.name
         if closed_rules[rule_frequency].nil?
-          logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
+          # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
           closed_rules[rule_frequency] = [rule]
         else
           if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
-            logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
+            # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
             closed_rules[rule_frequency] << rule
           else
-            logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
+            # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
           end
         end
         current_node = current_node.next_sibling

data/lib/evoc/analyze.rb CHANGED

@@ -28,6 +28,35 @@ module Evoc
       CSV {|row| row << results}
     end
+    def evolution
+      CSV {|row| row << %w(index relevant_index overlap)}
+      self.tx_store.each do |tx|
+        changed_in = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
+        previous_txes = changed_in.select {|i| i <= tx.index}
+        previous_txes.each do |prev_index|
+          prev_tx = self.tx_store.get_tx(id: prev_index,id_type: :index)
+          overlap = ((prev_tx.items & tx.items).size/tx.size.to_f).round(2)
+          CSV {|row| row << [tx.index,prev_index,overlap]}
+        end
+      end
+    end
+    def commits
+      unique_items = Set.new
+      changes_so_far = 0
+      self.tx_store.each do |tx|
+        data = Hash.new
+        tx.items.each {|item| unique_items << item}
+        changes_so_far = changes_so_far += tx.items.size
+        data['sha'] = tx.id
+        data['index'] = tx.index
+        data['num_changes'] = tx.items.size
+        data['items_touched_so_far'] = unique_items.size
+        data['moving_average'] = (changes_so_far/(tx.index+1)).to_f.round(2)
+        STDOUT.puts data.to_json
+      end
+    end
     def avg_method_changes_per_parsable_file
       parsable_files_changed = 0
       method_changes = 0

data/lib/evoc/array.rb CHANGED

@@ -1,37 +1,49 @@
 class Array
-	def subset?(other)
-		self & other == self
-	end
-	def include_any?(other)
-		(self & other).size > 0
-	end
-	##
-	# returns the union of an array of arraya
-	def array_union
-		if union = self.inject(:|)
-			return union
-		else
-			return []
-		end
-	end
-	##
-	# returns the intersection of a list of lists
-	def array_intersection
-		if intersection = self.inject(:&)
-			return intersection
-		else
-			return []
-		end
-	end
-	##
-	# returns the list of items in self that was not in other
-	def array_difference(other)
-		self.map {|a| a - other}.array_union
-	end
+  def mean
+    self.inject(0) { |sum, x| sum += x } / self.size.to_f
+  end
+  def median(already_sorted=false)
+    return nil if self.empty?
+    array = (already_sorted ? self : self.sort)
+    m_pos = array.size / 2
+    return array.size % 2 == 1 ? array[m_pos] : array[m_pos-1..m_pos].mean
+  end
+  def subset?(other)
+    self & other == self
+  end
+  def include_any?(other)
+    (self & other).size > 0
+  end
+  ##
+  # returns the union of an array of arraya
+  def array_union
+    if union = self.inject(:|)
+      return union
+    else
+      return []
+    end
+  end
+  ##
+  # returns the intersection of a list of lists
+  def array_intersection
+    if intersection = self.inject(:&)
+      return intersection
+    else
+      return []
+    end
+  end
+  ##
+  # returns the list of items in self that was not in other
+  def array_difference(other)
+    self.map {|a| a - other}.array_union
+  end
   def self.powerset(set)
     return [set] if set.empty?

data/lib/evoc/evaluate.rb CHANGED

@@ -11,6 +11,14 @@ module Evoc
       end
     end
+    def self.mean_support(rules:)
+      if rules.empty? then return nil end
+      return (rules.inject(0) {|sum,r| sum + r.m_support.value}/rules.size).to_f
+    end
+    def self.mean_support10(rules:)
+      return self.mean_support(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_support']).take(10).flatten.take(10))
+    end
     def self.mean_confidence(rules:)
       if rules.empty? then return nil end

data/lib/evoc/experiment.rb CHANGED

@@ -17,6 +17,8 @@ module Evoc
     end
     def sample_transactions
+      # initialze a random number generator with fixed seed
+      rand = Random.new(self.opts[:seed])
       # by default we can sample from the whole history
       sampling_history = Evoc::HistoryStore.base_history
       STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
@@ -74,7 +76,7 @@ module Evoc
       tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
       tx_sizes_to_sample_from.each do |group_size|
         if group_size == '*'
-          sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
+          sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size], random: rand)
           sample << sampled_ids
           STDERR.puts "Sampled #{sampled_ids.size} txes"
           # remove sampled txes from sampling_history
@@ -87,7 +89,7 @@ module Evoc
             if group.size < self.opts[:sample_size]
               logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
             end
-            sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
+            sampled_ids = group.sample(self.opts[:sample_size], random: rand).map(&:id)
             sample << sampled_ids
             STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
           else
@@ -109,6 +111,8 @@ module Evoc
     # tx_id, query
     #
     def generate_queries
+      # initialze a random number generator with fixed seed
+      rand = Random.new(self.opts[:seed])
       ##
       # write dict
       ##
@@ -154,15 +158,15 @@ module Evoc
           specified_sizes.uniq!
           random_sizes = []
-          if self.opts[:random_select] then random_sizes << Random.new.rand(1..(tx_size-1)) end
+          if self.opts[:random_select] then random_sizes << Random.new.rand(self.opts[:minimum_query_size]..(tx_size-1)) end
           sampled_queries = []
           # only specified sizes
           if random_sizes.empty? & !specified_sizes.empty?
-            sampled_queries = specified_sizes.map {|s| items.sample(s)}
+            sampled_queries = specified_sizes.map {|s| items.sample(s, random: rand)}
           # only random sizes
           elsif !random_sizes.empty? & specified_sizes.empty?
-            sampled_queries = random_sizes.map {|s| items.sample(s)}
+            sampled_queries = random_sizes.map {|s| items.sample(s, random: rand)}
           # random + specified = randomly sample in range defined by specified
           # ex:
           # specified = [1,3,10,20]
@@ -172,9 +176,9 @@ module Evoc
           # 2. randomly select X in specified = Y
           # 3. randomly select Y in tx
           elsif !random_sizes.empty? & !specified_sizes.empty?
-            specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
-            if randomly_sampled_size = specified_sizes.sample #2.
-              sampled_queries = [items.sample(randomly_sampled_size)] #3.
+            specified_sizes.select! {|s| (s < tx_size) & (s >= self.opts[:minimum_query_size])} #1.
+            if randomly_sampled_size = specified_sizes.sample(random: rand) #2.
+              sampled_queries = [items.sample(randomly_sampled_size, random: rand)] #3.
             end
           end
@@ -189,6 +193,9 @@ module Evoc
               logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
               next
             end
+            if query.size < self.opts[:minimum_query_size]
+              next
+            end
             CSV {|row| row << [tx_id,query.join(',')]}
           end
         else
@@ -231,8 +238,6 @@ module Evoc
       factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
       # Factor: Model age aka number of commits between query and last tx in history
       factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
-      # Factor: Algorithm
-      factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
       # Factor: Measures
       factor_measures = self.opts[:measures].map {|c| ['measures',c]}
       # Factor: Aggregator
@@ -253,7 +258,8 @@ module Evoc
       num_lines = File.read(self.opts[:queries]).each_line.count-1
       current_line = 1
-      factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
+      # compact removes nil values (not used factors)
+      factors = [factor_model_size,factor_max_size,factor_model_age,factor_measures,factor_permutation,factor_aggregators].compact
       num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
       invalid_configuration = 0
       last_error = 'no errors'
@@ -280,38 +286,53 @@ module Evoc
         end
         current_scenario = 1
-        # - compact removes nil values (not used factors)
         # - the splat operator '*' turns the array into parameters for #product
         # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
         factors.first.product(*factors[1..-1]).each do |scenario|
-          # Print progress to stderr
-          STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
-          if invalid_configuration > 0
-            STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...)                             \r"
-          else
-            STDERR.print "                                \r"
-          end
           params = query_hash.merge(scenario.to_h)
           params[:case_id] = self.opts[:case_id]
           params[:granularity] = self.opts[:granularity]
           # initialize scenario
           s =  Evoc::Scenario.new(params)
-          begin
-            Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
-                                                         query: s.query,
-                                                         model_start: s.model_start,
-                                                         model_end: s.model_end,
-                                                         max_size: s.max_size,
-                                                         aggregator: s.aggregator,
-                                                         measures: s.measures)
-            Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], topk: self.opts[:topk], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
-            result = Evoc::RecommendationCache.to_h(measures: s.measures)
-            # merge scenario params with result hash and dump as json
-            $stdout.puts s.to_h.merge({topk: self.opts[:topk],date: tx.date}).merge(result).to_json
-          rescue ArgumentError => e
-            invalid_configuration += 1
-            last_error = e.message
+          scenario_stats = {}
+          if self.opts[:stats]
+            scenario_stats = s.stats
+          end
+          # Factor: Algorithm
+          self.opts[:algorithms].each do |algorithm|
+            s.algorithm = algorithm
+            # Print progress to stderr
+            STDERR.print "(#{self.opts[:case_id]}) Executing #{algorithm} on scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
+            if invalid_configuration > 0
+              STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...)                             \r"
+            else
+              STDERR.print "                                \r"
+            end
+            begin
+              Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
+                                                           query: s.query,
+                                                           model_start: s.model_start,
+                                                           model_end: s.model_end,
+                                                           max_size: s.max_size,
+                                                           aggregator: s.aggregator,
+                                                           measures: s.measures)
+              Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators],
+                                                      topk: self.opts[:topk],
+                                                      unique_consequents: self.opts[:unique_consequents],
+                                                      expected_outcome: s.expected_outcome,
+                                                      measure_combination: s.measures)
+              # build json line by merging hashes
+              $stdout.puts s.to_h
+                             .merge(scenario_stats)
+                             .merge({topk: self.opts[:topk], date: tx.date})
+                             .merge(Evoc::RecommendationCache.to_h(measures: s.measures))
+                             .to_json
+            rescue ArgumentError => e
+              invalid_configuration += 1
+              last_error = e.message
+            end
           end
           current_scenario += 1
         end

data/lib/evoc/recommendation_cache.rb CHANGED

@@ -12,14 +12,22 @@ module Evoc
             attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
         end
-        def self.recommendation_cached?(algorithm:, query:, model_start:, model_end:, max_size: nil)
+        def self.recommendation_cached?(algorithm:,
+                                    query:,
+                                    model_start:,
+                                    model_end:,
+                                    max_size: nil)
             return self.tag == [algorithm,query,model_start,model_end,max_size].hash
         end
-        ##
-        # @param scenario <Evoc::Scenario> the scenario to cache a new recommendation for
-        def self.get_recommendation(algorithm:, query:, model_start:, model_end:, max_size: nil, aggregator: nil, measures: [])
+        def self.get_recommendation(algorithm:,
+                                    query:,
+                                    model_start:,
+                                    model_end:,
+                                    max_size: nil,
+                                    aggregator: nil,
+                                    measures: [])
           # check if a new base recommendation needs to be generated
             tag = [algorithm,query,model_start,model_end,max_size].hash
             if self.tag != tag

data/lib/evoc/scenario.rb CHANGED

@@ -16,6 +16,7 @@ module Evoc
                   :model_size,
                   :model_age,
                   :max_size,
+                  :stats,
                   :opts
     def initialize(opts = Hash.new)
@@ -23,7 +24,7 @@ module Evoc
       self.opts        = opts
       self.scenario_id = opts.hash
-      # model_size depends on model_age, so set model_age first
+      self.tx_id = opts[:tx_id]
       self.model_age = opts[:model_age]
       opts.each do |attribute,value|
         self.send("#{attribute}=", value)
@@ -55,11 +56,17 @@ module Evoc
     def to_h
-      fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
+      fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures stats)
       hash = Hash.new
       fields.each do |key|
         value = self.method(key).call
-        hash[key] = value.is_a?(Array) ? value.join(',') : value
+        if value.is_a?(Array)
+          hash[key] = value.join(',')
+        elsif value.is_a?(Hash)
+          hash.merge!(value)
+        else
+          hash[key] = value
+        end
       end
       return hash
     end
@@ -228,5 +235,65 @@ module Evoc
     def tx_size
       self.tx.size
     end
+    def stats
+      time_start = Time.now
+      history = Evoc::HistoryStore.get_history(self.model_start,self.model_end,self.max_size)
+      relevant_transactions = Set.new
+      relevant_items = Set.new
+      relevant_ages = []
+      avg_age_of_relevant_transactions = 0
+      avg_size_of_relevant_transactions = 0
+      files_changed = Set.new
+      num_methods_changed = 0
+      num_new_items = 0
+      # @avg_size_of_relevant_transactions = 0
+      # mean_age_of_relevant
+      # media_age_of_relevant
+      # ratio_new_items
+      self.query.each do |item|
+        if change = history.int_2_name[item]
+          change = change.split(':')
+          files_changed << change[0]
+          if change.size > 1
+            num_methods_changed = num_methods_changed + 1
+          end
+          indexes_of_previous_changes = history.transactions_of(item, identifier: :index)
+          if new_item = (indexes_of_previous_changes.size == 0)
+            num_new_items = num_new_items + 1
+          else
+            indexes_of_previous_changes.each do |tx_index|
+              relevant_so_far = relevant_transactions.size
+              relevant_transactions << tx_index
+              new_relevant = (relevant_transactions.size > relevant_so_far)
+              if new_relevant
+                age = (self.tx_index - tx_index)
+                relevant_ages << age
+                tx = history.get_tx(id: tx_index,id_type: :index)
+                avg_size_of_relevant_transactions = avg_size_of_relevant_transactions + tx.size
+                avg_age_of_relevant_transactions = avg_age_of_relevant_transactions + age
+              end
+            end
+          end
+        else
+          num_new_items = num_new_items + 1
+        end
+      end
+      num_relevant_transactions = relevant_transactions.size
+      time_end = Time.now
+      time_generate_stats = TimeDifference.between(time_start,time_end).in_seconds.round(8)
+      {time_generate_stats: time_generate_stats,
+       num_files_changed: files_changed.size,
+       num_methods_changed: num_methods_changed,
+       num_new_items: num_new_items,
+       num_relevant_transactions: num_relevant_transactions,
+       median_age_of_relevant_transactions: relevant_ages.median,
+       avg_age_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_age_of_relevant_transactions/num_relevant_transactions).to_f,
+       avg_size_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_size_of_relevant_transactions/num_relevant_transactions).to_f}
+    end
   end
 end

data/lib/evoc/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Evoc
-  VERSION = "3.9.1"
+  VERSION = "3.10.0"
 end

data/lib/evoc_cli/analyze.rb CHANGED

@@ -6,6 +6,17 @@ module EvocCLI
     class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
     class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
+    desc "evolution","Outputs where the items of all transactions previously changed."
+    def evolution
+      a = Evoc::Analyze.new(options)
+      a.evolution
+    end
+    desc "commits","Outputs the items touched so far and the moving average of items per transactions for every transaction in the history"
+    def commits
+      a = Evoc::Analyze.new(options)
+      a.commits
+    end
     method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
     desc "measure_values","Empirically investigate the range of interestingness measures"

data/lib/evoc_cli/experiment.rb CHANGED

@@ -19,6 +19,7 @@ module EvocCLI
     ##
     # sample_transactions
+    method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
     method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
     method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
     method_option :recent, type: :numeric, desc: "Filter to the X most recent transactions"
@@ -38,6 +39,7 @@ module EvocCLI
     ##
     # generate_queries
     #
+    method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
     method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
     method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
     method_option :select, aliases: '-s', type: :array, default: [],
@@ -46,6 +48,7 @@ module EvocCLI
       desc: "Reverse version of --select (select \"all but\" X)"
     method_option :percentage, aliases: '-e', type: :array,
       desc: "Percentage of items to select for each query"
+    method_option :minimum_query_size, type: :numeric, default: 1, desc: "Only sample queries of at least this size"
     method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
     method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
     method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
@@ -75,6 +78,7 @@ module EvocCLI
     method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
     method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
     method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
+    method_option :stats, type: :boolean, required: false, desc: "Generate extra stats describing each scenario"
     desc "execute_scenarios [options]",""
     def execute_scenarios
       if !options[:permutation].nil?

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: evoc
 version: !ruby/object:Gem::Version
-  version: 3.9.1
+  version: 3.10.0
 platform: ruby
 authors:
 - Thomas Rolfsnes
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-09 00:00:00.000000000 Z
+date: 2017-03-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler