evoc 3.9.1 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/evoc/algorithms/closed_rules.rb +49 -56
 - data/lib/evoc/analyze.rb +29 -0
 - data/lib/evoc/array.rb +45 -33
 - data/lib/evoc/evaluate.rb +8 -0
 - data/lib/evoc/experiment.rb +56 -35
 - data/lib/evoc/recommendation_cache.rb +12 -4
 - data/lib/evoc/scenario.rb +70 -3
 - data/lib/evoc/version.rb +1 -1
 - data/lib/evoc_cli/analyze.rb +11 -0
 - data/lib/evoc_cli/experiment.rb +4 -0
 - metadata +2 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: debd6763073d247f88a9ba0b80f252b8f31c5a30
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: b21e8c6dfd037bc87ec631aff4ff998591347a10
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 66d8e086a2992bf24a50b786989e7d4a8a1cb1e170f2d94a97f0f8438860f5f31b8abdea7c301422fa83a020009f909e88e181a8391f6b4341c40737ecf7b0ea
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: c0dfb6534e75d4663284610e1c3b3198f3be91491203b4238130e23b53f1da00287f0ec9447005710d30f8fcb3dafb79ff1be286f817b7610ec574c70b8a2d3a
         
     | 
| 
         @@ -1,59 +1,51 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Evoc
         
     | 
| 
       2 
2 
     | 
    
         
             
              class ClosedRules
         
     | 
| 
       3 
     | 
    
         
            -
                extend Logging
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
3 
     | 
    
         
             
                def self.closed_rules(tx_store:,query:)
         
     | 
| 
       6 
4 
     | 
    
         
             
                  # @@store = tx_store
         
     | 
| 
       7 
5 
     | 
    
         
             
                  # create initial trees, one tree per consequent
         
     | 
| 
       8 
6 
     | 
    
         
             
                  tree = self.initialize_tree(tx_store,query)
         
     | 
| 
       9 
     | 
    
         
            -
                   
     | 
| 
       10 
     | 
    
         
            -
                  tree.print_tree(1,nil,lambda {|node,pre|  
     | 
| 
      
 7 
     | 
    
         
            +
                  # puts "INIT TREE:"
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
         
     | 
| 
       11 
9 
     | 
    
         
             
                  closed_rules = Evoc::RuleStore.new(query: query)
         
     | 
| 
       12 
     | 
    
         
            -
                   
     | 
| 
       13 
     | 
    
         
            -
                    self.extend_nodes( 
     | 
| 
      
 10 
     | 
    
         
            +
                  tree.children.each do |consequent|
         
     | 
| 
      
 11 
     | 
    
         
            +
                    self.extend_nodes(consequent).each do |frequency, closed_sets|
         
     | 
| 
       14 
12 
     | 
    
         
             
                      closed_sets.each do |closed_set| 
         
     | 
| 
       15 
13 
     | 
    
         
             
                        antecedent = closed_set - consequent.name
         
     | 
| 
       16 
14 
     | 
    
         
             
                        closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
         
     | 
| 
       17 
15 
     | 
    
         
             
                      end
         
     | 
| 
       18 
16 
     | 
    
         
             
                    end
         
     | 
| 
       19 
     | 
    
         
            -
                   
     | 
| 
      
 17 
     | 
    
         
            +
                  end
         
     | 
| 
       20 
18 
     | 
    
         
             
                  return closed_rules
         
     | 
| 
       21 
19 
     | 
    
         
             
                end
         
     | 
| 
       22 
20 
     | 
    
         | 
| 
       23 
21 
     | 
    
         
             
                private
         
     | 
| 
       24 
22 
     | 
    
         
             
                def self.initialize_tree(tx_store, query)
         
     | 
| 
       25 
     | 
    
         
            -
                  rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
         
     | 
| 
       26 
23 
     | 
    
         
             
                  tree = Tree::TreeNode.new([])
         
     | 
| 
       27 
     | 
    
         
            -
                   
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
      
 24 
     | 
    
         
            +
                  # find all items that changed with something in the query
         
     | 
| 
      
 25 
     | 
    
         
            +
                  query_changed_in = tx_store.transactions_of_list(query)
         
     | 
| 
      
 26 
     | 
    
         
            +
                  # store all items from the query that have changed with each consequent
         
     | 
| 
      
 27 
     | 
    
         
            +
                  query_changed_in.each do |tx_id|
         
     | 
| 
      
 28 
     | 
    
         
            +
                    tx = tx_store.get_tx(id:tx_id,id_type: :index)
         
     | 
| 
      
 29 
     | 
    
         
            +
                    antecedent = (query & tx.items)
         
     | 
| 
      
 30 
     | 
    
         
            +
                    consequents = (tx.items - antecedent)
         
     | 
| 
      
 31 
     | 
    
         
            +
                    if consequents.size != 0
         
     | 
| 
      
 32 
     | 
    
         
            +
                      consequents.each do |consequent|
         
     | 
| 
      
 33 
     | 
    
         
            +
                        consequent_key = [consequent.to_s]
         
     | 
| 
      
 34 
     | 
    
         
            +
                        if tree[consequent_key].nil?
         
     | 
| 
      
 35 
     | 
    
         
            +
                          # initialize candidates
         
     | 
| 
      
 36 
     | 
    
         
            +
                          tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
         
     | 
| 
      
 37 
     | 
    
         
            +
                        end
         
     | 
| 
      
 38 
     | 
    
         
            +
                        txes_consequent = tree[consequent_key].content
         
     | 
| 
      
 39 
     | 
    
         
            +
                        antecedent.each do |item|
         
     | 
| 
      
 40 
     | 
    
         
            +
                          union = [item.to_s,consequent.to_s]
         
     | 
| 
      
 41 
     | 
    
         
            +
                          if tree[consequent_key][union].nil?
         
     | 
| 
      
 42 
     | 
    
         
            +
                            txes_union = tx_store.transactions_of(item) & txes_consequent
         
     | 
| 
      
 43 
     | 
    
         
            +
                            tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
         
     | 
| 
      
 44 
     | 
    
         
            +
                          end
         
     | 
| 
      
 45 
     | 
    
         
            +
                        end
         
     | 
| 
      
 46 
     | 
    
         
            +
                      end
         
     | 
| 
      
 47 
     | 
    
         
            +
                    end
         
     | 
| 
       31 
48 
     | 
    
         
             
                  end
         
     | 
| 
       32 
     | 
    
         
            -
                  # # find all items that changed with something in the query
         
     | 
| 
       33 
     | 
    
         
            -
                  # query_changed_in = tx_store.transactions_of_list(query)
         
     | 
| 
       34 
     | 
    
         
            -
                  # # store all items from the query that have changed with each consequent
         
     | 
| 
       35 
     | 
    
         
            -
                  # query_changed_in.each do |tx_id|
         
     | 
| 
       36 
     | 
    
         
            -
                  #   tx = tx_store.get_tx(id:tx_id,id_type: :index)
         
     | 
| 
       37 
     | 
    
         
            -
                  #   antecedent = (query & tx.items)
         
     | 
| 
       38 
     | 
    
         
            -
                  #   consequents = (tx.items - antecedent)
         
     | 
| 
       39 
     | 
    
         
            -
                  #   if consequents.size != 0
         
     | 
| 
       40 
     | 
    
         
            -
                  #     consequents.each do |consequent|
         
     | 
| 
       41 
     | 
    
         
            -
                  #       consequent_key = [consequent.to_s]
         
     | 
| 
       42 
     | 
    
         
            -
                  #       if tree[consequent_key].nil?
         
     | 
| 
       43 
     | 
    
         
            -
                  #         # initialize candidates
         
     | 
| 
       44 
     | 
    
         
            -
                  #         tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
         
     | 
| 
       45 
     | 
    
         
            -
                  #       end
         
     | 
| 
       46 
     | 
    
         
            -
                  #       txes_consequent = tree[consequent_key].content
         
     | 
| 
       47 
     | 
    
         
            -
                  #       antecedent.each do |item|
         
     | 
| 
       48 
     | 
    
         
            -
                  #         union = [item.to_s,consequent.to_s]
         
     | 
| 
       49 
     | 
    
         
            -
                  #         if tree[consequent_key][union].nil?
         
     | 
| 
       50 
     | 
    
         
            -
                  #           txes_union = tx_store.transactions_of(item) & txes_consequent
         
     | 
| 
       51 
     | 
    
         
            -
                  #           tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
         
     | 
| 
       52 
     | 
    
         
            -
                  #         end
         
     | 
| 
       53 
     | 
    
         
            -
                  #       end
         
     | 
| 
       54 
     | 
    
         
            -
                  #     end
         
     | 
| 
       55 
     | 
    
         
            -
                  #   end
         
     | 
| 
       56 
     | 
    
         
            -
                  # end
         
     | 
| 
       57 
49 
     | 
    
         
             
                  return(tree)
         
     | 
| 
       58 
50 
     | 
    
         
             
                end
         
     | 
| 
       59 
51 
     | 
    
         | 
| 
         @@ -63,7 +55,7 @@ module Evoc 
     | 
|
| 
       63 
55 
     | 
    
         
             
                    a = current_node
         
     | 
| 
       64 
56 
     | 
    
         
             
                    b = a.next_sibling
         
     | 
| 
       65 
57 
     | 
    
         
             
                    while(!b.nil?) do
         
     | 
| 
       66 
     | 
    
         
            -
                       
     | 
| 
      
 58 
     | 
    
         
            +
                      # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
         
     | 
| 
       67 
59 
     | 
    
         
             
                      ab = a.name | b.name
         
     | 
| 
       68 
60 
     | 
    
         
             
                      a_txes = a.content
         
     | 
| 
       69 
61 
     | 
    
         
             
                      b_txes = b.content
         
     | 
| 
         @@ -75,53 +67,54 @@ module Evoc 
     | 
|
| 
       75 
67 
     | 
    
         
             
                      if ab_txes.size > 0
         
     | 
| 
       76 
68 
     | 
    
         
             
                        case self.compare(a_txes,b_txes)
         
     | 
| 
       77 
69 
     | 
    
         
             
                        when 'EQUAL'
         
     | 
| 
       78 
     | 
    
         
            -
                           
     | 
| 
       79 
     | 
    
         
            -
                           
     | 
| 
       80 
     | 
    
         
            -
                           
     | 
| 
      
 70 
     | 
    
         
            +
                          # puts "  EQUAL"
         
     | 
| 
      
 71 
     | 
    
         
            +
                          # puts "    removing #{@@store.ints2names(b.name.map(&:to_i))}"
         
     | 
| 
      
 72 
     | 
    
         
            +
                          # puts "    renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
         
     | 
| 
       81 
73 
     | 
    
         
             
                          temp = b.previous_sibling
         
     | 
| 
       82 
74 
     | 
    
         
             
                          root.remove!(b)
         
     | 
| 
       83 
75 
     | 
    
         
             
                          b = temp
         
     | 
| 
       84 
76 
     | 
    
         
             
                          a.each {|n| n.rename(ab | n.name)}
         
     | 
| 
       85 
77 
     | 
    
         
             
                        when 'A_IN_B'
         
     | 
| 
       86 
     | 
    
         
            -
                           
     | 
| 
       87 
     | 
    
         
            -
                           
     | 
| 
      
 78 
     | 
    
         
            +
                          # puts "  A in B"
         
     | 
| 
      
 79 
     | 
    
         
            +
                          # puts "    renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
         
     | 
| 
       88 
80 
     | 
    
         
             
                          a.each {|n| n.rename(ab | n.name)}
         
     | 
| 
       89 
81 
     | 
    
         
             
                        when 'B_IN_A'
         
     | 
| 
       90 
     | 
    
         
            -
                           
     | 
| 
       91 
     | 
    
         
            -
                           
     | 
| 
       92 
     | 
    
         
            -
                           
     | 
| 
      
 82 
     | 
    
         
            +
                          # puts "  B in A"
         
     | 
| 
      
 83 
     | 
    
         
            +
                          # puts "    removing #{@@store.ints2names(b.name.map(&:to_i))}"
         
     | 
| 
      
 84 
     | 
    
         
            +
                          # puts "    adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
         
     | 
| 
       93 
85 
     | 
    
         
             
                          temp = b.previous_sibling
         
     | 
| 
       94 
86 
     | 
    
         
             
                          root.remove!(b)
         
     | 
| 
       95 
87 
     | 
    
         
             
                          b = temp
         
     | 
| 
       96 
88 
     | 
    
         
             
                          a << Tree::TreeNode.new(ab,ab_txes)
         
     | 
| 
       97 
89 
     | 
    
         
             
                        when 'NOT_EQUAL'
         
     | 
| 
       98 
     | 
    
         
            -
                           
     | 
| 
       99 
     | 
    
         
            -
                           
     | 
| 
      
 90 
     | 
    
         
            +
                          # puts "  NOT EQUAL"
         
     | 
| 
      
 91 
     | 
    
         
            +
                          # puts "    adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
         
     | 
| 
       100 
92 
     | 
    
         
             
                          a << Tree::TreeNode.new(ab,ab_txes)
         
     | 
| 
       101 
93 
     | 
    
         
             
                        end
         
     | 
| 
       102 
94 
     | 
    
         
             
                      end
         
     | 
| 
       103 
     | 
    
         
            -
                       
     | 
| 
       104 
     | 
    
         
            -
                      root.print_tree(1,nil,lambda {|node,pre|  
     | 
| 
      
 95 
     | 
    
         
            +
                      # puts "NEW TREE:"
         
     | 
| 
      
 96 
     | 
    
         
            +
                      # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
         
     | 
| 
       105 
97 
     | 
    
         
             
                      b = b.next_sibling
         
     | 
| 
       106 
     | 
    
         
            -
                       
     | 
| 
       107 
     | 
    
         
            -
                       
     | 
| 
      
 98 
     | 
    
         
            +
                      # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
         
     | 
| 
      
 99 
     | 
    
         
            +
                      # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
         
     | 
| 
      
 100 
     | 
    
         
            +
                      # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
         
     | 
| 
       108 
101 
     | 
    
         
             
                    end # siblings.each
         
     | 
| 
       109 
102 
     | 
    
         
             
                    if !a.children.empty?
         
     | 
| 
       110 
     | 
    
         
            -
                       
     | 
| 
      
 103 
     | 
    
         
            +
                      # puts "TRAVERSING DOWN"
         
     | 
| 
       111 
104 
     | 
    
         
             
                      self.extend_nodes(a, closed_rules: closed_rules)
         
     | 
| 
       112 
105 
     | 
    
         
             
                    end
         
     | 
| 
       113 
106 
     | 
    
         
             
                    # add node as closed rule if not subsumed by another rule already added
         
     | 
| 
       114 
107 
     | 
    
         
             
                    rule_frequency = a.content.size
         
     | 
| 
       115 
108 
     | 
    
         
             
                    rule = a.name
         
     | 
| 
       116 
109 
     | 
    
         
             
                    if closed_rules[rule_frequency].nil?
         
     | 
| 
       117 
     | 
    
         
            -
                       
     | 
| 
      
 110 
     | 
    
         
            +
                      # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
         
     | 
| 
       118 
111 
     | 
    
         
             
                      closed_rules[rule_frequency] = [rule] 
         
     | 
| 
       119 
112 
     | 
    
         
             
                    else
         
     | 
| 
       120 
113 
     | 
    
         
             
                      if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
         
     | 
| 
       121 
     | 
    
         
            -
                         
     | 
| 
      
 114 
     | 
    
         
            +
                        # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
         
     | 
| 
       122 
115 
     | 
    
         
             
                        closed_rules[rule_frequency] << rule
         
     | 
| 
       123 
116 
     | 
    
         
             
                      else
         
     | 
| 
       124 
     | 
    
         
            -
                         
     | 
| 
      
 117 
     | 
    
         
            +
                        # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
         
     | 
| 
       125 
118 
     | 
    
         
             
                      end
         
     | 
| 
       126 
119 
     | 
    
         
             
                    end
         
     | 
| 
       127 
120 
     | 
    
         
             
                    current_node = current_node.next_sibling
         
     | 
    
        data/lib/evoc/analyze.rb
    CHANGED
    
    | 
         @@ -28,6 +28,35 @@ module Evoc 
     | 
|
| 
       28 
28 
     | 
    
         
             
                  CSV {|row| row << results}
         
     | 
| 
       29 
29 
     | 
    
         
             
                end
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
      
 31 
     | 
    
         
            +
                def evolution
         
     | 
| 
      
 32 
     | 
    
         
            +
                  CSV {|row| row << %w(index relevant_index overlap)}
         
     | 
| 
      
 33 
     | 
    
         
            +
                  self.tx_store.each do |tx|
         
     | 
| 
      
 34 
     | 
    
         
            +
                    changed_in = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
         
     | 
| 
      
 35 
     | 
    
         
            +
                    previous_txes = changed_in.select {|i| i <= tx.index}
         
     | 
| 
      
 36 
     | 
    
         
            +
                    previous_txes.each do |prev_index|
         
     | 
| 
      
 37 
     | 
    
         
            +
                      prev_tx = self.tx_store.get_tx(id: prev_index,id_type: :index)
         
     | 
| 
      
 38 
     | 
    
         
            +
                      overlap = ((prev_tx.items & tx.items).size/tx.size.to_f).round(2)
         
     | 
| 
      
 39 
     | 
    
         
            +
                      CSV {|row| row << [tx.index,prev_index,overlap]}
         
     | 
| 
      
 40 
     | 
    
         
            +
                    end
         
     | 
| 
      
 41 
     | 
    
         
            +
                  end
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                def commits
         
     | 
| 
      
 45 
     | 
    
         
            +
                  unique_items = Set.new
         
     | 
| 
      
 46 
     | 
    
         
            +
                  changes_so_far = 0
         
     | 
| 
      
 47 
     | 
    
         
            +
                  self.tx_store.each do |tx|
         
     | 
| 
      
 48 
     | 
    
         
            +
                    data = Hash.new
         
     | 
| 
      
 49 
     | 
    
         
            +
                    tx.items.each {|item| unique_items << item}
         
     | 
| 
      
 50 
     | 
    
         
            +
                    changes_so_far = changes_so_far += tx.items.size
         
     | 
| 
      
 51 
     | 
    
         
            +
                    data['sha'] = tx.id
         
     | 
| 
      
 52 
     | 
    
         
            +
                    data['index'] = tx.index
         
     | 
| 
      
 53 
     | 
    
         
            +
                    data['num_changes'] = tx.items.size
         
     | 
| 
      
 54 
     | 
    
         
            +
                    data['items_touched_so_far'] = unique_items.size
         
     | 
| 
      
 55 
     | 
    
         
            +
                    data['moving_average'] = (changes_so_far/(tx.index+1)).to_f.round(2)
         
     | 
| 
      
 56 
     | 
    
         
            +
                    STDOUT.puts data.to_json
         
     | 
| 
      
 57 
     | 
    
         
            +
                  end
         
     | 
| 
      
 58 
     | 
    
         
            +
                end
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
       31 
60 
     | 
    
         
             
                def avg_method_changes_per_parsable_file
         
     | 
| 
       32 
61 
     | 
    
         
             
                  parsable_files_changed = 0
         
     | 
| 
       33 
62 
     | 
    
         
             
                  method_changes = 0
         
     | 
    
        data/lib/evoc/array.rb
    CHANGED
    
    | 
         @@ -1,37 +1,49 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            class Array
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              def mean
         
     | 
| 
      
 4 
     | 
    
         
            +
                self.inject(0) { |sum, x| sum += x } / self.size.to_f
         
     | 
| 
      
 5 
     | 
    
         
            +
              end
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              def median(already_sorted=false)
         
     | 
| 
      
 8 
     | 
    
         
            +
                return nil if self.empty?
         
     | 
| 
      
 9 
     | 
    
         
            +
                array = (already_sorted ? self : self.sort)
         
     | 
| 
      
 10 
     | 
    
         
            +
                m_pos = array.size / 2
         
     | 
| 
      
 11 
     | 
    
         
            +
                return array.size % 2 == 1 ? array[m_pos] : array[m_pos-1..m_pos].mean
         
     | 
| 
      
 12 
     | 
    
         
            +
              end
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              def subset?(other)
         
     | 
| 
      
 15 
     | 
    
         
            +
                self & other == self
         
     | 
| 
      
 16 
     | 
    
         
            +
              end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
              def include_any?(other)
         
     | 
| 
      
 19 
     | 
    
         
            +
                (self & other).size > 0
         
     | 
| 
      
 20 
     | 
    
         
            +
              end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
              ##
         
     | 
| 
      
 23 
     | 
    
         
            +
              # returns the union of an array of arrays
         
     | 
| 
      
 24 
     | 
    
         
            +
              def array_union
         
     | 
| 
      
 25 
     | 
    
         
            +
                if union = self.inject(:|)
         
     | 
| 
      
 26 
     | 
    
         
            +
                  return union
         
     | 
| 
      
 27 
     | 
    
         
            +
                else
         
     | 
| 
      
 28 
     | 
    
         
            +
                  return []
         
     | 
| 
      
 29 
     | 
    
         
            +
                end
         
     | 
| 
      
 30 
     | 
    
         
            +
              end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
              ##
              # returns the intersection of a list of lists
              #
              # folds the elements together with &; an empty receiver
              # (where the fold yields nil) gives an empty array
              def array_intersection
                inject(:&) || []
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
              ##
              # returns the list of items in self that were not in other
              #
              # self is expected to be a list of lists: other is subtracted from
              # each inner list and the remainders are combined with #array_union
              def array_difference(other)
                remainders = map { |inner| inner - other }
                remainders.array_union
              end
         
     | 
| 
       35 
47 
     | 
    
         | 
| 
       36 
48 
     | 
    
         
             
              def self.powerset(set)
         
     | 
| 
       37 
49 
     | 
    
         
             
                return [set] if set.empty?
         
     | 
    
        data/lib/evoc/evaluate.rb
    CHANGED
    
    | 
         @@ -11,6 +11,14 @@ module Evoc 
     | 
|
| 
       11 
11 
     | 
    
         
             
                  end
         
     | 
| 
       12 
12 
     | 
    
         
             
                end
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
      
 14 
     | 
    
         
            +
                ##
                # returns the mean of the m_support values of the given rules
                # as a Float, or nil when there are no rules
                def self.mean_support(rules:)
                  return nil if rules.empty?
                  total = rules.inject(0) { |sum, r| sum + r.m_support.value }
                  # quo divides exactly (Integer.quo(Integer) gives a Rational), so
                  # integer-valued supports are not truncated before the Float
                  # conversion; Rational and Float totals behave as with plain "/"
                  total.quo(rules.size).to_f
                end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                ##
                # returns the mean support over (at most) the first 10 rules
                # after sorting the rules on 'm_support' via Evoc::RuleStore.sort_on
                def self.mean_support10(rules:)
                  sorted = Evoc::RuleStore.sort_on(rules: rules, measures: ['m_support'])
                  # NOTE(review): the take/flatten/take chain is kept as-is; presumably
                  # sort_on may return nested groups - confirm against RuleStore
                  top_rules = sorted.take(10).flatten.take(10)
                  self.mean_support(rules: top_rules)
                end
         
     | 
| 
       14 
22 
     | 
    
         | 
| 
       15 
23 
     | 
    
         
             
                def self.mean_confidence(rules:)
         
     | 
| 
       16 
24 
     | 
    
         
             
                  if rules.empty? then return nil end
         
     | 
    
        data/lib/evoc/experiment.rb
    CHANGED
    
    | 
         @@ -17,6 +17,8 @@ module Evoc 
     | 
|
| 
       17 
17 
     | 
    
         
             
                end
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
19 
     | 
    
         
             
                def sample_transactions
         
     | 
| 
      
 20 
     | 
    
         
            +
                  # initialize a random number generator with fixed seed
         
     | 
| 
      
 21 
     | 
    
         
            +
                  rand = Random.new(self.opts[:seed])
         
     | 
| 
       20 
22 
     | 
    
         
             
                  # by default we can sample from the whole history
         
     | 
| 
       21 
23 
     | 
    
         
             
                  sampling_history = Evoc::HistoryStore.base_history
         
     | 
| 
       22 
24 
     | 
    
         
             
                  STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
         
     | 
| 
         @@ -74,7 +76,7 @@ module Evoc 
     | 
|
| 
       74 
76 
     | 
    
         
             
                  tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
         
     | 
| 
       75 
77 
     | 
    
         
             
                  tx_sizes_to_sample_from.each do |group_size|
         
     | 
| 
       76 
78 
     | 
    
         
             
                    if group_size == '*'
         
     | 
| 
       77 
     | 
    
         
            -
                      sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
         
     | 
| 
      
 79 
     | 
    
         
            +
                      sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size], random: rand)
         
     | 
| 
       78 
80 
     | 
    
         
             
                      sample << sampled_ids
         
     | 
| 
       79 
81 
     | 
    
         
             
                      STDERR.puts "Sampled #{sampled_ids.size} txes"
         
     | 
| 
       80 
82 
     | 
    
         
             
                      # remove sampled txes from sampling_history
         
     | 
| 
         @@ -87,7 +89,7 @@ module Evoc 
     | 
|
| 
       87 
89 
     | 
    
         
             
                        if group.size < self.opts[:sample_size]
         
     | 
| 
       88 
90 
     | 
    
         
             
                          logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
         
     | 
| 
       89 
91 
     | 
    
         
             
                        end
         
     | 
| 
       90 
     | 
    
         
            -
                        sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
         
     | 
| 
      
 92 
     | 
    
         
            +
                        sampled_ids = group.sample(self.opts[:sample_size], random: rand).map(&:id)
         
     | 
| 
       91 
93 
     | 
    
         
             
                        sample << sampled_ids
         
     | 
| 
       92 
94 
     | 
    
         
             
                        STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
         
     | 
| 
       93 
95 
     | 
    
         
             
                      else
         
     | 
| 
         @@ -109,6 +111,8 @@ module Evoc 
     | 
|
| 
       109 
111 
     | 
    
         
             
                # tx_id, query
         
     | 
| 
       110 
112 
     | 
    
         
             
                #
         
     | 
| 
       111 
113 
     | 
    
         
             
                def generate_queries
         
     | 
| 
      
 114 
     | 
    
         
            +
                  # initialize a random number generator with fixed seed
         
     | 
| 
      
 115 
     | 
    
         
            +
                  rand = Random.new(self.opts[:seed])
         
     | 
| 
       112 
116 
     | 
    
         
             
                  ##
         
     | 
| 
       113 
117 
     | 
    
         
             
                  # write dict
         
     | 
| 
       114 
118 
     | 
    
         
             
                  ##
         
     | 
| 
         @@ -154,15 +158,15 @@ module Evoc 
     | 
|
| 
       154 
158 
     | 
    
         
             
                      specified_sizes.uniq!
         
     | 
| 
       155 
159 
     | 
    
         | 
| 
       156 
160 
     | 
    
         
             
                      random_sizes = []
         
     | 
| 
       157 
     | 
    
         
            -
                      if self.opts[:random_select] then random_sizes << Random.new.rand( 
     | 
| 
      
 161 
     | 
    
         
            +
                      if self.opts[:random_select] then random_sizes << Random.new.rand(self.opts[:minimum_query_size]..(tx_size-1)) end
         
     | 
| 
       158 
162 
     | 
    
         | 
| 
       159 
163 
     | 
    
         
             
                      sampled_queries = []
         
     | 
| 
       160 
164 
     | 
    
         
             
                      # only specified sizes
         
     | 
| 
       161 
165 
     | 
    
         
             
                      if random_sizes.empty? & !specified_sizes.empty?
         
     | 
| 
       162 
     | 
    
         
            -
                        sampled_queries = specified_sizes.map {|s| items.sample(s)}
         
     | 
| 
      
 166 
     | 
    
         
            +
                        sampled_queries = specified_sizes.map {|s| items.sample(s, random: rand)}
         
     | 
| 
       163 
167 
     | 
    
         
             
                      # only random sizes
         
     | 
| 
       164 
168 
     | 
    
         
             
                      elsif !random_sizes.empty? & specified_sizes.empty?
         
     | 
| 
       165 
     | 
    
         
            -
                        sampled_queries = random_sizes.map {|s| items.sample(s)}
         
     | 
| 
      
 169 
     | 
    
         
            +
                        sampled_queries = random_sizes.map {|s| items.sample(s, random: rand)}
         
     | 
| 
       166 
170 
     | 
    
         
             
                      # random + specified = randomly sample in range defined by specified
         
     | 
| 
       167 
171 
     | 
    
         
             
                      # ex:
         
     | 
| 
       168 
172 
     | 
    
         
             
                      # specified = [1,3,10,20]
         
     | 
| 
         @@ -172,9 +176,9 @@ module Evoc 
     | 
|
| 
       172 
176 
     | 
    
         
             
                      # 2. randomly select X in specified = Y
         
     | 
| 
       173 
177 
     | 
    
         
             
                      # 3. randomly select Y in tx
         
     | 
| 
       174 
178 
     | 
    
         
             
                      elsif !random_sizes.empty? & !specified_sizes.empty?
         
     | 
| 
       175 
     | 
    
         
            -
                        specified_sizes.select! {|s| (s < tx_size) & (s  
     | 
| 
       176 
     | 
    
         
            -
                        if randomly_sampled_size = specified_sizes.sample #2.
         
     | 
| 
       177 
     | 
    
         
            -
                          sampled_queries = [items.sample(randomly_sampled_size)] #3.
         
     | 
| 
      
 179 
     | 
    
         
            +
                        specified_sizes.select! {|s| (s < tx_size) & (s >= self.opts[:minimum_query_size])} #1.
         
     | 
| 
      
 180 
     | 
    
         
            +
                        if randomly_sampled_size = specified_sizes.sample(random: rand) #2.
         
     | 
| 
      
 181 
     | 
    
         
            +
                          sampled_queries = [items.sample(randomly_sampled_size, random: rand)] #3.
         
     | 
| 
       178 
182 
     | 
    
         
             
                        end
         
     | 
| 
       179 
183 
     | 
    
         
             
                      end
         
     | 
| 
       180 
184 
     | 
    
         | 
| 
         @@ -189,6 +193,9 @@ module Evoc 
     | 
|
| 
       189 
193 
     | 
    
         
             
                          logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
         
     | 
| 
       190 
194 
     | 
    
         
             
                          next
         
     | 
| 
       191 
195 
     | 
    
         
             
                        end
         
     | 
| 
      
 196 
     | 
    
         
            +
                        if query.size < self.opts[:minimum_query_size]
         
     | 
| 
      
 197 
     | 
    
         
            +
                          next
         
     | 
| 
      
 198 
     | 
    
         
            +
                        end
         
     | 
| 
       192 
199 
     | 
    
         
             
                        CSV {|row| row << [tx_id,query.join(',')]}
         
     | 
| 
       193 
200 
     | 
    
         
             
                      end
         
     | 
| 
       194 
201 
     | 
    
         
             
                    else
         
     | 
| 
         @@ -231,8 +238,6 @@ module Evoc 
     | 
|
| 
       231 
238 
     | 
    
         
             
                  factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
         
     | 
| 
       232 
239 
     | 
    
         
             
                  # Factor: Model age aka number of commits between query and last tx in history
         
     | 
| 
       233 
240 
     | 
    
         
             
                  factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
         
     | 
| 
       234 
     | 
    
         
            -
                  # Factor: Algorithm
         
     | 
| 
       235 
     | 
    
         
            -
                  factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
         
     | 
| 
       236 
241 
     | 
    
         
             
                  # Factor: Measures
         
     | 
| 
       237 
242 
     | 
    
         
             
                  factor_measures = self.opts[:measures].map {|c| ['measures',c]}
         
     | 
| 
       238 
243 
     | 
    
         
             
                  # Factor: Aggregator
         
     | 
| 
         @@ -253,7 +258,8 @@ module Evoc 
     | 
|
| 
       253 
258 
     | 
    
         
             
                  num_lines = File.read(self.opts[:queries]).each_line.count-1
         
     | 
| 
       254 
259 
     | 
    
         
             
                  current_line = 1
         
     | 
| 
       255 
260 
     | 
    
         | 
| 
       256 
     | 
    
         
            -
                   
     | 
| 
      
 261 
     | 
    
         
            +
                  # compact removes nil values (not used factors)
         
     | 
| 
      
 262 
     | 
    
         
            +
                  factors = [factor_model_size,factor_max_size,factor_model_age,factor_measures,factor_permutation,factor_aggregators].compact
         
     | 
| 
       257 
263 
     | 
    
         
             
                  num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
         
     | 
| 
       258 
264 
     | 
    
         
             
                  invalid_configuration = 0 
         
     | 
| 
       259 
265 
     | 
    
         
             
                  last_error = 'no errors'
         
     | 
| 
         @@ -280,38 +286,53 @@ module Evoc 
     | 
|
| 
       280 
286 
     | 
    
         
             
                    end
         
     | 
| 
       281 
287 
     | 
    
         | 
| 
       282 
288 
     | 
    
         
             
                    current_scenario = 1
         
     | 
| 
       283 
     | 
    
         
            -
                    # - compact removes nil values (not used factors)
         
     | 
| 
       284 
289 
     | 
    
         
             
                    # - the splat operator '*' turns the array into parameters for #product
         
     | 
| 
       285 
290 
     | 
    
         
             
                    # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
         
     | 
| 
       286 
291 
     | 
    
         
             
                    factors.first.product(*factors[1..-1]).each do |scenario|
         
     | 
| 
       287 
     | 
    
         
            -
                      # Print progress to stderr
         
     | 
| 
       288 
     | 
    
         
            -
                      STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
         
     | 
| 
       289 
     | 
    
         
            -
                      if invalid_configuration > 0
         
     | 
| 
       290 
     | 
    
         
            -
                        STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...)                             \r"
         
     | 
| 
       291 
     | 
    
         
            -
                      else
         
     | 
| 
       292 
     | 
    
         
            -
                        STDERR.print "                                \r"
         
     | 
| 
       293 
     | 
    
         
            -
                      end
         
     | 
| 
       294 
     | 
    
         
            -
             
     | 
| 
       295 
292 
     | 
    
         
             
                      params = query_hash.merge(scenario.to_h)
         
     | 
| 
       296 
293 
     | 
    
         
             
                      params[:case_id] = self.opts[:case_id]
         
     | 
| 
       297 
294 
     | 
    
         
             
                      params[:granularity] = self.opts[:granularity]
         
     | 
| 
       298 
295 
     | 
    
         
             
                      # initialize scenario
         
     | 
| 
       299 
296 
     | 
    
         
             
                      s =  Evoc::Scenario.new(params)
         
     | 
| 
       300 
     | 
    
         
            -
                       
     | 
| 
       301 
     | 
    
         
            -
             
     | 
| 
       302 
     | 
    
         
            -
             
     | 
| 
       303 
     | 
    
         
            -
             
     | 
| 
       304 
     | 
    
         
            -
             
     | 
| 
       305 
     | 
    
         
            -
             
     | 
| 
       306 
     | 
    
         
            -
             
     | 
| 
       307 
     | 
    
         
            -
             
     | 
| 
       308 
     | 
    
         
            -
                         
     | 
| 
       309 
     | 
    
         
            -
                         
     | 
| 
       310 
     | 
    
         
            -
             
     | 
| 
       311 
     | 
    
         
            -
                         
     | 
| 
       312 
     | 
    
         
            -
             
     | 
| 
       313 
     | 
    
         
            -
                         
     | 
| 
       314 
     | 
    
         
            -
             
     | 
| 
      
 297 
     | 
    
         
            +
                      scenario_stats = {}
         
     | 
| 
      
 298 
     | 
    
         
            +
                      if self.opts[:stats]
         
     | 
| 
      
 299 
     | 
    
         
            +
                        scenario_stats = s.stats
         
     | 
| 
      
 300 
     | 
    
         
            +
                      end
         
     | 
| 
      
 301 
     | 
    
         
            +
                      # Factor: Algorithm
         
     | 
| 
      
 302 
     | 
    
         
            +
                      self.opts[:algorithms].each do |algorithm|
         
     | 
| 
      
 303 
     | 
    
         
            +
                        s.algorithm = algorithm
         
     | 
| 
      
 304 
     | 
    
         
            +
                        # Print progress to stderr
         
     | 
| 
      
 305 
     | 
    
         
            +
                        STDERR.print "(#{self.opts[:case_id]}) Executing #{algorithm} on scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
         
     | 
| 
      
 306 
     | 
    
         
            +
                        if invalid_configuration > 0
         
     | 
| 
      
 307 
     | 
    
         
            +
                          STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...)                             \r"
         
     | 
| 
      
 308 
     | 
    
         
            +
                        else
         
     | 
| 
      
 309 
     | 
    
         
            +
                          STDERR.print "                                \r"
         
     | 
| 
      
 310 
     | 
    
         
            +
                        end
         
     | 
| 
      
 311 
     | 
    
         
            +
             
     | 
| 
      
 312 
     | 
    
         
            +
                        begin
         
     | 
| 
      
 313 
     | 
    
         
            +
                          Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
         
     | 
| 
      
 314 
     | 
    
         
            +
                                                                       query: s.query,
         
     | 
| 
      
 315 
     | 
    
         
            +
                                                                       model_start: s.model_start,
         
     | 
| 
      
 316 
     | 
    
         
            +
                                                                       model_end: s.model_end,
         
     | 
| 
      
 317 
     | 
    
         
            +
                                                                       max_size: s.max_size,
         
     | 
| 
      
 318 
     | 
    
         
            +
                                                                       aggregator: s.aggregator,
         
     | 
| 
      
 319 
     | 
    
         
            +
                                                                       measures: s.measures)
         
     | 
| 
      
 320 
     | 
    
         
            +
                          Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators],
         
     | 
| 
      
 321 
     | 
    
         
            +
                                                                  topk: self.opts[:topk],
         
     | 
| 
      
 322 
     | 
    
         
            +
                                                                  unique_consequents: self.opts[:unique_consequents],
         
     | 
| 
      
 323 
     | 
    
         
            +
                                                                  expected_outcome: s.expected_outcome,
         
     | 
| 
      
 324 
     | 
    
         
            +
                                                                  measure_combination: s.measures)
         
     | 
| 
      
 325 
     | 
    
         
            +
             
     | 
| 
      
 326 
     | 
    
         
            +
                          # build json line by merging hashes
         
     | 
| 
      
 327 
     | 
    
         
            +
                          $stdout.puts s.to_h
         
     | 
| 
      
 328 
     | 
    
         
            +
                                         .merge(scenario_stats)
         
     | 
| 
      
 329 
     | 
    
         
            +
                                         .merge({topk: self.opts[:topk], date: tx.date})
         
     | 
| 
      
 330 
     | 
    
         
            +
                                         .merge(Evoc::RecommendationCache.to_h(measures: s.measures))
         
     | 
| 
      
 331 
     | 
    
         
            +
                                         .to_json
         
     | 
| 
      
 332 
     | 
    
         
            +
                        rescue ArgumentError => e
         
     | 
| 
      
 333 
     | 
    
         
            +
                          invalid_configuration += 1
         
     | 
| 
      
 334 
     | 
    
         
            +
                          last_error = e.message
         
     | 
| 
      
 335 
     | 
    
         
            +
                        end
         
     | 
| 
       315 
336 
     | 
    
         
             
                      end
         
     | 
| 
       316 
337 
     | 
    
         
             
                      current_scenario += 1
         
     | 
| 
       317 
338 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -12,14 +12,22 @@ module Evoc 
     | 
|
| 
       12 
12 
     | 
    
         
             
                        attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
         
     | 
| 
       13 
13 
     | 
    
         
             
                    end
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
                    def self.recommendation_cached?(algorithm:, 
     | 
| 
      
 15 
     | 
    
         
            +
                    ##
                    # returns true when the currently stored tag equals the hash of
                    # [algorithm, query, model_start, model_end, max_size]
                    def self.recommendation_cached?(algorithm:,
                                                    query:,
                                                    model_start:,
                                                    model_end:,
                                                    max_size: nil)
                      requested_tag = [algorithm, query, model_start, model_end, max_size].hash
                      self.tag == requested_tag
                    end
         
     | 
| 
       18 
22 
     | 
    
         | 
| 
       19 
23 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
                     
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
      
 24 
     | 
    
         
            +
                    def self.get_recommendation(algorithm:,
         
     | 
| 
      
 25 
     | 
    
         
            +
                                                query:,
         
     | 
| 
      
 26 
     | 
    
         
            +
                                                model_start:,
         
     | 
| 
      
 27 
     | 
    
         
            +
                                                model_end:,
         
     | 
| 
      
 28 
     | 
    
         
            +
                                                max_size: nil,
         
     | 
| 
      
 29 
     | 
    
         
            +
                                                aggregator: nil,
         
     | 
| 
      
 30 
     | 
    
         
            +
                                                measures: [])
         
     | 
| 
       23 
31 
     | 
    
         
             
                      # check if a new base recommendation needs to be generated
         
     | 
| 
       24 
32 
     | 
    
         
             
                        tag = [algorithm,query,model_start,model_end,max_size].hash
         
     | 
| 
       25 
33 
     | 
    
         
             
                        if self.tag != tag
         
     | 
    
        data/lib/evoc/scenario.rb
    CHANGED
    
    | 
         @@ -16,6 +16,7 @@ module Evoc 
     | 
|
| 
       16 
16 
     | 
    
         
             
                              :model_size,
         
     | 
| 
       17 
17 
     | 
    
         
             
                              :model_age,
         
     | 
| 
       18 
18 
     | 
    
         
             
                              :max_size,
         
     | 
| 
      
 19 
     | 
    
         
            +
                              :stats,
         
     | 
| 
       19 
20 
     | 
    
         
             
                              :opts
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
22 
     | 
    
         
             
                def initialize(opts = Hash.new)
         
     | 
| 
         @@ -23,7 +24,7 @@ module Evoc 
     | 
|
| 
       23 
24 
     | 
    
         
             
                  self.opts        = opts
         
     | 
| 
       24 
25 
     | 
    
         
             
                  self.scenario_id = opts.hash
         
     | 
| 
       25 
26 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
                   
     | 
| 
      
 27 
     | 
    
         
            +
                  self.tx_id = opts[:tx_id]
         
     | 
| 
       27 
28 
     | 
    
         
             
                  self.model_age = opts[:model_age]
         
     | 
| 
       28 
29 
     | 
    
         
             
                  opts.each do |attribute,value|
         
     | 
| 
       29 
30 
     | 
    
         
             
                    self.send("#{attribute}=", value)
         
     | 
| 
         @@ -55,11 +56,17 @@ module Evoc 
     | 
|
| 
       55 
56 
     | 
    
         | 
| 
       56 
57 
     | 
    
         | 
| 
       57 
58 
     | 
    
         
             
                ##
                # builds a flat hash describing this scenario
                #
                # each listed field is read by calling the method of the same name:
                #  - Array values are stored as a comma separated string
                #  - Hash values are merged into the result under their own keys
                #  - any other value is stored under the field name
                def to_h
                  fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures stats)
                  fields.each_with_object(Hash.new) do |key, hash|
                    value = self.method(key).call
                    case value
                    when Array
                      hash[key] = value.join(',')
                    when Hash
                      hash.merge!(value)
                    else
                      hash[key] = value
                    end
                  end
                end
         
     | 
| 
         @@ -228,5 +235,65 @@ module Evoc 
     | 
|
| 
       228 
235 
     | 
    
         
             
                def tx_size
         
     | 
| 
       229 
236 
     | 
    
         
             
                  self.tx.size
         
     | 
| 
       230 
237 
     | 
    
         
             
                end
         
     | 
| 
      
 238 
     | 
    
         
            +
             
     | 
| 
      
 239 
     | 
    
         
            +
                def stats
         
     | 
| 
      
 240 
     | 
    
         
            +
                  time_start = Time.now
         
     | 
| 
      
 241 
     | 
    
         
            +
                  history = Evoc::HistoryStore.get_history(self.model_start,self.model_end,self.max_size)
         
     | 
| 
      
 242 
     | 
    
         
            +
                  relevant_transactions = Set.new
         
     | 
| 
      
 243 
     | 
    
         
            +
                  relevant_items = Set.new
         
     | 
| 
      
 244 
     | 
    
         
            +
                  relevant_ages = []
         
     | 
| 
      
 245 
     | 
    
         
            +
                  avg_age_of_relevant_transactions = 0
         
     | 
| 
      
 246 
     | 
    
         
            +
                  avg_size_of_relevant_transactions = 0
         
     | 
| 
      
 247 
     | 
    
         
            +
                  files_changed = Set.new
         
     | 
| 
      
 248 
     | 
    
         
            +
                  num_methods_changed = 0
         
     | 
| 
      
 249 
     | 
    
         
            +
                  num_new_items = 0
         
     | 
| 
      
 250 
     | 
    
         
            +
                  # @avg_size_of_relevant_transactions = 0
         
     | 
| 
      
 251 
     | 
    
         
            +
                  # mean_age_of_relevant
         
     | 
| 
      
 252 
     | 
    
         
            +
                  # media_age_of_relevant
         
     | 
| 
      
 253 
     | 
    
         
            +
                  # ratio_new_items
         
     | 
| 
      
 254 
     | 
    
         
            +
                  self.query.each do |item|
         
     | 
| 
      
 255 
     | 
    
         
            +
                    if change = history.int_2_name[item]
         
     | 
| 
      
 256 
     | 
    
         
            +
                      change = change.split(':')
         
     | 
| 
      
 257 
     | 
    
         
            +
                      files_changed << change[0]
         
     | 
| 
      
 258 
     | 
    
         
            +
                      if change.size > 1
         
     | 
| 
      
 259 
     | 
    
         
            +
                        num_methods_changed = num_methods_changed + 1
         
     | 
| 
      
 260 
     | 
    
         
            +
                      end
         
     | 
| 
      
 261 
     | 
    
         
            +
                      indexes_of_previous_changes = history.transactions_of(item, identifier: :index)
         
     | 
| 
      
 262 
     | 
    
         
            +
                      if new_item = (indexes_of_previous_changes.size == 0)
         
     | 
| 
      
 263 
     | 
    
         
            +
                        num_new_items = num_new_items + 1
         
     | 
| 
      
 264 
     | 
    
         
            +
                      else
         
     | 
| 
      
 265 
     | 
    
         
            +
                        indexes_of_previous_changes.each do |tx_index|
         
     | 
| 
      
 266 
     | 
    
         
            +
                          relevant_so_far = relevant_transactions.size
         
     | 
| 
      
 267 
     | 
    
         
            +
                          relevant_transactions << tx_index
         
     | 
| 
      
 268 
     | 
    
         
            +
                          new_relevant = (relevant_transactions.size > relevant_so_far)
         
     | 
| 
      
 269 
     | 
    
         
            +
                          if new_relevant
         
     | 
| 
      
 270 
     | 
    
         
            +
                            age = (self.tx_index - tx_index)
         
     | 
| 
      
 271 
     | 
    
         
            +
                            relevant_ages << age
         
     | 
| 
      
 272 
     | 
    
         
            +
                            tx = history.get_tx(id: tx_index,id_type: :index)
         
     | 
| 
      
 273 
     | 
    
         
            +
                            avg_size_of_relevant_transactions = avg_size_of_relevant_transactions + tx.size
         
     | 
| 
      
 274 
     | 
    
         
            +
                            avg_age_of_relevant_transactions = avg_age_of_relevant_transactions + age
         
     | 
| 
      
 275 
     | 
    
         
            +
                          end
         
     | 
| 
      
 276 
     | 
    
         
            +
                        end
         
     | 
| 
      
 277 
     | 
    
         
            +
                      end
         
     | 
| 
      
 278 
     | 
    
         
            +
                    else
         
     | 
| 
      
 279 
     | 
    
         
            +
                      num_new_items = num_new_items + 1
         
     | 
| 
      
 280 
     | 
    
         
            +
                    end
         
     | 
| 
      
 281 
     | 
    
         
            +
                  end
         
     | 
| 
      
 282 
     | 
    
         
            +
             
     | 
| 
      
 283 
     | 
    
         
            +
                  num_relevant_transactions = relevant_transactions.size
         
     | 
| 
      
 284 
     | 
    
         
            +
             
     | 
| 
      
 285 
     | 
    
         
            +
                  time_end = Time.now
         
     | 
| 
      
 286 
     | 
    
         
            +
                  time_generate_stats = TimeDifference.between(time_start,time_end).in_seconds.round(8)
         
     | 
| 
      
 287 
     | 
    
         
            +
                  {time_generate_stats: time_generate_stats,
         
     | 
| 
      
 288 
     | 
    
         
            +
                   num_files_changed: files_changed.size,
         
     | 
| 
      
 289 
     | 
    
         
            +
                   num_methods_changed: num_methods_changed,
         
     | 
| 
      
 290 
     | 
    
         
            +
                   num_new_items: num_new_items,
         
     | 
| 
      
 291 
     | 
    
         
            +
                   num_relevant_transactions: num_relevant_transactions,
         
     | 
| 
      
 292 
     | 
    
         
            +
                   median_age_of_relevant_transactions: relevant_ages.median,
         
     | 
| 
      
 293 
     | 
    
         
            +
                   avg_age_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_age_of_relevant_transactions/num_relevant_transactions).to_f,
         
     | 
| 
      
 294 
     | 
    
         
            +
                   avg_size_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_size_of_relevant_transactions/num_relevant_transactions).to_f}
         
     | 
| 
      
 295 
     | 
    
         
            +
                end
         
     | 
| 
      
 296 
     | 
    
         
            +
             
     | 
| 
      
 297 
     | 
    
         
            +
             
     | 
| 
       231 
298 
     | 
    
         
             
              end
         
     | 
| 
       232 
299 
     | 
    
         
             
            end
         
     | 
    
        data/lib/evoc/version.rb
    CHANGED
    
    
    
        data/lib/evoc_cli/analyze.rb
    CHANGED
    
    | 
         @@ -6,6 +6,17 @@ module EvocCLI 
     | 
|
| 
       6 
6 
     | 
    
         
             
                class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
         
     | 
| 
       7 
7 
     | 
    
         
             
                class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
      
 9 
     | 
    
         
            +
                desc "evolution","Outputs where the items of all transactions previously changed."
         
     | 
| 
      
 10 
     | 
    
         
            +
                def evolution
         
     | 
| 
      
 11 
     | 
    
         
            +
                  a = Evoc::Analyze.new(options)
         
     | 
| 
      
 12 
     | 
    
         
            +
                  a.evolution
         
     | 
| 
      
 13 
     | 
    
         
            +
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                desc "commits","Outputs the items touched so far and the moving average of items per transactions for every transaction in the history"
         
     | 
| 
      
 16 
     | 
    
         
            +
                def commits
         
     | 
| 
      
 17 
     | 
    
         
            +
                  a = Evoc::Analyze.new(options)
         
     | 
| 
      
 18 
     | 
    
         
            +
                  a.commits
         
     | 
| 
      
 19 
     | 
    
         
            +
                end
         
     | 
| 
       9 
20 
     | 
    
         | 
| 
       10 
21 
     | 
    
         
             
                method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
         
     | 
| 
       11 
22 
     | 
    
         
             
                desc "measure_values","Empirically investigate the range of interestingness measures"
         
     | 
    
        data/lib/evoc_cli/experiment.rb
    CHANGED
    
    | 
         @@ -19,6 +19,7 @@ module EvocCLI 
     | 
|
| 
       19 
19 
     | 
    
         | 
| 
       20 
20 
     | 
    
         
             
                ##
         
     | 
| 
       21 
21 
     | 
    
         
             
                # sample_transactions
         
     | 
| 
      
 22 
     | 
    
         
            +
                method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
         
     | 
| 
       22 
23 
     | 
    
         
             
                method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
         
     | 
| 
       23 
24 
     | 
    
         
             
                method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
         
     | 
| 
       24 
25 
     | 
    
         
             
                method_option :recent, type: :numeric, desc: "Filter to the X most recent transactions"
         
     | 
| 
         @@ -38,6 +39,7 @@ module EvocCLI 
     | 
|
| 
       38 
39 
     | 
    
         
             
                ##
         
     | 
| 
       39 
40 
     | 
    
         
             
                # generate_queries
         
     | 
| 
       40 
41 
     | 
    
         
             
                #
         
     | 
| 
      
 42 
     | 
    
         
            +
                method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
         
     | 
| 
       41 
43 
     | 
    
         
             
                method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
         
     | 
| 
       42 
44 
     | 
    
         
             
                method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
         
     | 
| 
       43 
45 
     | 
    
         
             
                method_option :select, aliases: '-s', type: :array, default: [],
         
     | 
| 
         @@ -46,6 +48,7 @@ module EvocCLI 
     | 
|
| 
       46 
48 
     | 
    
         
             
                  desc: "Reverse version of --select (select \"all but\" X)"
         
     | 
| 
       47 
49 
     | 
    
         
             
                method_option :percentage, aliases: '-e', type: :array,
         
     | 
| 
       48 
50 
     | 
    
         
             
                  desc: "Percentage of items to select for each query"
         
     | 
| 
      
 51 
     | 
    
         
            +
                method_option :minimum_query_size, type: :numeric, default: 1, desc: "Only sample queries of at least this size"
         
     | 
| 
       49 
52 
     | 
    
         
             
                method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
         
     | 
| 
       50 
53 
     | 
    
         
             
                method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
         
     | 
| 
       51 
54 
     | 
    
         
             
                method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
         
     | 
| 
         @@ -75,6 +78,7 @@ module EvocCLI 
     | 
|
| 
       75 
78 
     | 
    
         
             
                method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
         
     | 
| 
       76 
79 
     | 
    
         
             
                method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
         
     | 
| 
       77 
80 
     | 
    
         
             
                method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
         
     | 
| 
      
 81 
     | 
    
         
            +
                method_option :stats, type: :boolean, required: false, desc: "Generate extra stats describing each scenario"
         
     | 
| 
       78 
82 
     | 
    
         
             
                desc "execute_scenarios [options]",""
         
     | 
| 
       79 
83 
     | 
    
         
             
                def execute_scenarios
         
     | 
| 
       80 
84 
     | 
    
         
             
                  if !options[:permutation].nil?
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: evoc
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 3.9.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 3.10.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Thomas Rolfsnes
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2017-03- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2017-03-30 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: bundler
         
     |