evoc 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: dba4bd171ddbca345a90203454afd6285122db56
|
4
|
+
data.tar.gz: 987093521af4703fcc540f665a70125fe9e39716
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9d3c6d229df7deb55a7bff7afea8dd953e656e4382be8fa6e5a16f4248996fc6401f51ba962466e090742e6c07039b92f7675ca10cf709b79a212e51b824a011
|
7
|
+
data.tar.gz: e1bf42491834eab8d90eb5ed9cac1af0e54f30f74bdaa70c2f518d2eea19e42d23d9ee231767da10a3795038c4549bfe200b6f94f32d9625d369493a83b2d7c0
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Thomas Rolfsnes
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/Makefile
ADDED
data/README.md
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Evoc
|
2
|
+
|
3
|
+
Evoc is now using [semantic versioning](http://semver.org/)
|
4
|
+
|
5
|
+
## Instructions for installing Ruby
|
6
|
+
|
7
|
+
1. install ruby version manager (rvm)
|
8
|
+
|
9
|
+
```
|
10
|
+
gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
|
11
|
+
curl -sSL https://get.rvm.io | bash -s stable
|
12
|
+
source ~/.rvm/scripts/rvm
|
13
|
+
```
|
14
|
+
|
15
|
+
2. install ruby
|
16
|
+
|
17
|
+
```
|
18
|
+
rvm install ruby-2.2.0
|
19
|
+
rvm --default use ruby-2.2.0
|
20
|
+
```
|
21
|
+
|
22
|
+
## Instructions for installing Evoc
|
23
|
+
|
24
|
+
1. clone the evoc repo
|
25
|
+
|
26
|
+
```
|
27
|
+
git clone git@bitbucket.org:evolveit/evoc.git
|
28
|
+
```
|
29
|
+
|
30
|
+
2. (linux) install atlas (used for svd, can be skipped)
|
31
|
+
|
32
|
+
```
|
33
|
+
sudo apt-get install libatlas-base-dev
|
34
|
+
```
|
35
|
+
2. (mac) install gcc49 (used for svd, can be skipped)
|
36
|
+
|
37
|
+
```
|
38
|
+
brew install gcc49
|
39
|
+
```
|
40
|
+
|
41
|
+
3. install bundler
|
42
|
+
|
43
|
+
```
|
44
|
+
gem install bundler
|
45
|
+
```
|
46
|
+
|
47
|
+
4. install gem dependencies for evoc (using bundler)
|
48
|
+
|
49
|
+
```
|
50
|
+
cd 'evoc folder'
|
51
|
+
bundle install
|
52
|
+
```
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
5. install evoc
|
57
|
+
|
58
|
+
```
|
59
|
+
cd 'evoc folder'
|
60
|
+
rake install
|
61
|
+
```
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the evoc helper, then opens an
# interactive IRB session.
#
# Fixtures or other initialization code can be added before IRB.start.
# To use Pry instead, add pry to the Gemfile and replace the IRB lines with:
#   require "pry"
#   Pry.start

require "bundler/setup"
require "evoc_helper"
require "irb"

IRB.start
|
data/bin/evoc
ADDED
data/bin/setup
ADDED
data/evoc.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for evoc. The local lib directory is put on the load path
# first so that Evoc::VERSION can be required from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'evoc/version'

Gem::Specification.new do |spec|
  spec.name          = "evoc"
  spec.version       = Evoc::VERSION
  spec.authors       = ["Thomas Rolfsnes"]
  spec.email         = ["mail@thomasrolfsnes.com"]

  spec.summary       = %q{A collection of algorithms for doing Targeted Association Rule Mining}
  spec.license       = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "bin"
  spec.executables   = ["evoc"]
  spec.require_paths = ["lib"]

  # Development dependencies: gem name => version constraint (nil = any version).
  {
    "bundler"   => "~> 1.10",
    "rake"      => "~> 10.0",
    "rspec"     => nil,
    "ruby-prof" => nil
  }.each do |gem_name, constraint|
    spec.add_development_dependency(*[gem_name, constraint].compact)
  end

  # Runtime dependencies, all unconstrained.
  %w[require_all thor time_difference ruby-progressbar rubyzip algorithms].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
#file algorithm.rb
|
2
|
+
module Evoc
|
3
|
+
class Algorithm
|
4
|
+
extend Logging
|
5
|
+
|
6
|
+
# Runs the algorithm identified by +algorithm+ for +query+ against +tx_store+.
#
# Recognised names, tested in this order (patterns are unanchored, so the
# name only has to contain the pattern):
#   tarmaq<N>                 -> Evoc::Algorithm.tarmaq(N, ...)
#   topk_<k>_<min>_<max>      -> Evoc::TopK.topk(k:, min:, max:, ...)
#   crule_range_<min>_<max>   -> Evoc::Algorithm.cached_rule_range(min, max, ...)
#   rule_range_<min>_<max>    -> Evoc::Algorithm.rule_range(min, max, ...)
#   <name>                    -> Evoc::Algorithm.<name>_algorithm(...), when
#                                Evoc::Algorithm responds to that method
#
# @param tx_store  passed through unchanged to the selected algorithm
# @param query     passed through unchanged to the selected algorithm
# @param algorithm [String, Symbol] algorithm name
# @return whatever the selected algorithm returns
# @raise [ArgumentError] if +algorithm+ is nil or matches none of the above
def self.execute(tx_store:,query:,algorithm:)
  raise ArgumentError, "Algorithm was equal to nil" if algorithm.nil?

  # Normalise once so that symbols (e.g. :rose) work as well as strings;
  # the previous `algorithm + '_algorithm'` failed for symbols.
  name = algorithm.to_s

  # tarmaq2 => call tarmaq(2)
  if (match = /tarmaq(?<num>\d+)/.match(name))
    Evoc::Algorithm.tarmaq(match[:num].to_i, tx_store: tx_store, query: query)
  elsif (match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(name))
    Evoc::TopK.topk(k: match[:k].to_i,
                    min: match[:min].to_i,
                    max: match[:max].to_i,
                    tx_store: tx_store,
                    query: query)
  # crule_range must be tested before rule_range: the rule_range pattern
  # also matches inside "crule_range_..."
  elsif (match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(name))
    Evoc::Algorithm.cached_rule_range(match[:min].to_i, match[:max].to_i, tx_store: tx_store, query: query)
  elsif (match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(name))
    Evoc::Algorithm.rule_range(match[:min].to_i, match[:max].to_i, tx_store: tx_store, query: query)
  elsif Evoc::Algorithm.respond_to?("#{name}_algorithm")
    Evoc::Algorithm.public_send("#{name}_algorithm", tx_store: tx_store, query: query)
  else
    raise ArgumentError, "#{algorithm} is not an available algorithm"
  end
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Targeted Association Rule Mining implementations
|
31
|
+
#
|
32
|
+
#
|
33
|
+
|
34
|
+
# Exposes the class-level cache that cached_rule_range reads and fills.
# The returned object is the cache itself, not a copy.
def self.rule_cache
  @@rule_range_cache
end
|
37
|
+
|
38
|
+
# Auto-vivifying nested hash: tag => { antecedent size => rules }.
# Reading a missing key at any level creates an empty nested hash there.
@@rule_range_cache = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }

# Same result as rule_range(min, max, ...), but rules are memoised per
# antecedent size under a tag derived from tx_store and query.
#
# When the tag has no cached entries the whole cache is cleared first, so
# only one (tx_store, query) pair is held at a time. Otherwise only the
# sizes missing from the cache are mined; the span from the smallest to the
# largest missing size is recomputed in a single rule_range call.
#
# @param min [Integer] smallest antecedent size
# @param max [Integer] largest antecedent size
# @return [Evoc::RuleStore] rules for every antecedent size in min..max
def self.cached_rule_range(min,max,tx_store:,query:)
  tag = tx_store.hash+query.hash
  rule_store = Evoc::RuleStore.new(query: query)

  # decide which span of antecedent sizes (if any) still has to be mined
  span_to_mine =
    if @@rule_range_cache[tag].empty?
      @@rule_range_cache.clear
      [min, max]
    else
      missing_sizes = (min..max).to_a - @@rule_range_cache[tag].keys
      missing_sizes.empty? ? nil : [missing_sizes.min, missing_sizes.max]
    end

  if span_to_mine
    mined = Evoc::Algorithm.rule_range(span_to_mine.first, span_to_mine.last, tx_store: tx_store, query: query)
    mined.group_by {|r| r.lhs.size}.each do |size,rules|
      @@rule_range_cache[tag][size] = rules
    end
  end

  # sizes that produced no rules read back as an empty (auto-created) hash
  (min..max).each do |antecedent_size|
    @@rule_range_cache[tag][antecedent_size].each {|rule| rule_store << rule}
  end

  logger.debug "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}"
  rule_store
end
|
64
|
+
|
65
|
+
# Builds every rule whose antecedent is a subset of +query+ with a size in
# min..max and whose consequent is a single item that changed together with
# that antecedent in some transaction.
#
# For each transaction touching the query, the overlap with the query gives
# the antecedent candidates and the remaining items give the consequents.
# Duplicate (antecedent, consequent) pairs are collapsed before the rules
# are instantiated.
#
# @param min [Integer] smallest antecedent size
# @param max [Integer] largest antecedent size
# @return [Evoc::RuleStore] empty when min > max or min > query.size
def self.rule_range(min,max,tx_store:,query:)
  rule_store = Evoc::RuleStore.new(query: query)
  unique_rules = Set.new

  # min must not exceed max, and an antecedent can never be larger than the query
  return rule_store if min > max || min > query.size

  # initial filter: only transactions where something in the query changed
  tx_store.transactions_of_list(query).each do |tx_id|
    tx = tx_store.get_tx(id: tx_id, id_type: :index)
    # the transaction must be larger than min so items are left for the consequent
    next unless tx.size > min

    query_overlap = query & tx.items
    consequents = tx.items - query_overlap
    # skip exact overlaps (nothing left for the consequent) ...
    next if consequents.size == 0
    # ... and overlaps too small to form an antecedent of the minimum size
    next if query_overlap.size < min

    # all combinations of the overlap for every size in the requested range
    antecedents = (min..max).flat_map {|size| query_overlap.combination(size).to_a}
    antecedents.each do |antecedent|
      consequents.each {|consequent| unique_rules << [antecedent, [consequent]]}
    end
  end

  unique_rules.each do |lhs, rhs|
    rule_store << Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store)
  end
  rule_store
end
|
106
|
+
|
107
|
+
|
108
|
+
##
|
109
|
+
# TARMAQ
|
110
|
+
# find largest subsets in @query with evidence in @tx_store version
|
111
|
+
# TARMAQ: finds the largest overlap between +query+ and any transaction that
# still has items left over for a consequent, then returns the rules whose
# antecedent size lies between (largest overlap - depth) and the largest
# overlap. The lower bound never drops below 1.
#
# @param depth [Integer] how many antecedent sizes below the largest overlap to include
# @return the result of cached_rule_range for the computed size range
def self.tarmaq(depth,tx_store:,query:)
  largest_match = 0
  # initial filter: only transactions where something in the query changed
  tx_store.transactions_of_list(query).each do |tx_id|
    tx = tx_store.get_tx(id: tx_id, id_type: :index)
    overlap = query & tx.items
    # a transaction that is fully covered by the query leaves no consequent
    next if (tx.items - overlap).size == 0
    largest_match = overlap.size if overlap.size > largest_match
  end

  max = largest_match
  min = [max - depth, 1].max
  self.cached_rule_range(min, max, tx_store: tx_store, query: query)
end
|
133
|
+
|
134
|
+
###
|
135
|
+
## rose
|
136
|
+
###
|
137
|
+
# ROSE: only antecedents whose size equals the size of the query are used,
# i.e. cached_rule_range is called with min == max == query.size.
def self.rose_algorithm(tx_store:,query:)
  antecedent_size = query.size
  cached_rule_range(antecedent_size, antecedent_size, tx_store: tx_store, query: query)
end
|
141
|
+
|
142
|
+
# Co-change: only single-item antecedents are used, i.e. cached_rule_range
# is called with min == max == 1.
def self.co_change_algorithm(tx_store:, query:)
  single_item = 1
  cached_rule_range(single_item, single_item, tx_store: tx_store, query: query)
end
|
145
|
+
|
146
|
+
end # Algorithm
|
147
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Evoc
  # Top-k targeted rule mining: keeps the k rules with the highest support
  # whose antecedent is drawn from the query, growing antecedents one item at
  # a time while raising the minimum support as better rules are found.
  #
  # State lives in class variables that are reset at the start of every
  # topk call, so concurrent calls would share and overwrite that state.
  #
  # Both trees are keyed by [support, rule name]. Arrays compare element by
  # element, so rules are ordered numerically by support and ties are broken
  # by name. (String keys such as "0.5|a" sort after "0.55|b" and therefore
  # cannot be used for ordering.)
  class TopK

    # Mines the top-k rules for +query+.
    #
    # @param k [Integer] number of rules to keep
    # @param min [Integer] antecedent size of the initial (seed) rules
    # @param max [Integer] largest antecedent size to expand to
    # @param tx_store transaction store; must answer transactions_of_list and get_tx
    # @param query [Array] items to draw antecedents from
    # @return [Evoc::RuleStore] the retained rules, in ascending key order
    def self.topk(k:,min:,max:,tx_store:,query:)
      @@k = k
      @@top_k_rules = Containers::CRBTreeMap.new
      @@expansion_candidates = Containers::CRBTreeMap.new
      @@min_support = 0

      # seed with all subsets of size min in the query
      query.combination(min).each do |antecedent|
        # only antecedents that occur in at least one transaction are useful
        changed_in = tx_store.transactions_of_list(antecedent, strict: true)
        next unless changed_in.size > 0

        # collect every consequent seen together with this antecedent
        consequents = Set.new
        changed_in.each do |tx_id|
          tx = tx_store.get_tx(id: tx_id, id_type: :index)
          # the transaction must be larger than the antecedent
          # (so that items are left for the consequent)
          next unless tx.size > antecedent.size
          (tx.items - query).each {|c| consequents << c}
        end

        consequents.each do |consequent|
          rule = Evoc::Rule.new(lhs: antecedent, rhs: [consequent], tx_store: tx_store)
          save_rule(rule) if rule.m_support.value.to_f >= @@min_support
        end
      end

      # Expand while at least one candidate still reaches the minimum
      # support. The best candidate (max key) decides this; testing the worst
      # one would skip expansion whenever a stale low-support seed remains.
      while !@@expansion_candidates.empty? && (@@expansion_candidates.max_key.first >= @@min_support)
        max_candidate = @@expansion_candidates.delete_max
        expansions = query - max_candidate.lhs
        # don't make antecedents larger than max
        expand(max_candidate, expansions, tx_store) if max_candidate.lhs.size < max
        # drop all candidates whose support fell below the minimum support
        while !@@expansion_candidates.empty? && (@@expansion_candidates.min_key.first < @@min_support)
          @@expansion_candidates.delete_min
        end
      end

      Evoc::RuleStore.new(@@top_k_rules.map {|_key,rule| rule})
    end

    # Tries to grow the antecedent of +rule+ by each item in +expansions+.
    # An item is only added when it is larger than every item already in the
    # antecedent, which avoids generating the same item set twice. Expanded
    # rules are saved when their support is above the current minimum support.
    def self.expand(rule,expansions,tx_store)
      expansions.each do |e|
        next unless rule.lhs.all? {|i| e > i}
        expanded_rule = Evoc::Rule.new(lhs: rule.lhs+[e], rhs: rule.rhs, tx_store: tx_store)
        save_rule(expanded_rule) if expanded_rule.m_support.value > @@min_support
      end
    end

    # Records +r+ as a top-k rule and as an expansion candidate. Once k rules
    # are held and +r+ beats the minimum support, rules sitting exactly at the
    # old minimum are evicted while more than k remain, and the minimum
    # support is raised to the lowest support still held.
    def self.save_rule(r)
      key = [r.m_support.value.to_f, r.name.to_s]
      @@top_k_rules.push(key, r)
      @@expansion_candidates.push(key, r)

      return unless @@top_k_rules.size >= @@k
      return unless r.m_support.value > @@min_support

      while (@@top_k_rules.size > @@k) && (@@top_k_rules.min_key.first == @@min_support)
        @@top_k_rules.delete_min
      end
      @@min_support = @@top_k_rules.min_key.first
    end
  end
end
|