evoc 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: dba4bd171ddbca345a90203454afd6285122db56
|
4
|
+
data.tar.gz: 987093521af4703fcc540f665a70125fe9e39716
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9d3c6d229df7deb55a7bff7afea8dd953e656e4382be8fa6e5a16f4248996fc6401f51ba962466e090742e6c07039b92f7675ca10cf709b79a212e51b824a011
|
7
|
+
data.tar.gz: e1bf42491834eab8d90eb5ed9cac1af0e54f30f74bdaa70c2f518d2eea19e42d23d9ee231767da10a3795038c4549bfe200b6f94f32d9625d369493a83b2d7c0
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Thomas Rolfsnes
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/Makefile
ADDED
data/README.md
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Evoc
|
2
|
+
|
3
|
+
Evoc is now using [semantic versioning](http://semver.org/)
|
4
|
+
|
5
|
+
## Instructions for installing Ruby
|
6
|
+
|
7
|
+
1. install ruby version manager (rvm)
|
8
|
+
|
9
|
+
```
|
10
|
+
gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
|
11
|
+
curl -sSL https://get.rvm.io | bash -s stable
|
12
|
+
source ~/.rvm/scripts/rvm
|
13
|
+
```
|
14
|
+
|
15
|
+
2. install ruby
|
16
|
+
|
17
|
+
```
|
18
|
+
rvm install ruby-2.2.0
|
19
|
+
rvm --default use ruby-2.2.0
|
20
|
+
```
|
21
|
+
|
22
|
+
## Instructions for installing Evoc
|
23
|
+
|
24
|
+
1. clone the evoc repo
|
25
|
+
|
26
|
+
```
|
27
|
+
git clone git@bitbucket.org:evolveit/evoc.git
|
28
|
+
```
|
29
|
+
|
30
|
+
2. (linux) install atlas (used for svd, can be skipped)
|
31
|
+
|
32
|
+
```
|
33
|
+
sudo apt-get install libatlas-base-dev
|
34
|
+
```
|
35
|
+
2. (mac) install gcc49 (used for svd, can be skipped)
|
36
|
+
|
37
|
+
```
|
38
|
+
brew install gcc49
|
39
|
+
```
|
40
|
+
|
41
|
+
3. install bundler
|
42
|
+
|
43
|
+
```
|
44
|
+
gem install bundler
|
45
|
+
```
|
46
|
+
|
47
|
+
4. install gem dependencies for evoc (using bundler)
|
48
|
+
|
49
|
+
```
|
50
|
+
cd 'evoc folder'
|
51
|
+
bundle install
|
52
|
+
```
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
5. install evoc
|
57
|
+
|
58
|
+
```
|
59
|
+
cd 'evoc folder'
|
60
|
+
rake install
|
61
|
+
```
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem's helper, then opens an
# interactive IRB session.
require "bundler/setup"
require "evoc_helper"
require "irb"

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and replace the IRB lines
# with:
#   require "pry"
#   Pry.start

IRB.start
|
data/bin/evoc
ADDED
data/bin/setup
ADDED
data/evoc.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put this gem's lib/ directory on the load path so that the version constant
# can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'evoc/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name    = "evoc"
  spec.version = Evoc::VERSION
  spec.authors = ["Thomas Rolfsnes"]
  spec.email   = ["mail@thomasrolfsnes.com"]

  spec.summary = "A collection of algorithms for doing Targeted Association Rule Mining"
  spec.license = "MIT"

  # Package every file tracked by git except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path =~ %r{^(test|spec|features)/} }
  spec.bindir        = "bin"
  spec.executables   = ["evoc"]
  spec.require_paths = ["lib"]

  # Development dependencies (version-pinned ones first, in the original order).
  { "bundler" => "~> 1.10", "rake" => "~> 10.0" }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
  %w[rspec ruby-prof].each { |gem_name| spec.add_development_dependency gem_name }

  # Runtime dependencies.
  %w[require_all thor time_difference ruby-progressbar rubyzip algorithms].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
#file algorithm.rb
|
2
|
+
module Evoc
|
3
|
+
class Algorithm
|
4
|
+
extend Logging
|
5
|
+
|
6
|
+
##
# Runs the rule mining algorithm named by +algorithm+ for +query+.
#
# Recognised names, tried in this order (first match wins):
#   tarmaq<depth>            -> Evoc::Algorithm.tarmaq(depth, ...)
#   topk_<k>_<min>_<max>     -> Evoc::TopK.topk(k:, min:, max:, ...)
#   crule_range_<min>_<max>  -> Evoc::Algorithm.cached_rule_range(min, max, ...)
#   rule_range_<min>_<max>   -> Evoc::Algorithm.rule_range(min, max, ...)
#   <name>                   -> Evoc::Algorithm.<name>_algorithm(...), when such
#                               a public class method exists
#
# The patterns are unanchored, so "crule_range_*" has to be tested before
# "rule_range_*" (the latter pattern also matches the former names).
#
# tx_store:: passed through unchanged to the selected algorithm
# query::    passed through unchanged to the selected algorithm
# algorithm:: String or Symbol naming the algorithm
#
# Returns whatever the selected algorithm returns.
# Raises ArgumentError when +algorithm+ is nil or names no known algorithm.
def self.execute(tx_store:,query:,algorithm:)
  raise ArgumentError, "Algorithm was equal to nil" if algorithm.nil?

  # Normalise once so that Symbol names work for the "<name>_algorithm"
  # lookup too (Symbol + String is not defined).
  name = algorithm.to_s

  # tarmaq2 => call tarmaq(2)
  if (match = /tarmaq(?<num>\d+)/.match(name))
    Evoc::Algorithm.tarmaq(match[:num].to_i, tx_store: tx_store, query: query)
  elsif (match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(name))
    Evoc::TopK.topk(k: match[:k].to_i,
                    min: match[:min].to_i,
                    max: match[:max].to_i,
                    tx_store: tx_store,
                    query: query)
  elsif (match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(name))
    Evoc::Algorithm.cached_rule_range(match[:min].to_i, match[:max].to_i, tx_store: tx_store, query: query)
  elsif (match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(name))
    Evoc::Algorithm.rule_range(match[:min].to_i, match[:max].to_i, tx_store: tx_store, query: query)
  elsif Evoc::Algorithm.respond_to?("#{name}_algorithm")
    Evoc::Algorithm.public_send("#{name}_algorithm", tx_store: tx_store, query: query)
  else
    raise ArgumentError, "#{algorithm} is not an available algorithm"
  end
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Targeted Association Rule Mining implementations
|
31
|
+
#
|
32
|
+
#
|
33
|
+
|
34
|
+
# Exposes the class-level cache filled by cached_rule_range. It is a nested
# hash: tag => { antecedent size => rules }.
def self.rule_cache
  @@rule_range_cache
end
|
37
|
+
|
38
|
+
# Nested, auto-vivifying cache: tag => { antecedent size => rules }.
# Reading a missing key at any level creates (and stores) an empty hash.
@@rule_range_cache = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }

##
# Same result as rule_range(min, max, ...), but rules are memoised per
# antecedent size for the current (tx_store, query) pair.
#
# The cache only ever holds one pair: when the pair's entry is empty the
# whole cache is cleared before it is refilled. When the entry exists, only
# the span between the smallest and largest uncached size is mined again.
#
# Returns a new Evoc::RuleStore holding the cached rules whose antecedent
# size lies in min..max.
def self.cached_rule_range(min,max,tx_store:,query:)
  tag = tx_store.hash + query.hash
  rule_store = Evoc::RuleStore.new(query: query)

  # Decide which antecedent sizes (if any) still have to be mined.
  bounds =
    if @@rule_range_cache[tag].empty?
      @@rule_range_cache.clear
      [min, max]
    else
      # calculate missing range
      uncached = (min..max).to_a - @@rule_range_cache[tag].keys
      uncached.empty? ? nil : [uncached.min, uncached.max]
    end

  if bounds
    mined = Evoc::Algorithm.rule_range(bounds.first, bounds.last, tx_store: tx_store, query: query)
    mined.group_by { |rule| rule.lhs.size }.each do |size, rules|
      @@rule_range_cache[tag][size] = rules
    end
  end

  # Sizes for which nothing was mined resolve to a freshly vivified empty
  # hash here, so they contribute no rules.
  (min..max).each do |antecedent_size|
    @@rule_range_cache[tag][antecedent_size].each { |rule| rule_store << rule }
  end

  logger.debug "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}"
  rule_store
end
|
64
|
+
|
65
|
+
##
# Builds every rule "antecedent => single consequent item" supported by at
# least one transaction, where the antecedent is a subset of +query+ with
# between +min+ and +max+ items.
#
# For each transaction returned by tx_store.transactions_of_list(query):
# * the overlap between the query and the transaction supplies the antecedents
#   (all combinations of the overlap with sizes in min..max),
# * the transaction's remaining items supply the consequents, one per rule.
#
# Nothing is generated when min > max or min > query.size. Identical
# antecedent/consequent pairs found in several transactions yield one rule.
#
# Returns an Evoc::RuleStore (created with query: query) filled with
# Evoc::Rule objects.
def self.rule_range(min,max,tx_store:,query:)
  rule_store = Evoc::RuleStore.new(query: query)
  unique_rules = Set.new

  # min must be smaller than or equal to max, and an antecedent can never be
  # larger than the query it is drawn from
  if min <= max && min <= query.size
    # initial filter: only transactions in which something in the query changed
    tx_store.transactions_of_list(query).each do |tx_id|
      tx = tx_store.get_tx(id: tx_id, id_type: :index)
      # the tx must be larger than min so that items are left for the consequent
      next unless tx.size > min

      query_overlap = query & tx.items
      consequents = tx.items - query_overlap
      # exact overlaps leave no items for the consequent
      next if consequents.empty?
      # the overlap must be able to supply an antecedent of at least min items
      next if query_overlap.size < min

      # all combinations of the overlap within the requested size range
      antecedents = (min..max).flat_map { |size| query_overlap.combination(size).to_a }
      antecedents.each do |antecedent|
        consequents.each { |consequent| unique_rules << [antecedent, [consequent]] }
      end
    end
  end

  unique_rules.each { |lhs, rhs| rule_store << Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store) }
  rule_store
end
|
106
|
+
|
107
|
+
|
108
|
+
##
|
109
|
+
# TARMAQ
|
110
|
+
# find largest subsets in @query with evidence in @tx_store version
|
111
|
+
##
# TARMAQ: finds the size of the largest subset of +query+ that changed in a
# transaction together with at least one item outside that subset, then
# returns the cached rules whose antecedent size lies in
# (largest - depth)..largest, with the lower bound clamped to 1.
#
# When no transaction qualifies the largest size stays 0, so the requested
# range is 1..0.
#
# depth:: how many sizes below the largest match to include
#
# Returns the result of cached_rule_range for that range.
def self.tarmaq(depth,tx_store:,query:)
  largest_match = 0
  # initial filter: only transactions in which something in the query changed
  tx_store.transactions_of_list(query).each do |tx_id|
    tx = tx_store.get_tx(id: tx_id, id_type: :index)
    overlap = query & tx.items
    # a transaction without items outside the overlap cannot supply a consequent
    next if (tx.items - overlap).empty?

    largest_match = overlap.size if overlap.size > largest_match
  end

  # now generate rules
  max = largest_match
  min = [max - depth, 1].max
  self.cached_rule_range(min, max, tx_store: tx_store, query: query)
end
|
133
|
+
|
134
|
+
###
|
135
|
+
## rose
|
136
|
+
###
|
137
|
+
# ROSE: only rules whose antecedent has exactly as many items as the query,
# i.e. cached_rule_range with min == max == query.size.
def self.rose_algorithm(tx_store:,query:)
  antecedent_size = query.size
  self.cached_rule_range(antecedent_size, antecedent_size, query: query, tx_store: tx_store)
end
|
141
|
+
|
142
|
+
# Co-change: only rules with a single-item antecedent, i.e. cached_rule_range
# with min == max == 1.
def self.co_change_algorithm(tx_store:, query:)
  antecedent_size = 1
  self.cached_rule_range(antecedent_size, antecedent_size, query: query, tx_store: tx_store)
end
|
145
|
+
|
146
|
+
end # Algorithm
|
147
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Evoc
|
2
|
+
class TopK
|
3
|
+
|
4
|
+
##
# Top-k rule mining for +query+.
#
# Phase 1 seeds the search: for every +min+-sized combination of the query
# that occurs in some transaction (strict lookup), one rule is created per
# item that co-occurs outside the query, and saved via save_rule when its
# support is at least the current minimum support.
#
# Phase 2 repeatedly removes the entry with the largest key from the
# expansion candidates, expands it (see expand) while its antecedent has
# fewer than +max+ items, and then drops candidates whose key prefix is below
# the current minimum support.
#
# All working state lives in class variables that are reset on every call.
#
# Returns an Evoc::RuleStore built from the rules left in the top-k map.
def self.topk(k:,min:,max:,tx_store:,query:)
  @@k = k
  @@top_k_rules = Containers::CRBTreeMap.new
  @@expansion_candidates = Containers::CRBTreeMap.new
  @@min_support = 0

  # start with all subsets of size min in the query
  query.combination(min).each do |seed_lhs|
    tx_ids = tx_store.transactions_of_list(seed_lhs, strict: true)
    # skip antecedents without any supporting transaction
    next unless tx_ids.size > 0

    # collect all consequents for this antecedent
    rhs_items = Set.new
    tx_ids.each do |tx_id|
      tx = tx_store.get_tx(id: tx_id, id_type: :index)
      # the tx must be larger than the antecedent
      # (so that items are left for the consequent)
      next unless tx.size > seed_lhs.size

      rhs_items.merge(tx.items - query)
    end

    rhs_items.each do |rhs_item|
      rule = Evoc::Rule.new(lhs: seed_lhs, rhs: [rhs_item], tx_store: tx_store)
      self.save_rule(rule) if rule.m_support.value.to_f >= @@min_support
    end
  end

  # now expand antecedents
  until @@expansion_candidates.empty? || @@expansion_candidates.min_key.to_f < @@min_support
    best = @@expansion_candidates.delete_max
    unused_items = query - best.lhs
    # don't make antecedents larger than max
    self.expand(best, unused_items, tx_store) if best.lhs.size < max

    # remove all rules with sup < min_sup from candidates
    until @@expansion_candidates.empty? || @@expansion_candidates.min_key.to_f >= @@min_support
      @@expansion_candidates.delete_min
    end
  end

  Evoc::RuleStore.new(@@top_k_rules.map { |_key, rule| rule })
end
|
50
|
+
|
51
|
+
# Tries to grow +rule+'s antecedent by one item from +expansions+.
#
# An item is only used when it compares greater than every item already in
# the antecedent. Each resulting rule (same consequent, antecedent + item) is
# handed to save_rule when its support is strictly greater than the current
# minimum support.
def self.expand(rule,expansions,tx_store)
  expansions.each do |item|
    # only expand with items larger than everything in the lhs
    next unless rule.lhs.all? { |existing| item > existing }

    candidate = Evoc::Rule.new(lhs: rule.lhs + [item], rhs: rule.rhs, tx_store: tx_store)
    self.save_rule(candidate) if candidate.m_support.value > @@min_support
  end
end
|
62
|
+
|
63
|
+
# Records +r+ in both the top-k map and the expansion candidates under the
# key "<support as float>|<rule name>".
#
# Once the top-k map holds at least k rules and +r+'s support exceeds the
# current minimum support, entries are removed from the low end while the map
# is larger than k and the smallest key's numeric prefix equals the current
# minimum support; the minimum support is then raised to the smallest
# remaining key's numeric prefix.
#
# NOTE(review): keys are strings, so ordering depends on how the tree map
# compares them; if that is plain string comparison, supports rendered in
# scientific notation (e.g. "1.0e-05") would sort above "0.5" - confirm.
def self.save_rule(r)
  key = "#{r.m_support.value.to_f}|#{r.name}"
  @@top_k_rules.push(key, r)
  @@expansion_candidates.push(key, r)

  # nothing to trim until k rules have been collected
  return unless @@top_k_rules.size >= @@k
  # a rule that does not beat the threshold cannot raise it
  return unless r.m_support.value > @@min_support

  # remove min sup rules until size is K
  @@top_k_rules.delete_min while (@@top_k_rules.size > @@k) && (@@top_k_rules.min_key.to_f == @@min_support)
  @@min_support = @@top_k_rules.min_key.to_f
end
|
85
|
+
end
|
86
|
+
end
|