evoc 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dba4bd171ddbca345a90203454afd6285122db56
4
+ data.tar.gz: 987093521af4703fcc540f665a70125fe9e39716
5
+ SHA512:
6
+ metadata.gz: 9d3c6d229df7deb55a7bff7afea8dd953e656e4382be8fa6e5a16f4248996fc6401f51ba962466e090742e6c07039b92f7675ca10cf709b79a212e51b824a011
7
+ data.tar.gz: e1bf42491834eab8d90eb5ed9cac1af0e54f30f74bdaa70c2f518d2eea19e42d23d9ee231767da10a3795038c4549bfe200b6f94f32d9625d369493a83b2d7c0
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ evoc.log
2
+ .DS_Store
3
+ /.bundle/
4
+ /.yardoc
5
+ /Gemfile.lock
6
+ /_yardoc/
7
+ /coverage/
8
+ /doc/
9
+ /pkg/
10
+ /spec/reports/
11
+ /tmp/
12
+ */*.html
13
+ /.history
14
+ /GRTAGS
15
+ *TAGS
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.3
4
+ before_install: gem install bundler -v 1.10.4
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in evoc.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Thomas Rolfsnes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/Makefile ADDED
@@ -0,0 +1,4 @@
1
+ install:
2
+ bundle install
3
+ rake install
4
+
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # Evoc
2
+
3
+ Evoc is now using [semantic versioning](http://semver.org/)
4
+
5
+ ## Instructions for installing Ruby
6
+
7
+ 1. install ruby version manager (rvm)
8
+
9
+ ```
10
+ gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
11
+ curl -sSL https://get.rvm.io | bash -s stable
12
+ source /home/ubuntu/.rvm/scripts/rvm
13
+ ```
14
+
15
+ 2. install ruby
16
+
17
+ ```
18
+ rvm install ruby-2.2.0
19
+ rvm --default use ruby-2.2.0
20
+ ```
21
+
22
+ ## Instructions for installing Evoc
23
+
24
+ 1. clone the evoc repo
25
+
26
+ ```
27
+ git clone git@bitbucket.org:evolveit/evoc.git
28
+ ```
29
+
30
+ 2. (linux) install atlas (used for svd, can be skipped)
31
+
32
+ ```
33
+ sudo apt-get install libatlas-base-dev
34
+ ```
35
+ 2. (mac) install gcc49 (used for svd, can be skipped)
36
+
37
+ ```
38
+ brew install gcc49
39
+ ```
40
+
41
+ 3. install bundler
42
+
43
+ ```
44
+ gem install bundler
45
+ ```
46
+
47
+ 4. install gem dependencies for evoc (using bundler)
48
+
49
+ ```
50
+ cd 'evoc folder'
51
+ bundle install
52
+ ```
53
+
54
+
55
+
56
+ 5. install evoc
57
+
58
+ ```
59
+ cd 'evoc folder'
60
+ rake install
61
+ ```
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "evoc_helper"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/evoc ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/evoc_helper'
3
+ EvocCLI::Main.start(ARGV)
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
data/evoc.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'evoc/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "evoc"
8
+ spec.version = Evoc::VERSION
9
+ spec.authors = ["Thomas Rolfsnes"]
10
+ spec.email = ["mail@thomasrolfsnes.com"]
11
+
12
+ spec.summary = %q{A collection of algorithms for doing Targeted Association Rule Mining}
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
16
+ spec.bindir = "bin"
17
+ spec.executables = ["evoc",]
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_development_dependency "bundler", "~> 1.10"
21
+ spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "rspec"
23
+ spec.add_development_dependency "ruby-prof"
24
+ spec.add_runtime_dependency "require_all"
25
+ spec.add_runtime_dependency "thor"
26
+ spec.add_runtime_dependency "time_difference"
27
+ spec.add_runtime_dependency "ruby-progressbar"
28
+ spec.add_runtime_dependency "rubyzip"
29
+ spec.add_runtime_dependency "algorithms"
30
+ end
@@ -0,0 +1,147 @@
1
+ #file algorithm.rb
2
# Set is used by rule_range below; it is not autoloaded before Ruby 3.2.
require 'set'

module Evoc
  # Entry points for the targeted association rule mining algorithms.
  #
  # All methods are class methods. Generated rules are returned wrapped in an
  # Evoc::RuleStore built for the given query.
  class Algorithm
    extend Logging

    # Runs the algorithm identified by +algorithm+.
    #
    # Recognised names, checked in this order (first match wins):
    #   "tarmaq<depth>"            -> tarmaq(depth, ...)
    #   "topk_<k>_<min>_<max>"     -> Evoc::TopK.topk(...)
    #   "crule_range_<min>_<max>"  -> cached_rule_range(min, max, ...)
    #   "rule_range_<min>_<max>"   -> rule_range(min, max, ...)
    #   "<name>"                   -> <name>_algorithm(...), if this class
    #                                 responds to it
    #
    # @param tx_store  transaction store handed on to the selected algorithm
    # @param query     items of the query handed on to the selected algorithm
    # @param algorithm [String, Symbol] name of the algorithm to run
    # @return whatever the selected algorithm returns
    # @raise [ArgumentError] if +algorithm+ is nil or names no known algorithm
    def self.execute(tx_store:, query:, algorithm:)
      raise ArgumentError, "Algorithm was equal to nil" if algorithm.nil?

      # Accept symbols as well as strings (String#+ is used for the fallback
      # lookup, which a Symbol does not support).
      name = algorithm.to_s

      # tarmaq2 => call tarmaq(2)
      if (match = /tarmaq(?<num>\d+)/.match(name))
        Evoc::Algorithm.tarmaq(match[:num].to_i, tx_store: tx_store, query: query)
      elsif (match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(name))
        Evoc::TopK.topk(k: match[:k].to_i,
                        min: match[:min].to_i,
                        max: match[:max].to_i,
                        tx_store: tx_store,
                        query: query)
      # "crule_range_" must be tested before "rule_range_": the patterns are
      # unanchored, so the latter also matches inside the former.
      elsif (match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(name))
        Evoc::Algorithm.cached_rule_range(match[:min].to_i, match[:max].to_i, tx_store: tx_store, query: query)
      elsif (match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(name))
        Evoc::Algorithm.rule_range(match[:min].to_i, match[:max].to_i, tx_store: tx_store, query: query)
      elsif Evoc::Algorithm.respond_to?("#{name}_algorithm")
        Evoc::Algorithm.public_send("#{name}_algorithm", tx_store: tx_store, query: query)
      else
        raise ArgumentError, "#{algorithm} is not an available algorithm"
      end
    end

    ##
    # Targeted Association Rule Mining implementations
    #

    # @return [Hash] the cache used by cached_rule_range
    #   (tag => { antecedent size => rules })
    def self.rule_cache
      @@rule_range_cache
    end

    # Auto-vivifying nested hash: reading a missing key stores and returns a
    # new hash with the same default behaviour.
    @@rule_range_cache = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }

    # Same result as rule_range, but rules are cached per antecedent size so
    # that repeated calls for the same tx_store/query only generate the sizes
    # that have not been generated yet.
    #
    # The cache is emptied whenever a tx_store/query combination with no
    # cached entries is seen, so it holds rules for one combination at a time.
    #
    # @param min [Integer] smallest antecedent size
    # @param max [Integer] largest antecedent size
    # @return [Evoc::RuleStore] the rules with antecedent sizes in min..max
    def self.cached_rule_range(min, max, tx_store:, query:)
      tag = tx_store.hash + query.hash
      rule_store = Evoc::RuleStore.new(query: query)
      if @@rule_range_cache[tag].empty?
        @@rule_range_cache.clear
        fill_rule_cache(tag, min, max, tx_store: tx_store, query: query)
      else
        # only generate the antecedent sizes that are not cached yet
        missing_range = (min..max).to_a - @@rule_range_cache[tag].keys
        unless missing_range.empty?
          fill_rule_cache(tag, missing_range.min, missing_range.max, tx_store: tx_store, query: query)
        end
      end
      (min..max).each do |antecedent_size|
        # a size without cached rules resolves to an empty hash through the
        # default proc, so nothing is added for it
        @@rule_range_cache[tag][antecedent_size].each do |rule|
          rule_store << rule
        end
      end
      logger.debug "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}"
      rule_store
    end

    # Generates the rules for min..max and stores them in the cache under
    # +tag+, grouped by antecedent size.
    def self.fill_rule_cache(tag, min, max, tx_store:, query:)
      generated = Evoc::Algorithm.rule_range(min, max, tx_store: tx_store, query: query)
      generated.group_by { |r| r.lhs.size }.each do |size, rules|
        @@rule_range_cache[tag][size] = rules
      end
    end
    private_class_method :fill_rule_cache

    # Generates every rule "antecedent => single consequent" where the
    # antecedent is a subset of the query with a size in min..max, and both
    # antecedent and consequent occur together in at least one transaction.
    #
    # An empty rule store is returned when min > max or min > query.size.
    #
    # @param min [Integer] smallest antecedent size
    # @param max [Integer] largest antecedent size
    # @return [Evoc::RuleStore]
    def self.rule_range(min, max, tx_store:, query:)
      rule_store = Evoc::RuleStore.new(query: query)
      # min must be smaller than or equal to max, and an antecedent can never
      # be larger than the query it is drawn from
      return rule_store if min > max || min > query.size

      unique_rules = Set.new
      # initial filter, we consider all txes where something in the query changed
      tx_store.transactions_of_list(query).each do |tx_id|
        tx = tx_store.get_tx(id: tx_id, id_type: :index)
        # the tx size must be larger than the min size
        # (so that we have items left for the consequent)
        next unless tx.size > min

        # 1. Find overlap
        query_overlap = query & tx.items
        consequents = tx.items - query_overlap
        # 2. Ignore exact overlaps (no items left for consequent)
        next if consequents.empty?
        # 3. Only overlaps that are at least min in size can form an antecedent
        next if query_overlap.size < min

        # 4. Generate all combinations of query_overlap in the desired range
        antecedents = (min..max).flat_map { |size| query_overlap.combination(size).to_a }
        # 5. Add one rule for each antecedent/consequent combination
        #    (the Set drops rules already seen in another transaction)
        antecedents.each do |antecedent|
          consequents.each do |consequent|
            unique_rules << [antecedent, [consequent]]
          end
        end
      end
      unique_rules.each { |lhs, rhs| rule_store << Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store) }
      rule_store
    end

    ##
    # TARMAQ
    # find largest subsets in @query with evidence in @tx_store version
    #
    # Determines the largest overlap between the query and any transaction
    # that still has items left outside that overlap, then generates rules for
    # antecedent sizes from (largest - depth), but at least 1, up to largest.
    #
    # @param depth [Integer] how many sizes below the largest match to include
    # @return [Evoc::RuleStore]
    def self.tarmaq(depth, tx_store:, query:)
      largest_match = 0
      # initial filter, we consider all txes where something in the query changed
      tx_store.transactions_of_list(query).each do |tx_id|
        tx = tx_store.get_tx(id: tx_id, id_type: :index)
        largest_match_in_query = query & tx.items
        # a tx without items outside the overlap cannot provide a consequent
        next if (tx.items - largest_match_in_query).empty?

        match_size = largest_match_in_query.size
        largest_match = match_size if match_size > largest_match
      end
      # now generate rules
      max = largest_match
      min = [max - depth, 1].max
      cached_rule_range(min, max, tx_store: tx_store, query: query)
    end

    ###
    ## rose
    ###
    # Rules whose antecedent has the same size as the query.
    def self.rose_algorithm(tx_store:, query:)
      qs = query.size
      cached_rule_range(qs, qs, tx_store: tx_store, query: query)
    end

    # Rules whose antecedent is a single item of the query.
    def self.co_change_algorithm(tx_store:, query:)
      cached_rule_range(1, 1, tx_store: tx_store, query: query)
    end
  end # Algorithm
end
@@ -0,0 +1,86 @@
1
module Evoc
  # Top-k rule mining: collects rules for a query in a tree map keyed by
  # "<support>|<rule name>", raising the minimum support as the map fills up.
  #
  # All state lives in class variables that are reset at the start of every
  # call to topk.
  class TopK
    # Mines rules for +query+.
    #
    # @param k        number of rules after which the minimum support starts
    #                 to be raised (see save_rule)
    # @param min      antecedent size of the initial rules
    # @param max      antecedents are not expanded beyond this size
    # @param tx_store transaction store the rules are evaluated against
    # @param query    items to draw antecedents from
    # @return [Evoc::RuleStore] built from the rules kept in the top-k map
    def self.topk(k:, min:, max:, tx_store:, query:)
      @@k = k
      @@top_k_rules = Containers::CRBTreeMap.new
      @@expansion_candidates = Containers::CRBTreeMap.new
      @@min_support = 0

      # seed with every subset of size min drawn from the query
      query.combination(min).each do |antecedent|
        changed_in = tx_store.transactions_of_list(antecedent, strict: true)
        # skip antecedents that never changed
        next unless changed_in.size > 0

        # gather every item that changed together with this antecedent
        consequents = Set.new
        changed_in.each do |tx_id|
          tx = tx_store.get_tx(id: tx_id, id_type: :index)
          # the tx must hold more items than the antecedent, otherwise
          # nothing is left for a consequent
          next unless tx.size > antecedent.size

          consequents.merge(tx.items - query)
        end

        consequents.each do |consequent|
          r = Evoc::Rule.new(lhs: antecedent, rhs: [consequent], tx_store: tx_store)
          save_rule(r) if r.m_support.value.to_f >= @@min_support
        end
      end

      # grow antecedents, always taking the candidate with the largest key
      # NOTE(review): the loop guard inspects min_key while the body removes
      # the max entry; confirm that this is the intended stop condition.
      until @@expansion_candidates.empty? || @@expansion_candidates.min_key.to_f < @@min_support
        max_candidate = @@expansion_candidates.delete_max
        expansions = query - max_candidate.lhs
        # don't make antecedents larger than max
        expand(max_candidate, expansions, tx_store) if max_candidate.lhs.size < max
        # drop candidates whose support fell below the current minimum
        @@expansion_candidates.delete_min while !@@expansion_candidates.empty? &&
                                                (@@expansion_candidates.min_key.to_f < @@min_support)
      end

      Evoc::RuleStore.new(@@top_k_rules.map { |_key, rule| rule })
    end

    # Creates, for each item in +expansions+ that compares greater than every
    # item already in the antecedent of +rule+, a rule with that item appended
    # to the antecedent, and saves it when its support exceeds the current
    # minimum support.
    def self.expand(rule, expansions, tx_store)
      expansions.each do |e|
        # only items larger than all lhs items are used
        next unless rule.lhs.all? { |i| e > i }

        expanded_rule = Evoc::Rule.new(lhs: rule.lhs + [e], rhs: rule.rhs, tx_store: tx_store)
        save_rule(expanded_rule) if expanded_rule.m_support.value > @@min_support
      end
    end

    # Stores +r+ both as a top-k rule and as an expansion candidate.
    #
    # Once at least k rules are stored and +r+ has more support than the
    # current minimum, entries sitting exactly at the minimum support are
    # removed while more than k rules remain; the minimum support is then set
    # to the support encoded in the smallest remaining key.
    def self.save_rule(r)
      key = "#{r.m_support.value.to_f}|#{r.name}"
      @@top_k_rules.push(key, r)
      @@expansion_candidates.push(key, r)

      return unless @@top_k_rules.size >= @@k
      return unless r.m_support.value > @@min_support

      # remove min sup rules until size is K
      @@top_k_rules.delete_min while (@@top_k_rules.size > @@k) &&
                                     (@@top_k_rules.min_key.to_f == @@min_support)
      @@min_support = @@top_k_rules.min_key.to_f
    end
  end
end