evoc 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dba4bd171ddbca345a90203454afd6285122db56
4
+ data.tar.gz: 987093521af4703fcc540f665a70125fe9e39716
5
+ SHA512:
6
+ metadata.gz: 9d3c6d229df7deb55a7bff7afea8dd953e656e4382be8fa6e5a16f4248996fc6401f51ba962466e090742e6c07039b92f7675ca10cf709b79a212e51b824a011
7
+ data.tar.gz: e1bf42491834eab8d90eb5ed9cac1af0e54f30f74bdaa70c2f518d2eea19e42d23d9ee231767da10a3795038c4549bfe200b6f94f32d9625d369493a83b2d7c0
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ evoc.log
2
+ .DS_Store
3
+ /.bundle/
4
+ /.yardoc
5
+ /Gemfile.lock
6
+ /_yardoc/
7
+ /coverage/
8
+ /doc/
9
+ /pkg/
10
+ /spec/reports/
11
+ /tmp/
12
+ */*.html
13
+ /.history
14
+ /GRTAGS
15
+ *TAGS
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.3
4
+ before_install: gem install bundler -v 1.10.4
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in evoc.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Thomas Rolfsnes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/Makefile ADDED
@@ -0,0 +1,4 @@
1
+ install:
2
+ bundle install
3
+ rake install
4
+
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # Evoc
2
+
3
+ Evoc is now using [semantic versioning](http://semver.org/)
4
+
5
+ ## Instructions for installing Ruby
6
+
7
+ 1. install ruby version manager (rvm)
8
+
9
+ ```
10
+ gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
11
+ curl -sSL https://get.rvm.io | bash -s stable
12
+ source /home/ubuntu/.rvm/scripts/rvm
13
+ ```
14
+
15
+ 2. install ruby
16
+
17
+ ```
18
+ rvm install ruby-2.2.0
19
+ rvm --default use ruby-2.2.0
20
+ ```
21
+
22
+ ## Instructions for installing Evoc
23
+
24
+ 1. clone the evoc repo
25
+
26
+ ```
27
+ git clone git@bitbucket.org:evolveit/evoc.git
28
+ ```
29
+
30
+ 2. (linux) install atlas (used for svd, can be skipped)
31
+
32
+ ```
33
+ sudo apt-get install libatlas-base-dev
34
+ ```
35
+ 2. (mac) install gcc49 (used for svd, can be skipped)
36
+
37
+ ```
38
+ brew install gcc49
39
+ ```
40
+
41
+ 3. install bundler
42
+
43
+ ```
44
+ gem install bundler
45
+ ```
46
+
47
+ 4. install gem dependencies for evoc (using bundler)
48
+
49
+ ```
50
+ cd 'evoc folder'
51
+ bundle install
52
+ ```
53
+
54
+
55
+
56
+ 5. install evoc
57
+
58
+ ```
59
+ cd 'evoc folder'
60
+ rake install
61
+ ```
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "evoc_helper"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/evoc ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/evoc_helper'
3
+ EvocCLI::Main.start(ARGV)
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
data/evoc.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'evoc/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "evoc"
8
+ spec.version = Evoc::VERSION
9
+ spec.authors = ["Thomas Rolfsnes"]
10
+ spec.email = ["mail@thomasrolfsnes.com"]
11
+
12
+ spec.summary = %q{A collection of algorithms for doing Targeted Association Rule Mining}
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
16
+ spec.bindir = "bin"
17
+ spec.executables = ["evoc",]
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_development_dependency "bundler", "~> 1.10"
21
+ spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "rspec"
23
+ spec.add_development_dependency "ruby-prof"
24
+ spec.add_runtime_dependency "require_all"
25
+ spec.add_runtime_dependency "thor"
26
+ spec.add_runtime_dependency "time_difference"
27
+ spec.add_runtime_dependency "ruby-progressbar"
28
+ spec.add_runtime_dependency "rubyzip"
29
+ spec.add_runtime_dependency "algorithms"
30
+ end
data/lib/evoc/algorithm.rb ADDED
@@ -0,0 +1,147 @@
1
+ #file algorithm.rb
2
+ module Evoc
3
+ class Algorithm
4
+ extend Logging
5
+
6
+ def self.execute(tx_store:,query:,algorithm:)
7
+ if algorithm.nil?
8
+ raise ArgumentError.new, "Algorithm was equal to nil"
9
+ end
10
+ # tarmaq2 => call tarmaq(2)
11
+ if match = /tarmaq(?<num>\d+)/.match(algorithm)
12
+ Evoc::Algorithm.tarmaq(match[:num].to_i,tx_store:tx_store,query:query)
13
+ elsif match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
14
+ Evoc::TopK.topk(k: match[:k].to_i,
15
+ min: match[:min].to_i,
16
+ max: match[:max].to_i,
17
+ tx_store: tx_store,
18
+ query: query)
19
+ elsif match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
20
+ Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
21
+ elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
22
+ Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
23
+ elsif Evoc::Algorithm.respond_to?(algorithm+'_algorithm')
24
+ Evoc::Algorithm.method(algorithm+'_algorithm').call(tx_store:tx_store,query:query)
25
+ else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
26
+ end
27
+ end
28
+
29
+ ##
30
+ # Targeted Association Rule Mining implementations
31
+ #
32
+ #
33
+
34
+ def self.rule_cache
35
+ return @@rule_range_cache
36
+ end
37
+
38
+ @@rule_range_cache = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
39
+ def self.cached_rule_range(min,max,tx_store:,query:)
40
+ tag = tx_store.hash+query.hash
41
+ rule_store = Evoc::RuleStore.new(query: query)
42
+ if @@rule_range_cache[tag].empty?
43
+ @@rule_range_cache.clear
44
+ Evoc::Algorithm.rule_range(min,max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules|
45
+ @@rule_range_cache[tag][size] = rules
46
+ end
47
+ else
48
+ # calculate missing range
49
+ missing_range = (min..max).to_a - @@rule_range_cache[tag].keys
50
+ if !missing_range.empty?
51
+ Evoc::Algorithm.rule_range(missing_range.min,missing_range.max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules|
52
+ @@rule_range_cache[tag][size] = rules
53
+ end
54
+ end
55
+ end
56
+ (min..max).each do |antecedent_size|
57
+ @@rule_range_cache[tag][antecedent_size].each do |rule|
58
+ rule_store << rule
59
+ end
60
+ end
61
+ logger.debug "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}"
62
+ return rule_store
63
+ end
64
+
65
+ def self.rule_range(min,max,tx_store:,query:)
66
+ rule_store = Evoc::RuleStore.new(query: query)
67
+ unique_rules = Set.new
68
+ # min must be less than or equal to max
69
+ if min <= max
70
+ # can only create rules where the antecedent is equal to or smaller in size than the query
71
+ if min <= query.size
72
+ #initial filter, we consider all txes where something in the query changed
73
+ query_changed_in = tx_store.transactions_of_list(query)
74
+ # create rules
75
+ query_changed_in.each do |tx_id|
76
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
77
+ # the tx size must be larger than the min size
78
+ # (so that we have items left for the consequent)
79
+ if (tx.size > min)
80
+ # CONSTRUCT RULES in the desired range
81
+
82
+ # 1. Find overlap
83
+ query_overlap = (query & tx.items)
84
+ consequents = (tx.items - query_overlap)
85
+ # 2. Ignore exact overlaps (no items left for consequent)
86
+ if consequents.size != 0
87
+ # 3. Construct rules for overlaps that are at least min in size
88
+ if query_overlap.size >= min
89
+ # 4. Generate all combinations of query_overlap for each size in the desired range
90
+ antecedents = (min..max).map {|size| query_overlap.combination(size).to_a}.flatten(1)
91
+ # 5. Add one rule for each antecedent/consequent combination
92
+ antecedents.each do |antecedent|
93
+ consequents.each do |consequent|
94
+ unique_rules << [antecedent,[consequent]]
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
102
+ end
103
+ unique_rules.each {|lhs,rhs| rule_store << Evoc::Rule.new(lhs: lhs,rhs: rhs,tx_store:tx_store)}
104
+ return rule_store
105
+ end
106
+
107
+
108
+ ##
109
+ # TARMAQ
110
+ # find the largest subsets of query that have evidence in tx_store, then generate rules from them
111
+ def self.tarmaq(depth,tx_store:,query:)
112
+ largest_match = 0
113
+ #initial filter, we consider all txes where something in the query changed
114
+ query_changed_in = tx_store.transactions_of_list(query)
115
+ # now find what subsets of the query changed in each tx
116
+ rules = Hash.new
117
+ query_changed_in.each do |tx_id|
118
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
119
+ largest_match_in_query = (query & tx.items)
120
+ match_size = largest_match_in_query.size
121
+ remainder_in_tx = tx.items - largest_match_in_query
122
+ if remainder_in_tx.size > 0
123
+ if match_size > largest_match
124
+ largest_match = match_size
125
+ end
126
+ end
127
+ end
128
+ # now generate rules
129
+ max = largest_match
130
+ min = ((max - depth) <= 0) ? 1 : max - depth
131
+ self.cached_rule_range(min,max,tx_store: tx_store,query: query)
132
+ end
133
+
134
+ ###
135
+ ## rose
136
+ ###
137
+ def self.rose_algorithm(tx_store:,query:)
138
+ qs = query.size
139
+ self.cached_rule_range(qs,qs,tx_store: tx_store, query: query)
140
+ end
141
+
142
+ def self.co_change_algorithm(tx_store:, query:)
143
+ self.cached_rule_range(1,1,tx_store: tx_store, query: query)
144
+ end
145
+
146
+ end # Algorithm
147
+ end
data/lib/evoc/algorithms/top_k.rb ADDED
@@ -0,0 +1,86 @@
1
+ module Evoc
2
+ class TopK
3
+
4
+ def self.topk(k:,min:,max:,tx_store:,query:)
5
+ @@k = k
6
+ @@top_k_rules = Containers::CRBTreeMap.new
7
+ @@expansion_candidates = Containers::CRBTreeMap.new
8
+ @@min_support = 0
9
+ # start with all subsets of size @min in @query
10
+ query.combination(min).each do |antecedent|
11
+ # check if this antecedent has support > 0 (i.e. it changed in at least one transaction)
12
+ changed_in = tx_store.transactions_of_list(antecedent, strict: true)
13
+ if changed_in.size > 0
14
+ consequents = Set.new
15
+ # find all consequents for this antecedent
16
+ changed_in.each do |tx_id|
17
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
18
+ # the tx size must be larger than the antecedent size
19
+ # (so that we have items left for the consequent)
20
+ if (tx.size > antecedent.size)
21
+ (tx.items - query).each {|c| consequents << c}
22
+ end
23
+ end
24
+ consequents.each do |consequent|
25
+ r = Evoc::Rule.new(lhs:antecedent,rhs: [consequent], tx_store: tx_store)
26
+ if r.m_support.value.to_f >= @@min_support
27
+ self.save_rule(r)
28
+ end
29
+ end
30
+ end
31
+ end
32
+
33
+ # now expand antecedents
34
+ while !@@expansion_candidates.empty? && (@@expansion_candidates.min_key.to_f >= @@min_support)
35
+ max_candidate = @@expansion_candidates.delete_max
36
+ #puts "expanding #{max_candidate.name} | #{max_candidate.m_support.value}"
37
+ expansions = query - max_candidate.lhs
38
+ # don't make antecedents larger than @max
39
+ #if (most_frequent_rule.lhs.size < max) & !expansions.empty?
40
+ if (max_candidate.lhs.size < max)
41
+ self.expand(max_candidate,expansions,tx_store)
42
+ end
43
+ # remove all rules with sup < min_sup from candidates
44
+ while !@@expansion_candidates.empty? && (@@expansion_candidates.min_key.to_f < @@min_support)
45
+ @@expansion_candidates.delete_min
46
+ end
47
+ end
48
+ return Evoc::RuleStore.new(@@top_k_rules.map {|k,rule| rule})
49
+ end
50
+
51
+ def self.expand(rule,expansions,tx_store)
52
+ expansions.each do |e|
53
+ # only expand the rule with items that compare greater than every item already in the lhs
54
+ if rule.lhs.all? {|i| e > i}
55
+ expanded_rule = Evoc::Rule.new(lhs: rule.lhs+[e],rhs: rule.rhs,tx_store: tx_store)
56
+ if expanded_rule.m_support.value > @@min_support
57
+ self.save_rule(expanded_rule)
58
+ end
59
+ end
60
+ end
61
+ end
62
+
63
+ def self.save_rule(r)
64
+ key = "#{r.m_support.value.to_f}|#{r.name}"
65
+ #puts "adding rule #{key}"
66
+ @@top_k_rules.push(key,r)
67
+ @@expansion_candidates.push(key,r)
68
+ if @@top_k_rules.size >= @@k
69
+ ##puts "K is #{@@k} rules in #{@@top_k_rules.size}"
70
+ #puts "new rule sup #{r.m_support.value.to_f} min sup is #{@@min_support}"
71
+ if r.m_support.value > @@min_support
72
+ #puts "new rule larger than min sup"
73
+ # remove min sup rules until size is K
74
+ while (@@top_k_rules.size > @@k) && (@@top_k_rules.min_key.to_f == @@min_support)
75
+ min_rule = @@top_k_rules.delete_min
76
+ #puts "ksize #{@@top_k_rules.size} after removing rule #{min_rule}"
77
+ end
78
+ @@min_support = @@top_k_rules.min_key.to_f
79
+ #puts "set min sup to #{@@min_support}"
80
+ else
81
+ #puts "#{"%f" % r.m_support.value.to_f.to_s} not larger than #{@@min_support}"
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end