cohort_analysis 0.4.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +2 -0
- data/CHANGELOG +14 -2
- data/Gemfile +0 -9
- data/README.markdown +0 -64
- data/cohort_analysis.gemspec +20 -2
- data/lib/cohort_analysis/active_record_base_class_methods.rb +0 -4
- data/lib/cohort_analysis/active_record_relation_instance_methods.rb +34 -5
- data/lib/cohort_analysis/arel_select_manager_instance_methods.rb +8 -0
- data/lib/cohort_analysis/arel_table_instance_methods.rb +7 -0
- data/lib/cohort_analysis/strategy/big.rb +4 -5
- data/lib/cohort_analysis/strategy/strict.rb +8 -6
- data/lib/cohort_analysis/strategy.rb +98 -11
- data/lib/cohort_analysis/version.rb +1 -1
- data/lib/cohort_analysis.rb +15 -15
- data/test/helper.rb +54 -9
- data/test/test_cohort_analysis.rb +269 -181
- metadata +169 -65
- data/.document +0 -5
data/.yardopts
ADDED
data/CHANGELOG
CHANGED
@@ -1,13 +1,25 @@
|
|
1
|
+
1.0.0 / 2012-05-30
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Cohorts with initially (originally) empty characteristics return everything. Before they returned nothing.
|
6
|
+
* Removed #cohort_constraint. There was really no correct way to use it. Now it's recommended you union together cohorts.
|
7
|
+
* ActiveRecord::{Base.,Relation#}cohort returns an Arel::SelectManager, NOT another ActiveRecord::Relation. That means you can't call, for example, #count on it - you have to treat it like an ARel object.
|
8
|
+
|
9
|
+
* Enhancements
|
10
|
+
|
11
|
+
* You can use #cohort when composing with ARel. Just be safe about how you use it - it's not an automagic ActiveRecord::Relation.
|
12
|
+
* True unit tests, including explicit tests for UNIONing together cohorts.
|
13
|
+
* Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7
|
14
|
+
|
1
15
|
0.4.0 / 2012-02-28
|
2
16
|
|
3
17
|
* renamed to cohort_analysis
|
4
|
-
|
5
18
|
* new, simplified syntax - see README.markdown
|
6
19
|
|
7
20
|
0.3.0 / 2012-02-27
|
8
21
|
|
9
22
|
* Now my_strategy = Person.strategy({:favorite_color => 'heliotrope', :birthdate => @date_range}, :importance => [:birthdate, :favorite_color]) will return an Arel::Nodes::Node, which can be combined like Person.where(my_strategy.and("gender = 'male'")) - it does NOT return a "scope" like before.
|
10
|
-
|
11
23
|
* Refactor to take advantage of ARel.
|
12
24
|
|
13
25
|
0.2.0
|
data/Gemfile
CHANGED
data/README.markdown
CHANGED
@@ -18,70 +18,6 @@ Replaces [`cohort_scope`](https://github.com/seamusabshere/cohort_scope).
|
|
18
18
|
<dd>Discards characteristics according to <code>:priority</code>.</dd>
|
19
19
|
</dl>
|
20
20
|
|
21
|
-
### `:big` example
|
22
|
-
|
23
|
-
This is straight from the tests:
|
24
|
-
|
25
|
-
# make some fixtures
|
26
|
-
1_000.times { FactoryGirl.create(:lax) }
|
27
|
-
100.times { FactoryGirl.create(:lax_sfo) }
|
28
|
-
10.times { FactoryGirl.create(:lax_sfo_co) }
|
29
|
-
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
30
|
-
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
31
|
-
|
32
|
-
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
33
|
-
# don't discard anything
|
34
|
-
Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
|
35
|
-
# discard airline
|
36
|
-
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
|
37
|
-
# discard plane and airline
|
38
|
-
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
|
39
|
-
# discard plane and airline and dest
|
40
|
-
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
|
41
|
-
|
42
|
-
lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
|
43
|
-
# don't discard anything
|
44
|
-
Flight.cohort(lax_sfo_a320).count.must_equal 4
|
45
|
-
# discard plane
|
46
|
-
Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
|
47
|
-
# discard plane and dest
|
48
|
-
Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
|
49
|
-
|
50
|
-
# off the rails here a bit
|
51
|
-
woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
|
52
|
-
# discard plane
|
53
|
-
Flight.cohort(woah_lax_co_a320).count.must_equal 10
|
54
|
-
# discard plane and airline
|
55
|
-
Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
|
56
|
-
|
57
|
-
### `:strict` example
|
58
|
-
|
59
|
-
Also from the tests...
|
60
|
-
|
61
|
-
# make some fixtures
|
62
|
-
1_000.times { FactoryGirl.create(:lax) }
|
63
|
-
100.times { FactoryGirl.create(:lax_sfo) }
|
64
|
-
10.times { FactoryGirl.create(:lax_sfo_co) }
|
65
|
-
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
66
|
-
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
67
|
-
|
68
|
-
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
69
|
-
priority = [:origin, :dest, :airline, :plane]
|
70
|
-
# discard nothing
|
71
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
72
|
-
# (force) discard plane, then (force) discard airline
|
73
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
|
74
|
-
# (force) discard plane, then (force) discard airline, then (force) discard dest
|
75
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
|
76
|
-
|
77
|
-
priority = [:plane, :airline, :dest, :origin]
|
78
|
-
# discard nothing
|
79
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
80
|
-
# (force) discard origin, then (force) discard dest, then (force) discard airline
|
81
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
|
82
|
-
# gives up!
|
83
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
|
84
|
-
|
85
21
|
## Copyright
|
86
22
|
|
87
23
|
Copyright (c) 2012 Brighter Planet, Inc.
|
data/cohort_analysis.gemspec
CHANGED
@@ -16,6 +16,24 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
gem.version = CohortAnalysis::VERSION
|
18
18
|
|
19
|
-
gem.add_runtime_dependency
|
20
|
-
gem.add_runtime_dependency
|
19
|
+
gem.add_runtime_dependency 'activesupport', '>=3'
|
20
|
+
gem.add_runtime_dependency 'arel', '>=3'
|
21
|
+
|
22
|
+
gem.add_development_dependency 'activerecord', '>=3'
|
23
|
+
gem.add_development_dependency 'active_record_inline_schema'
|
24
|
+
gem.add_development_dependency 'minitest'
|
25
|
+
gem.add_development_dependency 'minitest-reporters'
|
26
|
+
gem.add_development_dependency 'yard'
|
27
|
+
if RUBY_VERSION >= '1.9'
|
28
|
+
gem.add_development_dependency 'factory_girl'
|
29
|
+
else
|
30
|
+
gem.add_development_dependency 'factory_girl', '~>2'
|
31
|
+
end
|
32
|
+
if RUBY_PLATFORM == 'java'
|
33
|
+
gem.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
|
34
|
+
else
|
35
|
+
gem.add_development_dependency 'sqlite3'
|
36
|
+
end
|
37
|
+
|
38
|
+
# gem.add_development_dependency 'debugger'
|
21
39
|
end
|
@@ -1,13 +1,42 @@
|
|
1
1
|
module CohortAnalysis
|
2
2
|
module ActiveRecordRelationInstanceMethods
|
3
|
+
# @note This doesn't return an <code>ActiveRecord::Relation</code>, so you can't just call count.
|
4
|
+
#
|
5
|
+
# @example Count a Flight cohort
|
6
|
+
# cohort = Flight.cohort(:origin => 'MSN', :dest => 'ORD')
|
7
|
+
# cohort.count #=> BAD! just plain Arel::SelectManager doesn't provide #count, that's an ActiveRecord::Relation thing
|
8
|
+
# Flight.connection.select_value(cohort.project('COUNT(*)').to_sql) #=> what you wanted
|
9
|
+
#
|
10
|
+
# @return [Arel::SelectManager] A select manager without any projections.
|
3
11
|
def cohort(characteristics, options = {})
|
4
|
-
|
12
|
+
select_manager = arel.clone
|
13
|
+
select_manager.projections = []
|
14
|
+
select_manager.where Strategy.create(select_manager, characteristics, options)
|
15
|
+
select_manager
|
5
16
|
end
|
6
17
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
18
|
+
# @note Won't work properly unless it's the last constraint in your chain.
|
19
|
+
#
|
20
|
+
# @example Making sure it's the last thing you call
|
21
|
+
# Flight.cohort_relation(:origin => 'MSN', :dest => 'ORD').where(:year => 2009) #=> BAD! the cohort calculation CANNOT see :year => 2009
|
22
|
+
# Flight.where(:year => 2009).cohort_relation(:origin => 'MSN', :dest => 'ORD') #=> OK!
|
23
|
+
#
|
24
|
+
# @return [ActiveRecord::Relation]
|
25
|
+
def cohort_relation(characteristics, options = {})
|
26
|
+
where Strategy.create(arel, characteristics, options)
|
11
27
|
end
|
12
28
|
end
|
13
29
|
end
|
30
|
+
|
31
|
+
=begin
|
32
|
+
If I return ActiveRecord::Relation#where(strategy), and somebody calls #where on it, a new relation is returned that includes the strategy, but the strategy can't see the new where values:
|
33
|
+
|
34
|
+
relation = clone # which keeps where_values but clears @arel
|
35
|
+
relation.where_values += build_where(opts, rest) # which just adds the expr
|
36
|
+
relation
|
37
|
+
|
38
|
+
If I return Arel::SelectManager#where(strategy), it keeps the context, so the strategy can use that:
|
39
|
+
|
40
|
+
@ctx.wheres << expr
|
41
|
+
self
|
42
|
+
=end
|
@@ -5,14 +5,13 @@ module CohortAnalysis
|
|
5
5
|
#
|
6
6
|
# The characteristic whose removal leads to the highest record count is removed from the overall characteristic set.
|
7
7
|
def reduce!
|
8
|
-
@
|
8
|
+
@current = if current.keys.length < 2
|
9
9
|
{}
|
10
10
|
else
|
11
|
-
|
12
|
-
|
13
|
-
@active_record_relation.where(conditions).count
|
11
|
+
most_restrictive = current.keys.max_by do |k|
|
12
|
+
count current.except(k)
|
14
13
|
end
|
15
|
-
|
14
|
+
current.except most_restrictive
|
16
15
|
end
|
17
16
|
end
|
18
17
|
end
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module CohortAnalysis
|
2
2
|
class Strategy
|
3
3
|
class Strict < Strategy
|
4
|
-
|
4
|
+
attr_reader :reverse_priority
|
5
|
+
|
6
|
+
def initialize(select_manager, characteristics, options = {})
|
5
7
|
super
|
6
8
|
if priority = options[:priority]
|
7
9
|
@reverse_priority = priority.reverse
|
@@ -12,15 +14,15 @@ module CohortAnalysis
|
|
12
14
|
|
13
15
|
# Reduce characteristics by removing the least important one.
|
14
16
|
def reduce!
|
15
|
-
least_important_key = if
|
16
|
-
|
17
|
-
|
17
|
+
least_important_key = if reverse_priority
|
18
|
+
reverse_priority.detect do |k|
|
19
|
+
current.has_key? k
|
18
20
|
end
|
19
21
|
else
|
20
|
-
|
22
|
+
current.keys.last
|
21
23
|
end
|
22
24
|
if least_important_key
|
23
|
-
|
25
|
+
current.delete least_important_key
|
24
26
|
else
|
25
27
|
raise ::RuntimeError, "[cohort_analysis] Priority improperly specified"
|
26
28
|
end
|
@@ -1,31 +1,118 @@
|
|
1
1
|
module CohortAnalysis
|
2
2
|
class Strategy < ::Arel::Nodes::Node
|
3
|
-
|
3
|
+
class << self
|
4
|
+
def create(select_manager, characteristics, options = {})
|
5
|
+
options = options.symbolize_keys
|
6
|
+
strategy = if options.has_key? :strategy
|
7
|
+
options[:strategy]
|
8
|
+
elsif options.has_key? :priority
|
9
|
+
:strict
|
10
|
+
else
|
11
|
+
DEFAULT_STRATEGY
|
12
|
+
end
|
13
|
+
const_get(strategy.to_s.camelcase).new(select_manager, characteristics, options)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
module AlwaysTrue
|
18
|
+
def self.to_sql; '1 = 1' end
|
19
|
+
end
|
20
|
+
module Impossible
|
21
|
+
def self.to_sql; '1 = 2' end
|
22
|
+
end
|
23
|
+
|
24
|
+
DEFAULT_STRATEGY = :big
|
4
25
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
26
|
+
attr_reader :select_manager
|
27
|
+
attr_reader :original
|
28
|
+
attr_reader :current
|
29
|
+
attr_reader :minimum_size
|
30
|
+
attr_reader :table_name
|
31
|
+
attr_reader :table
|
32
|
+
|
33
|
+
def initialize(select_manager, characteristics, options = {})
|
34
|
+
@select_manager = select_manager
|
35
|
+
@table_name = select_manager.source.left.name
|
36
|
+
@table = Arel::Table.new table_name
|
37
|
+
@original = characteristics.dup
|
38
|
+
@current = characteristics.dup
|
9
39
|
@minimum_size = options.fetch(:minimum_size, 1)
|
40
|
+
@final_mutex = ::Mutex.new
|
41
|
+
end
|
42
|
+
|
43
|
+
def final
|
44
|
+
@final || if @final_mutex.try_lock
|
45
|
+
begin
|
46
|
+
@final ||= resolve!
|
47
|
+
ensure
|
48
|
+
@final_mutex.unlock
|
49
|
+
end
|
50
|
+
else
|
51
|
+
Impossible
|
52
|
+
end
|
10
53
|
end
|
11
54
|
|
12
55
|
def expr
|
13
|
-
|
56
|
+
final.to_sql
|
57
|
+
end
|
58
|
+
|
59
|
+
# Two strategies are equal when they target the same table, require the
# same minimum cohort size, and were built from the same original
# characteristics. #count relies on this to recognise (and skip) itself
# among the select manager's constraints.
#
# The minimum-size check must be a comparison (==). Written as
# `minimum_size = other.minimum_size` it is parsed as an assignment to a
# throwaway local, so strategies differing only in minimum size would
# wrongly compare equal.
#
# @param other [Object] anything; non-Strategy objects are never equal
# @return [Boolean]
def ==(other)
  other.is_a?(Strategy) &&
    table_name == other.table_name &&
    minimum_size == other.minimum_size &&
    original == other.original
end
|
15
|
-
alias :to_sql :expr
|
16
65
|
|
17
66
|
private
|
18
67
|
|
19
68
|
# Recursively look for a scope that meets the characteristics and is at least <tt>minimum_size</tt>.
|
20
69
|
def resolve!
|
21
|
-
if
|
22
|
-
|
23
|
-
elsif
|
24
|
-
|
70
|
+
if original.empty?
|
71
|
+
AlwaysTrue
|
72
|
+
elsif current.empty?
|
73
|
+
Impossible
|
74
|
+
elsif count(current) >= minimum_size
|
75
|
+
Arel::Nodes::Grouping.new grasp(current).inject(:and)
|
25
76
|
else
|
26
77
|
reduce!
|
27
78
|
resolve!
|
28
79
|
end
|
29
80
|
end
|
81
|
+
|
82
|
+
def grasp(subset)
|
83
|
+
subset.map do |k, v|
|
84
|
+
case v
|
85
|
+
when Array
|
86
|
+
table[k].in v
|
87
|
+
else
|
88
|
+
table[k].eq v
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def count(subset)
|
94
|
+
constraints = grasp subset
|
95
|
+
|
96
|
+
select_manager.constraints.each do |constraint|
|
97
|
+
if self == constraint
|
98
|
+
next
|
99
|
+
end
|
100
|
+
if constraint.is_a? String
|
101
|
+
constraint = Arel::Nodes::Grouping.new constraint
|
102
|
+
end
|
103
|
+
constraints << constraint
|
104
|
+
end
|
105
|
+
|
106
|
+
relation = constraints.inject(nil) do |memo, constraint|
|
107
|
+
if memo
|
108
|
+
memo.and(constraint)
|
109
|
+
else
|
110
|
+
constraint
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
sql = table.dup.project('COUNT(*)').where(relation).to_sql
|
115
|
+
select_manager.engine.connection.select_value(sql).to_i
|
116
|
+
end
|
30
117
|
end
|
31
118
|
end
|
data/lib/cohort_analysis.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
require 'arel'
|
2
|
-
require 'active_record'
|
3
2
|
require 'active_support/core_ext'
|
4
3
|
|
5
4
|
require 'cohort_analysis/strategy'
|
6
5
|
require 'cohort_analysis/strategy/big'
|
7
6
|
require 'cohort_analysis/strategy/strict'
|
8
7
|
|
9
|
-
require 'cohort_analysis/active_record_base_class_methods'
|
10
|
-
require 'cohort_analysis/active_record_relation_instance_methods'
|
11
|
-
require 'cohort_analysis/arel_visitors_visitor_instance_methods'
|
12
|
-
|
13
8
|
module CohortAnalysis
|
14
|
-
def self.conditions_for(characteristics)
|
15
|
-
case characteristics
|
16
|
-
when ::Array
|
17
|
-
characteristics.inject({}) { |memo, (k, v)| memo[k] = v; memo }
|
18
|
-
else
|
19
|
-
characteristics
|
20
|
-
end
|
21
|
-
end
|
22
9
|
end
|
23
10
|
|
24
|
-
|
25
|
-
|
11
|
+
require 'cohort_analysis/arel_select_manager_instance_methods'
|
12
|
+
Arel::SelectManager.send :include, CohortAnalysis::ArelSelectManagerInstanceMethods
|
13
|
+
|
14
|
+
require 'cohort_analysis/arel_table_instance_methods'
|
15
|
+
Arel::Table.send :include, CohortAnalysis::ArelTableInstanceMethods
|
16
|
+
|
17
|
+
require 'cohort_analysis/arel_visitors_visitor_instance_methods'
|
26
18
|
Arel::Visitors::Visitor.send :include, CohortAnalysis::ArelVisitorsVisitorInstanceMethods
|
19
|
+
|
20
|
+
if defined?(ActiveRecord)
|
21
|
+
require 'cohort_analysis/active_record_base_class_methods'
|
22
|
+
ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
|
23
|
+
|
24
|
+
require 'cohort_analysis/active_record_relation_instance_methods'
|
25
|
+
ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
|
26
|
+
end
|
data/test/helper.rb
CHANGED
@@ -9,20 +9,65 @@ MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
|
9
9
|
|
10
10
|
require 'factory_girl'
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
require 'active_record'
|
13
|
+
require 'active_record_inline_schema'
|
14
|
+
|
14
15
|
require 'cohort_analysis'
|
15
16
|
|
16
|
-
if ::Bundler.definition.specs['
|
17
|
+
if ::Bundler.definition.specs['debugger'].first
|
18
|
+
require 'debugger'
|
19
|
+
elsif ::Bundler.definition.specs['ruby-debug'].first
|
17
20
|
require 'ruby-debug'
|
18
21
|
end
|
19
22
|
|
20
23
|
# require 'logger'
|
21
24
|
# ActiveRecord::Base.logger = Logger.new($stdout)
|
22
25
|
|
23
|
-
ActiveRecord::Base.establish_connection(
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
|
27
|
+
|
28
|
+
Arel::Table.engine = ActiveRecord::Base
|
29
|
+
|
30
|
+
# https://gist.github.com/1560208 - shared examples in minispec
|
31
|
+
|
32
|
+
MiniTest::Spec.class_eval do
|
33
|
+
# start transaction
|
34
|
+
before do
|
35
|
+
# activerecord-3.2.3/lib/active_record/fixtures.rb
|
36
|
+
@fixture_connections = ActiveRecord::Base.connection_handler.connection_pools.values.map(&:connection)
|
37
|
+
@fixture_connections.each do |connection|
|
38
|
+
connection.increment_open_transactions
|
39
|
+
connection.transaction_joinable = false
|
40
|
+
connection.begin_db_transaction
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# rollback
|
45
|
+
after do
|
46
|
+
@fixture_connections.each do |connection|
|
47
|
+
if connection.open_transactions != 0
|
48
|
+
connection.rollback_db_transaction
|
49
|
+
connection.decrement_open_transactions
|
50
|
+
end
|
51
|
+
end
|
52
|
+
@fixture_connections.clear
|
53
|
+
ActiveRecord::Base.clear_active_connections!
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.shared_examples
|
57
|
+
@shared_examples ||= {}
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
module MiniTest::Spec::SharedExamples
|
62
|
+
def shared_examples_for(desc, &block)
|
63
|
+
MiniTest::Spec.shared_examples[desc] = block
|
64
|
+
end
|
65
|
+
|
66
|
+
def it_behaves_like(desc)
|
67
|
+
self.instance_eval do
|
68
|
+
MiniTest::Spec.shared_examples[desc].call
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
Object.class_eval { include(MiniTest::Spec::SharedExamples) }
|