cohort_analysis 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +2 -0
- data/CHANGELOG +14 -2
- data/Gemfile +0 -9
- data/README.markdown +0 -64
- data/cohort_analysis.gemspec +20 -2
- data/lib/cohort_analysis/active_record_base_class_methods.rb +0 -4
- data/lib/cohort_analysis/active_record_relation_instance_methods.rb +34 -5
- data/lib/cohort_analysis/arel_select_manager_instance_methods.rb +8 -0
- data/lib/cohort_analysis/arel_table_instance_methods.rb +7 -0
- data/lib/cohort_analysis/strategy/big.rb +4 -5
- data/lib/cohort_analysis/strategy/strict.rb +8 -6
- data/lib/cohort_analysis/strategy.rb +98 -11
- data/lib/cohort_analysis/version.rb +1 -1
- data/lib/cohort_analysis.rb +15 -15
- data/test/helper.rb +54 -9
- data/test/test_cohort_analysis.rb +269 -181
- metadata +169 -65
- data/.document +0 -5
data/.yardopts
ADDED
data/CHANGELOG
CHANGED
@@ -1,13 +1,25 @@
|
|
1
|
+
1.0.0 / 2012-05-30
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Cohorts with initially (originally) empty characteristics return everything. Before they returned nothing.
|
6
|
+
* Removed #cohort_constraint. There was really no correct way to use it. Now it's recommended you union together cohorts.
|
7
|
+
* ActiveRecord::{Base.,Relation#}cohort returns an Arel::SelectManager, NOT another ActiveRecord::Relation. That means you can't call, for example, #count on it - you have to treat it like an ARel object.
|
8
|
+
|
9
|
+
* Enhancements
|
10
|
+
|
11
|
+
* You can use #cohort when composing with ARel. Just be safe about how you use it - it's not an automagic ActiveRecord::Relation.
|
12
|
+
* True unit tests, including explicit tests for UNIONing together cohorts.
|
13
|
+
* Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7
|
14
|
+
|
1
15
|
0.4.0 / 2012-02-28
|
2
16
|
|
3
17
|
* renamed to cohort_analysis
|
4
|
-
|
5
18
|
* new, simplified syntax - see README.markdown
|
6
19
|
|
7
20
|
0.3.0 / 2012-02-27
|
8
21
|
|
9
22
|
* Now my_strategy = Person.strategy({:favorite_color => 'heliotrope', :birthdate => @date_range}, :importance => [:birthdate, :favorite_color]) will return an Arel::Nodes::Node which can be combined like Person.where(my_strategy.and("gender = 'male'")) - it does NOT return a "scope" like before.
|
10
|
-
|
11
23
|
* Refactor to take advantage of ARel.
|
12
24
|
|
13
25
|
0.2.0
|
data/Gemfile
CHANGED
data/README.markdown
CHANGED
@@ -18,70 +18,6 @@ Replaces [`cohort_scope`](https://github.com/seamusabshere/cohort_scope).
|
|
18
18
|
<dd>Discards characteristics according to <code>:priority</code>.</dd>
|
19
19
|
</dl>
|
20
20
|
|
21
|
-
### `:big` example
|
22
|
-
|
23
|
-
This is straight from the tests:
|
24
|
-
|
25
|
-
# make some fixtures
|
26
|
-
1_000.times { FactoryGirl.create(:lax) }
|
27
|
-
100.times { FactoryGirl.create(:lax_sfo) }
|
28
|
-
10.times { FactoryGirl.create(:lax_sfo_co) }
|
29
|
-
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
30
|
-
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
31
|
-
|
32
|
-
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
33
|
-
# don't discard anything
|
34
|
-
Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
|
35
|
-
# discard airline
|
36
|
-
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
|
37
|
-
# discard plane and airline
|
38
|
-
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
|
39
|
-
# discard plane and airline and dest
|
40
|
-
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
|
41
|
-
|
42
|
-
lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
|
43
|
-
# don't discard anything
|
44
|
-
Flight.cohort(lax_sfo_a320).count.must_equal 4
|
45
|
-
# discard plane
|
46
|
-
Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
|
47
|
-
# discard plane and dest
|
48
|
-
Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
|
49
|
-
|
50
|
-
# off the rails here a bit
|
51
|
-
woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
|
52
|
-
# discard plane
|
53
|
-
Flight.cohort(woah_lax_co_a320).count.must_equal 10
|
54
|
-
# discard plane and airline
|
55
|
-
Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
|
56
|
-
|
57
|
-
### `:strict` example
|
58
|
-
|
59
|
-
Also from the tests...
|
60
|
-
|
61
|
-
# make some fixtures
|
62
|
-
1_000.times { FactoryGirl.create(:lax) }
|
63
|
-
100.times { FactoryGirl.create(:lax_sfo) }
|
64
|
-
10.times { FactoryGirl.create(:lax_sfo_co) }
|
65
|
-
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
66
|
-
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
67
|
-
|
68
|
-
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
69
|
-
priority = [:origin, :dest, :airline, :plane]
|
70
|
-
# discard nothing
|
71
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
72
|
-
# (force) discard plane, then (force) discard airline
|
73
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
|
74
|
-
# (force) discard plane, then (force) discard airline, then (force) discard dest
|
75
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
|
76
|
-
|
77
|
-
priority = [:plane, :airline, :dest, :origin]
|
78
|
-
# discard nothing
|
79
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
80
|
-
# (force) discard origin, then (force) discard dest, then (force) discard airline
|
81
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
|
82
|
-
# gives up!
|
83
|
-
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
|
84
|
-
|
85
21
|
## Copyright
|
86
22
|
|
87
23
|
Copyright (c) 2012 Brighter Planet, Inc.
|
data/cohort_analysis.gemspec
CHANGED
@@ -16,6 +16,24 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
gem.version = CohortAnalysis::VERSION
|
18
18
|
|
19
|
-
gem.add_runtime_dependency
|
20
|
-
gem.add_runtime_dependency
|
19
|
+
gem.add_runtime_dependency 'activesupport', '>=3'
|
20
|
+
gem.add_runtime_dependency 'arel', '>=3'
|
21
|
+
|
22
|
+
gem.add_development_dependency 'activerecord', '>=3'
|
23
|
+
gem.add_development_dependency 'active_record_inline_schema'
|
24
|
+
gem.add_development_dependency 'minitest'
|
25
|
+
gem.add_development_dependency 'minitest-reporters'
|
26
|
+
gem.add_development_dependency 'yard'
|
27
|
+
if RUBY_VERSION >= '1.9'
|
28
|
+
gem.add_development_dependency 'factory_girl'
|
29
|
+
else
|
30
|
+
gem.add_development_dependency 'factory_girl', '~>2'
|
31
|
+
end
|
32
|
+
if RUBY_PLATFORM == 'java'
|
33
|
+
gem.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
|
34
|
+
else
|
35
|
+
gem.add_development_dependency 'sqlite3'
|
36
|
+
end
|
37
|
+
|
38
|
+
# gem.add_development_dependency 'debugger'
|
21
39
|
end
|
@@ -1,13 +1,42 @@
|
|
1
1
|
module CohortAnalysis
|
2
2
|
module ActiveRecordRelationInstanceMethods
|
3
|
+
# @note This doesn't return an <code>ActiveRecord::Relation</code>, so you can't just call count.
|
4
|
+
#
|
5
|
+
# @example Count a Flight cohort
|
6
|
+
# cohort = Flight.cohort(:origin => 'MSN', :dest => 'ORD')
|
7
|
+
# cohort.count #=> BAD! just plain Arel::SelectManager doesn't provide #count, that's an ActiveRecord::Relation thing
|
8
|
+
# Flight.connection.select_value(cohort.project('COUNT(*)').to_sql) #=> what you wanted
|
9
|
+
#
|
10
|
+
# @return [Arel::SelectManager] A select manager without any projections.
|
3
11
|
def cohort(characteristics, options = {})
|
4
|
-
|
12
|
+
select_manager = arel.clone
|
13
|
+
select_manager.projections = []
|
14
|
+
select_manager.where Strategy.create(select_manager, characteristics, options)
|
15
|
+
select_manager
|
5
16
|
end
|
6
17
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
18
|
+
# @note Won't work properly unless it's the last constraint in your chain.
|
19
|
+
#
|
20
|
+
# @example Making sure it's the last thing you call
|
21
|
+
# Flight.cohort_relation(:origin => 'MSN', :dest => 'ORD').where(:year => 2009) #=> BAD! the cohort calculation CANNOT see :year => 2009
|
22
|
+
# Flight.where(:year => 2009).cohort_relation(:origin => 'MSN', :dest => 'ORD') #=> OK!
|
23
|
+
#
|
24
|
+
# @return [ActiveRecord::Relation]
|
25
|
+
def cohort_relation(characteristics, options = {})
|
26
|
+
where Strategy.create(arel, characteristics, options)
|
11
27
|
end
|
12
28
|
end
|
13
29
|
end
|
30
|
+
|
31
|
+
=begin
|
32
|
+
if i return ActiveRecord::Relation#where(strategy), and somebody calls #where on it, a new relation is returned that includes the strategy, but the strategy can't see the new where values
|
33
|
+
|
34
|
+
relation = clone # which keeps where_values but clears @arel
|
35
|
+
relation.where_values += build_where(opts, rest) # which just adds the expr
|
36
|
+
relation
|
37
|
+
|
38
|
+
if i return Arel::SelectManager#where(strategy), it keeps the context, so the strategy can use that
|
39
|
+
|
40
|
+
@ctx.wheres << expr
|
41
|
+
self
|
42
|
+
=end
|
@@ -5,14 +5,13 @@ module CohortAnalysis
|
|
5
5
|
#
|
6
6
|
# The characteristic whose removal leads to the highest record count is removed from the overall characteristic set.
|
7
7
|
def reduce!
|
8
|
-
@
|
8
|
+
@current = if current.keys.length < 2
|
9
9
|
{}
|
10
10
|
else
|
11
|
-
|
12
|
-
|
13
|
-
@active_record_relation.where(conditions).count
|
11
|
+
most_restrictive = current.keys.max_by do |k|
|
12
|
+
count current.except(k)
|
14
13
|
end
|
15
|
-
|
14
|
+
current.except most_restrictive
|
16
15
|
end
|
17
16
|
end
|
18
17
|
end
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module CohortAnalysis
|
2
2
|
class Strategy
|
3
3
|
class Strict < Strategy
|
4
|
-
|
4
|
+
attr_reader :reverse_priority
|
5
|
+
|
6
|
+
def initialize(select_manager, characteristics, options = {})
|
5
7
|
super
|
6
8
|
if priority = options[:priority]
|
7
9
|
@reverse_priority = priority.reverse
|
@@ -12,15 +14,15 @@ module CohortAnalysis
|
|
12
14
|
|
13
15
|
# Reduce characteristics by removing the least important one.
|
14
16
|
def reduce!
|
15
|
-
least_important_key = if
|
16
|
-
|
17
|
-
|
17
|
+
least_important_key = if reverse_priority
|
18
|
+
reverse_priority.detect do |k|
|
19
|
+
current.has_key? k
|
18
20
|
end
|
19
21
|
else
|
20
|
-
|
22
|
+
current.keys.last
|
21
23
|
end
|
22
24
|
if least_important_key
|
23
|
-
|
25
|
+
current.delete least_important_key
|
24
26
|
else
|
25
27
|
raise ::RuntimeError, "[cohort_analysis] Priority improperly specified"
|
26
28
|
end
|
@@ -1,31 +1,118 @@
|
|
1
1
|
module CohortAnalysis
|
2
2
|
class Strategy < ::Arel::Nodes::Node
|
3
|
-
|
3
|
+
class << self
|
4
|
+
def create(select_manager, characteristics, options = {})
|
5
|
+
options = options.symbolize_keys
|
6
|
+
strategy = if options.has_key? :strategy
|
7
|
+
options[:strategy]
|
8
|
+
elsif options.has_key? :priority
|
9
|
+
:strict
|
10
|
+
else
|
11
|
+
DEFAULT_STRATEGY
|
12
|
+
end
|
13
|
+
const_get(strategy.to_s.camelcase).new(select_manager, characteristics, options)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
module AlwaysTrue
|
18
|
+
def self.to_sql; '1 = 1' end
|
19
|
+
end
|
20
|
+
module Impossible
|
21
|
+
def self.to_sql; '1 = 2' end
|
22
|
+
end
|
23
|
+
|
24
|
+
DEFAULT_STRATEGY = :big
|
4
25
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
26
|
+
attr_reader :select_manager
|
27
|
+
attr_reader :original
|
28
|
+
attr_reader :current
|
29
|
+
attr_reader :minimum_size
|
30
|
+
attr_reader :table_name
|
31
|
+
attr_reader :table
|
32
|
+
|
33
|
+
def initialize(select_manager, characteristics, options = {})
|
34
|
+
@select_manager = select_manager
|
35
|
+
@table_name = select_manager.source.left.name
|
36
|
+
@table = Arel::Table.new table_name
|
37
|
+
@original = characteristics.dup
|
38
|
+
@current = characteristics.dup
|
9
39
|
@minimum_size = options.fetch(:minimum_size, 1)
|
40
|
+
@final_mutex = ::Mutex.new
|
41
|
+
end
|
42
|
+
|
43
|
+
def final
|
44
|
+
@final || if @final_mutex.try_lock
|
45
|
+
begin
|
46
|
+
@final ||= resolve!
|
47
|
+
ensure
|
48
|
+
@final_mutex.unlock
|
49
|
+
end
|
50
|
+
else
|
51
|
+
Impossible
|
52
|
+
end
|
10
53
|
end
|
11
54
|
|
12
55
|
def expr
|
13
|
-
|
56
|
+
final.to_sql
|
57
|
+
end
|
58
|
+
|
59
|
+
# Value equality between two strategies.
#
# Two strategies are equal when they target the same table, require the same
# minimum cohort size, and were built from the same original characteristics.
# The current (possibly reduced) characteristics are deliberately not compared.
#
# NOTE: minimum_size must be compared with `==`. A single `=` here would assign
# a local variable and always be truthy, making strategies that differ only in
# minimum size look identical.
#
# @param other [Object] the object to compare against
# @return [Boolean] false for anything that is not a Strategy
def ==(other)
  other.is_a?(Strategy) &&
    table_name == other.table_name &&
    minimum_size == other.minimum_size &&
    original == other.original
end
|
15
|
-
alias :to_sql :expr
|
16
65
|
|
17
66
|
private
|
18
67
|
|
19
68
|
# Recursively look for a scope that meets the characteristics and is at least <tt>minimum_size</tt>.
|
20
69
|
def resolve!
|
21
|
-
if
|
22
|
-
|
23
|
-
elsif
|
24
|
-
|
70
|
+
if original.empty?
|
71
|
+
AlwaysTrue
|
72
|
+
elsif current.empty?
|
73
|
+
Impossible
|
74
|
+
elsif count(current) >= minimum_size
|
75
|
+
Arel::Nodes::Grouping.new grasp(current).inject(:and)
|
25
76
|
else
|
26
77
|
reduce!
|
27
78
|
resolve!
|
28
79
|
end
|
29
80
|
end
|
81
|
+
|
82
|
+
def grasp(subset)
|
83
|
+
subset.map do |k, v|
|
84
|
+
case v
|
85
|
+
when Array
|
86
|
+
table[k].in v
|
87
|
+
else
|
88
|
+
table[k].eq v
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def count(subset)
|
94
|
+
constraints = grasp subset
|
95
|
+
|
96
|
+
select_manager.constraints.each do |constraint|
|
97
|
+
if self == constraint
|
98
|
+
next
|
99
|
+
end
|
100
|
+
if constraint.is_a? String
|
101
|
+
constraint = Arel::Nodes::Grouping.new constraint
|
102
|
+
end
|
103
|
+
constraints << constraint
|
104
|
+
end
|
105
|
+
|
106
|
+
relation = constraints.inject(nil) do |memo, constraint|
|
107
|
+
if memo
|
108
|
+
memo.and(constraint)
|
109
|
+
else
|
110
|
+
constraint
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
sql = table.dup.project('COUNT(*)').where(relation).to_sql
|
115
|
+
select_manager.engine.connection.select_value(sql).to_i
|
116
|
+
end
|
30
117
|
end
|
31
118
|
end
|
data/lib/cohort_analysis.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
require 'arel'
|
2
|
-
require 'active_record'
|
3
2
|
require 'active_support/core_ext'
|
4
3
|
|
5
4
|
require 'cohort_analysis/strategy'
|
6
5
|
require 'cohort_analysis/strategy/big'
|
7
6
|
require 'cohort_analysis/strategy/strict'
|
8
7
|
|
9
|
-
require 'cohort_analysis/active_record_base_class_methods'
|
10
|
-
require 'cohort_analysis/active_record_relation_instance_methods'
|
11
|
-
require 'cohort_analysis/arel_visitors_visitor_instance_methods'
|
12
|
-
|
13
8
|
module CohortAnalysis
|
14
|
-
def self.conditions_for(characteristics)
|
15
|
-
case characteristics
|
16
|
-
when ::Array
|
17
|
-
characteristics.inject({}) { |memo, (k, v)| memo[k] = v; memo }
|
18
|
-
else
|
19
|
-
characteristics
|
20
|
-
end
|
21
|
-
end
|
22
9
|
end
|
23
10
|
|
24
|
-
|
25
|
-
|
11
|
+
require 'cohort_analysis/arel_select_manager_instance_methods'
|
12
|
+
Arel::SelectManager.send :include, CohortAnalysis::ArelSelectManagerInstanceMethods
|
13
|
+
|
14
|
+
require 'cohort_analysis/arel_table_instance_methods'
|
15
|
+
Arel::Table.send :include, CohortAnalysis::ArelTableInstanceMethods
|
16
|
+
|
17
|
+
require 'cohort_analysis/arel_visitors_visitor_instance_methods'
|
26
18
|
Arel::Visitors::Visitor.send :include, CohortAnalysis::ArelVisitorsVisitorInstanceMethods
|
19
|
+
|
20
|
+
if defined?(ActiveRecord)
|
21
|
+
require 'cohort_analysis/active_record_base_class_methods'
|
22
|
+
ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
|
23
|
+
|
24
|
+
require 'cohort_analysis/active_record_relation_instance_methods'
|
25
|
+
ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
|
26
|
+
end
|
data/test/helper.rb
CHANGED
@@ -9,20 +9,65 @@ MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
|
9
9
|
|
10
10
|
require 'factory_girl'
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
require 'active_record'
|
13
|
+
require 'active_record_inline_schema'
|
14
|
+
|
14
15
|
require 'cohort_analysis'
|
15
16
|
|
16
|
-
if ::Bundler.definition.specs['
|
17
|
+
if ::Bundler.definition.specs['debugger'].first
|
18
|
+
require 'debugger'
|
19
|
+
elsif ::Bundler.definition.specs['ruby-debug'].first
|
17
20
|
require 'ruby-debug'
|
18
21
|
end
|
19
22
|
|
20
23
|
# require 'logger'
|
21
24
|
# ActiveRecord::Base.logger = Logger.new($stdout)
|
22
25
|
|
23
|
-
ActiveRecord::Base.establish_connection(
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
|
27
|
+
|
28
|
+
Arel::Table.engine = ActiveRecord::Base
|
29
|
+
|
30
|
+
# https://gist.github.com/1560208 - shared examples in minispec
|
31
|
+
|
32
|
+
MiniTest::Spec.class_eval do
|
33
|
+
# start transaction
|
34
|
+
before do
|
35
|
+
# activerecord-3.2.3/lib/active_record/fixtures.rb
|
36
|
+
@fixture_connections = ActiveRecord::Base.connection_handler.connection_pools.values.map(&:connection)
|
37
|
+
@fixture_connections.each do |connection|
|
38
|
+
connection.increment_open_transactions
|
39
|
+
connection.transaction_joinable = false
|
40
|
+
connection.begin_db_transaction
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# rollback
|
45
|
+
after do
|
46
|
+
@fixture_connections.each do |connection|
|
47
|
+
if connection.open_transactions != 0
|
48
|
+
connection.rollback_db_transaction
|
49
|
+
connection.decrement_open_transactions
|
50
|
+
end
|
51
|
+
end
|
52
|
+
@fixture_connections.clear
|
53
|
+
ActiveRecord::Base.clear_active_connections!
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.shared_examples
|
57
|
+
@shared_examples ||= {}
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Minimal "shared examples" support for MiniTest specs.
#
# Blocks are stored by description in MiniTest::Spec.shared_examples and later
# replayed inside whichever context asks for them.
module MiniTest::Spec::SharedExamples
  # Registers a reusable group of examples.
  #
  # @param desc [Object] key under which the examples are stored
  # @yield the example definitions to replay later
  # @return [Proc] the stored block
  def shared_examples_for(desc, &block)
    MiniTest::Spec.shared_examples[desc] = block
  end

  # Replays the examples registered under +desc+ with +self+ as the receiver.
  #
  # The stored proc is handed to instance_eval as its block (`&examples`).
  # Invoking it with `.call` from inside an `instance_eval do ... end` wrapper
  # would NOT rebind the proc's self, so the examples would run in the context
  # where they were defined instead of the caller's.
  #
  # @param desc [Object] key previously passed to #shared_examples_for
  # @raise [ArgumentError] if nothing was registered under +desc+
  def it_behaves_like(desc)
    examples = MiniTest::Spec.shared_examples[desc]
    raise ArgumentError, "No shared examples registered for #{desc.inspect}" unless examples
    instance_eval(&examples)
  end
end
|
72
|
+
|
73
|
+
Object.class_eval { include(MiniTest::Spec::SharedExamples) }
|