cohort_analysis 0.4.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.yardopts ADDED
@@ -0,0 +1,2 @@
1
+ --no-private
2
+ --readme README.markdown
data/CHANGELOG CHANGED
@@ -1,13 +1,25 @@
1
+ 1.0.0 / 2012-05-30
2
+
3
+ * Breaking changes
4
+
5
+ * Cohorts with initially (originally) empty characteristics now return everything. Previously they returned nothing.
6
+ * Removed #cohort_constraint. There was really no correct way to use it. Now it's recommended you union together cohorts.
7
+ * ActiveRecord::{Base.,Relation#}cohort returns an Arel::SelectManager, NOT another ActiveRecord::Relation. That means you can't call, for example, #count on it - you have to treat it like an ARel object.
8
+
9
+ * Enhancements
10
+
11
+ * You can use #cohort when composing with ARel. Just be safe about how you use it - it's not an automagic ActiveRecord::Relation.
12
+ * True unit tests, including explicit tests for UNIONing together cohorts.
13
+ * Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7
14
+
1
15
  0.4.0 / 2012-02-28
2
16
 
3
17
  * renamed to cohort_analysis
4
-
5
18
  * new, simplified syntax - see README.markdown
6
19
 
7
20
  0.3.0 / 2012-02-27
8
21
 
9
22
  * Now my_strategy = Person.strategy({:favorite_color => 'heliotrope', :birthdate => @date_range}, :importance => [:birthdate, :favorite_color]) will return an Arel::Nodes::Node which can be combined like Person.where(my_strategy.and("gender = 'male'")) - it does NOT return a "scope" like before.
10
-
11
23
  * Refactor to take advantage of ARel.
12
24
 
13
25
  0.2.0
data/Gemfile CHANGED
@@ -1,12 +1,3 @@
1
1
  source :rubygems
2
2
 
3
3
  gemspec
4
-
5
- # development dependencies
6
- gem 'minitest'
7
- gem 'minitest-reporters'
8
- gem 'factory_girl'
9
- gem 'mysql2'
10
- gem 'rake'
11
- gem 'yard'
12
- # gem 'ruby-debug19'
data/README.markdown CHANGED
@@ -18,70 +18,6 @@ Replaces [`cohort_scope`](https://github.com/seamusabshere/cohort_scope).
18
18
  <dd>Discards characteristics according to <code>:priority</code>.</dd>
19
19
  </dl>
20
20
 
21
- ### `:big` example
22
-
23
- This is straight from the tests:
24
-
25
- # make some fixtures
26
- 1_000.times { FactoryGirl.create(:lax) }
27
- 100.times { FactoryGirl.create(:lax_sfo) }
28
- 10.times { FactoryGirl.create(:lax_sfo_co) }
29
- 3.times { FactoryGirl.create(:lax_sfo_a320) }
30
- 1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
31
-
32
- lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
33
- # don't discard anything
34
- Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
35
- # discard airline
36
- Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
37
- # discard plane and airline
38
- Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
39
- # discard plane and airline and dest
40
- Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
41
-
42
- lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
43
- # don't discard anything
44
- Flight.cohort(lax_sfo_a320).count.must_equal 4
45
- # discard plane
46
- Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
47
- # discard plane and dest
48
- Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
49
-
50
- # off the rails here a bit
51
- woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
52
- # discard plane
53
- Flight.cohort(woah_lax_co_a320).count.must_equal 10
54
- # discard plane and airline
55
- Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
56
-
57
- ### `:strict` example
58
-
59
- Also from the tests...
60
-
61
- # make some fixtures
62
- 1_000.times { FactoryGirl.create(:lax) }
63
- 100.times { FactoryGirl.create(:lax_sfo) }
64
- 10.times { FactoryGirl.create(:lax_sfo_co) }
65
- 3.times { FactoryGirl.create(:lax_sfo_a320) }
66
- 1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
67
-
68
- lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
69
- priority = [:origin, :dest, :airline, :plane]
70
- # discard nothing
71
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
72
- # (force) discard plane, then (force) discard airline
73
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
74
- # (force) discard plane, then (force) discard airline, then (force) discard dest
75
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
76
-
77
- priority = [:plane, :airline, :dest, :origin]
78
- # discard nothing
79
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
80
- # (force) discard origin, then (force) discard dest, then (force) discard airline
81
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
82
- # gives up!
83
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
84
-
85
21
  ## Copyright
86
22
 
87
23
  Copyright (c) 2012 Brighter Planet, Inc.
@@ -16,6 +16,24 @@ Gem::Specification.new do |gem|
16
16
  gem.require_paths = ["lib"]
17
17
  gem.version = CohortAnalysis::VERSION
18
18
 
19
- gem.add_runtime_dependency "activesupport", '>=3'
20
- gem.add_runtime_dependency "activerecord", '>=3'
19
+ gem.add_runtime_dependency 'activesupport', '>=3'
20
+ gem.add_runtime_dependency 'arel', '>=3'
21
+
22
+ gem.add_development_dependency 'activerecord', '>=3'
23
+ gem.add_development_dependency 'active_record_inline_schema'
24
+ gem.add_development_dependency 'minitest'
25
+ gem.add_development_dependency 'minitest-reporters'
26
+ gem.add_development_dependency 'yard'
27
+ if RUBY_VERSION >= '1.9'
28
+ gem.add_development_dependency 'factory_girl'
29
+ else
30
+ gem.add_development_dependency 'factory_girl', '~>2'
31
+ end
32
+ if RUBY_PLATFORM == 'java'
33
+ gem.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
34
+ else
35
+ gem.add_development_dependency 'sqlite3'
36
+ end
37
+
38
+ # gem.add_development_dependency 'debugger'
21
39
  end
@@ -3,9 +3,5 @@ module CohortAnalysis
3
3
  def cohort(*args)
4
4
  scoped.cohort *args
5
5
  end
6
-
7
- def cohort_constraint(*args)
8
- scoped.cohort_constraint *args
9
- end
10
6
  end
11
7
  end
@@ -1,13 +1,42 @@
1
1
  module CohortAnalysis
2
2
  module ActiveRecordRelationInstanceMethods
3
+ # @note This doesn't return an <code>ActiveRecord::Relation</code>, so you can't just call #count.
4
+ #
5
+ # @example Count a Flight cohort
6
+ # cohort = Flight.cohort(:origin => 'MSN', :dest => 'ORD')
7
+ # cohort.count #=> BAD! just plain Arel::SelectManager doesn't provide #count, that's an ActiveRecord::Relation thing
8
+ # Flight.connection.select_value(cohort.project('COUNT(*)').to_sql) #=> what you wanted
9
+ #
10
+ # @return [Arel::SelectManager] A select manager without any projections.
3
11
  def cohort(characteristics, options = {})
4
- where cohort_constraint(characteristics, options)
12
+ select_manager = arel.clone
13
+ select_manager.projections = []
14
+ select_manager.where Strategy.create(select_manager, characteristics, options)
15
+ select_manager
5
16
  end
6
17
 
7
- def cohort_constraint(characteristics, options = {})
8
- options = options.symbolize_keys
9
- strategy = (options.delete(:strategy) || :big).to_s.camelcase
10
- Strategy.const_get(strategy).new(self, characteristics, options)
18
+ # @note Won't work properly unless it's the last constraint in your chain.
19
+ #
20
+ # @example Making sure it's the last thing you call
21
+ # Flight.cohort_relation(:origin => 'MSN', :dest => 'ORD').where(:year => 2009) #=> BAD! the cohort calculation CANNOT see :year => 2009
22
+ # Flight.where(:year => 2009).cohort_relation(:origin => 'MSN', :dest => 'ORD') #=> OK!
23
+ #
24
+ # @return [ActiveRecord::Relation]
25
+ def cohort_relation(characteristics, options = {})
26
+ where Strategy.create(arel, characteristics, options)
11
27
  end
12
28
  end
13
29
  end
30
+
31
+ =begin
32
+ If I return ActiveRecord::Relation#where(strategy) and somebody calls #where on it, a new relation is returned that includes the strategy, but the strategy can't see the new where values:
33
+
34
+ relation = clone # which keeps where_values but clears @arel
35
+ relation.where_values += build_where(opts, rest) # which just adds the expr
36
+ relation
37
+
38
+ If I return Arel::SelectManager#where(strategy), it keeps the context, so the strategy can use that:
39
+
40
+ @ctx.wheres << expr
41
+ self
42
+ =end
@@ -0,0 +1,8 @@
1
+ module CohortAnalysis
2
+ module ArelSelectManagerInstanceMethods
3
+ # @return [Arel::SelectManager]
4
+ def cohort(characteristics, options = {})
5
+ where Strategy.create(self, characteristics, options)
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,7 @@
1
+ module CohortAnalysis
2
+ module ArelTableInstanceMethods
3
+ def cohort(*args)
4
+ from(self).cohort *args
5
+ end
6
+ end
7
+ end
@@ -5,14 +5,13 @@ module CohortAnalysis
5
5
  #
6
6
  # The characteristic whose removal leads to the highest record count is removed from the overall characteristic set.
7
7
  def reduce!
8
- @reduced_characteristics = if @reduced_characteristics.keys.length < 2
8
+ @current = if current.keys.length < 2
9
9
  {}
10
10
  else
11
- most_restrictive_characteristic = @reduced_characteristics.keys.max_by do |key|
12
- conditions = CohortAnalysis.conditions_for @reduced_characteristics.except(key)
13
- @active_record_relation.where(conditions).count
11
+ most_restrictive = current.keys.max_by do |k|
12
+ count current.except(k)
14
13
  end
15
- @reduced_characteristics.except most_restrictive_characteristic
14
+ current.except most_restrictive
16
15
  end
17
16
  end
18
17
  end
@@ -1,7 +1,9 @@
1
1
  module CohortAnalysis
2
2
  class Strategy
3
3
  class Strict < Strategy
4
- def initialize(active_record_relation, characteristics, options = {})
4
+ attr_reader :reverse_priority
5
+
6
+ def initialize(select_manager, characteristics, options = {})
5
7
  super
6
8
  if priority = options[:priority]
7
9
  @reverse_priority = priority.reverse
@@ -12,15 +14,15 @@ module CohortAnalysis
12
14
 
13
15
  # Reduce characteristics by removing the least important one.
14
16
  def reduce!
15
- least_important_key = if @reverse_priority
16
- @reverse_priority.detect do |k|
17
- @reduced_characteristics.has_key? k
17
+ least_important_key = if reverse_priority
18
+ reverse_priority.detect do |k|
19
+ current.has_key? k
18
20
  end
19
21
  else
20
- @reduced_characteristics.keys.last
22
+ current.keys.last
21
23
  end
22
24
  if least_important_key
23
- @reduced_characteristics.delete least_important_key
25
+ current.delete least_important_key
24
26
  else
25
27
  raise ::RuntimeError, "[cohort_analysis] Priority improperly specified"
26
28
  end
@@ -1,31 +1,118 @@
1
1
  module CohortAnalysis
2
2
  class Strategy < ::Arel::Nodes::Node
3
- IMPOSSIBLE = '1 = 2'
3
+ class << self
4
+ def create(select_manager, characteristics, options = {})
5
+ options = options.symbolize_keys
6
+ strategy = if options.has_key? :strategy
7
+ options[:strategy]
8
+ elsif options.has_key? :priority
9
+ :strict
10
+ else
11
+ DEFAULT_STRATEGY
12
+ end
13
+ const_get(strategy.to_s.camelcase).new(select_manager, characteristics, options)
14
+ end
15
+ end
16
+
17
+ module AlwaysTrue
18
+ def self.to_sql; '1 = 1' end
19
+ end
20
+ module Impossible
21
+ def self.to_sql; '1 = 2' end
22
+ end
23
+
24
+ DEFAULT_STRATEGY = :big
4
25
 
5
- def initialize(active_record_relation, characteristics, options = {})
6
- @active_record_relation = active_record_relation
7
- @characteristics = characteristics
8
- @reduced_characteristics = characteristics.dup
26
+ attr_reader :select_manager
27
+ attr_reader :original
28
+ attr_reader :current
29
+ attr_reader :minimum_size
30
+ attr_reader :table_name
31
+ attr_reader :table
32
+
33
+ def initialize(select_manager, characteristics, options = {})
34
+ @select_manager = select_manager
35
+ @table_name = select_manager.source.left.name
36
+ @table = Arel::Table.new table_name
37
+ @original = characteristics.dup
38
+ @current = characteristics.dup
9
39
  @minimum_size = options.fetch(:minimum_size, 1)
40
+ @final_mutex = ::Mutex.new
41
+ end
42
+
43
+ def final
44
+ @final || if @final_mutex.try_lock
45
+ begin
46
+ @final ||= resolve!
47
+ ensure
48
+ @final_mutex.unlock
49
+ end
50
+ else
51
+ Impossible
52
+ end
10
53
  end
11
54
 
12
55
  def expr
13
- @expr ||= resolve!
56
+ final.to_sql
57
+ end
58
+
59
+ def ==(other)
60
+ other.is_a?(Strategy) and
61
+ table_name == other.table_name and
62
+ minimum_size = other.minimum_size and
63
+ original == other.original
14
64
  end
15
- alias :to_sql :expr
16
65
 
17
66
  private
18
67
 
19
68
  # Recursively look for a scope that meets the characteristics and is at least <tt>minimum_size</tt>.
20
69
  def resolve!
21
- if @reduced_characteristics.empty?
22
- IMPOSSIBLE
23
- elsif (current = @active_record_relation.where(CohortAnalysis.conditions_for(@reduced_characteristics))).count >= @minimum_size
24
- current.constraints.inject(:and).to_sql
70
+ if original.empty?
71
+ AlwaysTrue
72
+ elsif current.empty?
73
+ Impossible
74
+ elsif count(current) >= minimum_size
75
+ Arel::Nodes::Grouping.new grasp(current).inject(:and)
25
76
  else
26
77
  reduce!
27
78
  resolve!
28
79
  end
29
80
  end
81
+
82
+ def grasp(subset)
83
+ subset.map do |k, v|
84
+ case v
85
+ when Array
86
+ table[k].in v
87
+ else
88
+ table[k].eq v
89
+ end
90
+ end
91
+ end
92
+
93
+ def count(subset)
94
+ constraints = grasp subset
95
+
96
+ select_manager.constraints.each do |constraint|
97
+ if self == constraint
98
+ next
99
+ end
100
+ if constraint.is_a? String
101
+ constraint = Arel::Nodes::Grouping.new constraint
102
+ end
103
+ constraints << constraint
104
+ end
105
+
106
+ relation = constraints.inject(nil) do |memo, constraint|
107
+ if memo
108
+ memo.and(constraint)
109
+ else
110
+ constraint
111
+ end
112
+ end
113
+
114
+ sql = table.dup.project('COUNT(*)').where(relation).to_sql
115
+ select_manager.engine.connection.select_value(sql).to_i
116
+ end
30
117
  end
31
118
  end
@@ -1,3 +1,3 @@
1
1
  module CohortAnalysis
2
- VERSION = '0.4.0'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -1,26 +1,26 @@
1
1
  require 'arel'
2
- require 'active_record'
3
2
  require 'active_support/core_ext'
4
3
 
5
4
  require 'cohort_analysis/strategy'
6
5
  require 'cohort_analysis/strategy/big'
7
6
  require 'cohort_analysis/strategy/strict'
8
7
 
9
- require 'cohort_analysis/active_record_base_class_methods'
10
- require 'cohort_analysis/active_record_relation_instance_methods'
11
- require 'cohort_analysis/arel_visitors_visitor_instance_methods'
12
-
13
8
  module CohortAnalysis
14
- def self.conditions_for(characteristics)
15
- case characteristics
16
- when ::Array
17
- characteristics.inject({}) { |memo, (k, v)| memo[k] = v; memo }
18
- else
19
- characteristics
20
- end
21
- end
22
9
  end
23
10
 
24
- ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
25
- ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
11
+ require 'cohort_analysis/arel_select_manager_instance_methods'
12
+ Arel::SelectManager.send :include, CohortAnalysis::ArelSelectManagerInstanceMethods
13
+
14
+ require 'cohort_analysis/arel_table_instance_methods'
15
+ Arel::Table.send :include, CohortAnalysis::ArelTableInstanceMethods
16
+
17
+ require 'cohort_analysis/arel_visitors_visitor_instance_methods'
26
18
  Arel::Visitors::Visitor.send :include, CohortAnalysis::ArelVisitorsVisitorInstanceMethods
19
+
20
+ if defined?(ActiveRecord)
21
+ require 'cohort_analysis/active_record_base_class_methods'
22
+ ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
23
+
24
+ require 'cohort_analysis/active_record_relation_instance_methods'
25
+ ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
26
+ end
data/test/helper.rb CHANGED
@@ -9,20 +9,65 @@ MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
9
9
 
10
10
  require 'factory_girl'
11
11
 
12
- $LOAD_PATH.unshift(File.dirname(__FILE__))
13
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
12
+ require 'active_record'
13
+ require 'active_record_inline_schema'
14
+
14
15
  require 'cohort_analysis'
15
16
 
16
- if ::Bundler.definition.specs['ruby-debug19'].first or ::Bundler.definition.specs['ruby-debug'].first
17
+ if ::Bundler.definition.specs['debugger'].first
18
+ require 'debugger'
19
+ elsif ::Bundler.definition.specs['ruby-debug'].first
17
20
  require 'ruby-debug'
18
21
  end
19
22
 
20
23
  # require 'logger'
21
24
  # ActiveRecord::Base.logger = Logger.new($stdout)
22
25
 
23
- ActiveRecord::Base.establish_connection(
24
- 'adapter' => 'mysql2',
25
- 'database' => 'test_cohort_analysis',
26
- 'username' => 'root',
27
- 'password' => 'password'
28
- )
26
+ ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
27
+
28
+ Arel::Table.engine = ActiveRecord::Base
29
+
30
+ # https://gist.github.com/1560208 - shared examples in minispec
31
+
32
+ MiniTest::Spec.class_eval do
33
+ # start transaction
34
+ before do
35
+ # activerecord-3.2.3/lib/active_record/fixtures.rb
36
+ @fixture_connections = ActiveRecord::Base.connection_handler.connection_pools.values.map(&:connection)
37
+ @fixture_connections.each do |connection|
38
+ connection.increment_open_transactions
39
+ connection.transaction_joinable = false
40
+ connection.begin_db_transaction
41
+ end
42
+ end
43
+
44
+ # rollback
45
+ after do
46
+ @fixture_connections.each do |connection|
47
+ if connection.open_transactions != 0
48
+ connection.rollback_db_transaction
49
+ connection.decrement_open_transactions
50
+ end
51
+ end
52
+ @fixture_connections.clear
53
+ ActiveRecord::Base.clear_active_connections!
54
+ end
55
+
56
+ def self.shared_examples
57
+ @shared_examples ||= {}
58
+ end
59
+ end
60
+
61
+ module MiniTest::Spec::SharedExamples
62
+ def shared_examples_for(desc, &block)
63
+ MiniTest::Spec.shared_examples[desc] = block
64
+ end
65
+
66
+ def it_behaves_like(desc)
67
+ self.instance_eval do
68
+ MiniTest::Spec.shared_examples[desc].call
69
+ end
70
+ end
71
+ end
72
+
73
+ Object.class_eval { include(MiniTest::Spec::SharedExamples) }