cohort_analysis 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.yardopts ADDED
@@ -0,0 +1,2 @@
1
+ --no-private
2
+ --readme README.markdown
data/CHANGELOG CHANGED
@@ -1,13 +1,25 @@
1
+ 1.0.0 / 2012-05-30
2
+
3
+ * Breaking changes
4
+
5
+ * Cohorts whose original characteristics are empty now return everything. Previously they returned nothing.
6
+ * Removed #cohort_constraint. There was really no correct way to use it. Now it's recommended you union together cohorts.
7
+ * ActiveRecord::{Base.,Relation#}cohort returns an Arel::SelectManager, NOT another ActiveRecord::Relation. That means you can't call, for example, #count on it - you have to treat it like an ARel object.
8
+
9
+ * Enhancements
10
+
11
+ * You can use #cohort when composing with ARel. Just be safe about how you use it - it's not an automagic ActiveRecord::Relation.
12
+ * True unit tests, including explicit tests for UNIONing together cohorts.
13
+ * Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7
14
+
1
15
  0.4.0 / 2012-02-28
2
16
 
3
17
  * renamed to cohort_analysis
4
-
5
18
  * new, simplified syntax - see README.markdown
6
19
 
7
20
  0.3.0 / 2012-02-27
8
21
 
9
22
  * Now my_strategy = Person.strategy({:favorite_color => 'heliotrope', :birthdate => @date_range}, :importance => [:birthdate, :favorite_color]) will return an Arel::Nodes::Node which can be combined like Person.where(my_strategy.and("gender = 'male'")) - it does NOT return a "scope" like before.
10
-
11
23
  * Refactor to take advantage of ARel.
12
24
 
13
25
  0.2.0
data/Gemfile CHANGED
@@ -1,12 +1,3 @@
1
1
  source :rubygems
2
2
 
3
3
  gemspec
4
-
5
- # development dependencies
6
- gem 'minitest'
7
- gem 'minitest-reporters'
8
- gem 'factory_girl'
9
- gem 'mysql2'
10
- gem 'rake'
11
- gem 'yard'
12
- # gem 'ruby-debug19'
data/README.markdown CHANGED
@@ -18,70 +18,6 @@ Replaces [`cohort_scope`](https://github.com/seamusabshere/cohort_scope).
18
18
  <dd>Discards characteristics according to <code>:priority</code>.</dd>
19
19
  </dl>
20
20
 
21
- ### `:big` example
22
-
23
- This is straight from the tests:
24
-
25
- # make some fixtures
26
- 1_000.times { FactoryGirl.create(:lax) }
27
- 100.times { FactoryGirl.create(:lax_sfo) }
28
- 10.times { FactoryGirl.create(:lax_sfo_co) }
29
- 3.times { FactoryGirl.create(:lax_sfo_a320) }
30
- 1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
31
-
32
- lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
33
- # don't discard anything
34
- Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
35
- # discard airline
36
- Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
37
- # discard plane and airline
38
- Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
39
- # discard plane and airline and dest
40
- Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
41
-
42
- lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
43
- # don't discard anything
44
- Flight.cohort(lax_sfo_a320).count.must_equal 4
45
- # discard plane
46
- Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
47
- # discard plane and dest
48
- Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
49
-
50
- # off the rails here a bit
51
- woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
52
- # discard plane
53
- Flight.cohort(woah_lax_co_a320).count.must_equal 10
54
- # discard plane and airline
55
- Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
56
-
57
- ### `:strict` example
58
-
59
- Also from the tests...
60
-
61
- # make some fixtures
62
- 1_000.times { FactoryGirl.create(:lax) }
63
- 100.times { FactoryGirl.create(:lax_sfo) }
64
- 10.times { FactoryGirl.create(:lax_sfo_co) }
65
- 3.times { FactoryGirl.create(:lax_sfo_a320) }
66
- 1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
67
-
68
- lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
69
- priority = [:origin, :dest, :airline, :plane]
70
- # discard nothing
71
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
72
- # (force) discard plane, then (force) discard airline
73
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
74
- # (force) discard plane, then (force) discard airline, then (force) discard dest
75
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
76
-
77
- priority = [:plane, :airline, :dest, :origin]
78
- # discard nothing
79
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
80
- # (force) discard origin, then (force) discard dest, then (force) discard airline
81
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
82
- # gives up!
83
- Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
84
-
85
21
  ## Copyright
86
22
 
87
23
  Copyright (c) 2012 Brighter Planet, Inc.
@@ -16,6 +16,24 @@ Gem::Specification.new do |gem|
16
16
  gem.require_paths = ["lib"]
17
17
  gem.version = CohortAnalysis::VERSION
18
18
 
19
- gem.add_runtime_dependency "activesupport", '>=3'
20
- gem.add_runtime_dependency "activerecord", '>=3'
19
+ gem.add_runtime_dependency 'activesupport', '>=3'
20
+ gem.add_runtime_dependency 'arel', '>=3'
21
+
22
+ gem.add_development_dependency 'activerecord', '>=3'
23
+ gem.add_development_dependency 'active_record_inline_schema'
24
+ gem.add_development_dependency 'minitest'
25
+ gem.add_development_dependency 'minitest-reporters'
26
+ gem.add_development_dependency 'yard'
27
+ if RUBY_VERSION >= '1.9'
28
+ gem.add_development_dependency 'factory_girl'
29
+ else
30
+ gem.add_development_dependency 'factory_girl', '~>2'
31
+ end
32
+ if RUBY_PLATFORM == 'java'
33
+ gem.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
34
+ else
35
+ gem.add_development_dependency 'sqlite3'
36
+ end
37
+
38
+ # gem.add_development_dependency 'debugger'
21
39
  end
@@ -3,9 +3,5 @@ module CohortAnalysis
3
3
  def cohort(*args)
4
4
  scoped.cohort *args
5
5
  end
6
-
7
- def cohort_constraint(*args)
8
- scoped.cohort_constraint *args
9
- end
10
6
  end
11
7
  end
@@ -1,13 +1,42 @@
1
1
  module CohortAnalysis
2
2
  module ActiveRecordRelationInstanceMethods
3
+ # @note This doesn't return an <code>ActiveRecord::Relation</code>, so you can't just call count.
4
+ #
5
+ # @example Count a Flight cohort
6
+ # cohort = Flight.cohort(:origin => 'MSN', :dest => 'ORD')
7
+ # cohort.count #=> BAD! just plain Arel::SelectManager doesn't provide #count, that's an ActiveRecord::Relation thing
8
+ # Flight.connection.select_value(cohort.project('COUNT(*)').to_sql) #=> what you wanted
9
+ #
10
+ # @return [Arel::SelectManager] A select manager without any projections.
3
11
  def cohort(characteristics, options = {})
4
- where cohort_constraint(characteristics, options)
12
+ select_manager = arel.clone
13
+ select_manager.projections = []
14
+ select_manager.where Strategy.create(select_manager, characteristics, options)
15
+ select_manager
5
16
  end
6
17
 
7
- def cohort_constraint(characteristics, options = {})
8
- options = options.symbolize_keys
9
- strategy = (options.delete(:strategy) || :big).to_s.camelcase
10
- Strategy.const_get(strategy).new(self, characteristics, options)
18
+ # @note Won't work properly unless it's the last constraint in your chain.
19
+ #
20
+ # @example Making sure it's the last thing you call
21
+ # Flight.cohort_relation(:origin => 'MSN', :dest => 'ORD').where(:year => 2009) #=> BAD! the cohort calculation CANNOT see :year => 2009
22
+ # Flight.where(:year => 2009).cohort_relation(:origin => 'MSN', :dest => 'ORD') #=> OK!
23
+ #
24
+ # @return [ActiveRecord::Relation]
25
+ def cohort_relation(characteristics, options = {})
26
+ where Strategy.create(arel, characteristics, options)
11
27
  end
12
28
  end
13
29
  end
30
+
31
+ =begin
32
+ if i return ActiveRecord::Relation#where(strategy), and somebody calls #where on it, a new relation is returned that includes the strategy, but the strategy can't see the new where values
33
+
34
+ relation = clone # which keeps where_values but clears @arel
35
+ relation.where_values += build_where(opts, rest) # which just adds the expr
36
+ relation
37
+
38
+ if i return Arel::SelectManager#where(strategy), it keeps the context, so the strategy can use that
39
+
40
+ @ctx.wheres << expr
41
+ self
42
+ =end
@@ -0,0 +1,8 @@
1
+ module CohortAnalysis
2
+ module ArelSelectManagerInstanceMethods
3
+ # @return [Arel::SelectManager]
4
+ def cohort(characteristics, options = {})
5
+ where Strategy.create(self, characteristics, options)
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,7 @@
1
+ module CohortAnalysis
2
+ module ArelTableInstanceMethods
3
+ def cohort(*args)
4
+ from(self).cohort *args
5
+ end
6
+ end
7
+ end
@@ -5,14 +5,13 @@ module CohortAnalysis
5
5
  #
6
6
  # The characteristic whose removal leads to the highest record count is removed from the overall characteristic set.
7
7
  def reduce!
8
- @reduced_characteristics = if @reduced_characteristics.keys.length < 2
8
+ @current = if current.keys.length < 2
9
9
  {}
10
10
  else
11
- most_restrictive_characteristic = @reduced_characteristics.keys.max_by do |key|
12
- conditions = CohortAnalysis.conditions_for @reduced_characteristics.except(key)
13
- @active_record_relation.where(conditions).count
11
+ most_restrictive = current.keys.max_by do |k|
12
+ count current.except(k)
14
13
  end
15
- @reduced_characteristics.except most_restrictive_characteristic
14
+ current.except most_restrictive
16
15
  end
17
16
  end
18
17
  end
@@ -1,7 +1,9 @@
1
1
  module CohortAnalysis
2
2
  class Strategy
3
3
  class Strict < Strategy
4
- def initialize(active_record_relation, characteristics, options = {})
4
+ attr_reader :reverse_priority
5
+
6
+ def initialize(select_manager, characteristics, options = {})
5
7
  super
6
8
  if priority = options[:priority]
7
9
  @reverse_priority = priority.reverse
@@ -12,15 +14,15 @@ module CohortAnalysis
12
14
 
13
15
  # Reduce characteristics by removing the least important one.
14
16
  def reduce!
15
- least_important_key = if @reverse_priority
16
- @reverse_priority.detect do |k|
17
- @reduced_characteristics.has_key? k
17
+ least_important_key = if reverse_priority
18
+ reverse_priority.detect do |k|
19
+ current.has_key? k
18
20
  end
19
21
  else
20
- @reduced_characteristics.keys.last
22
+ current.keys.last
21
23
  end
22
24
  if least_important_key
23
- @reduced_characteristics.delete least_important_key
25
+ current.delete least_important_key
24
26
  else
25
27
  raise ::RuntimeError, "[cohort_analysis] Priority improperly specified"
26
28
  end
@@ -1,31 +1,118 @@
1
1
  module CohortAnalysis
2
2
  class Strategy < ::Arel::Nodes::Node
3
- IMPOSSIBLE = '1 = 2'
3
+ class << self
4
+ def create(select_manager, characteristics, options = {})
5
+ options = options.symbolize_keys
6
+ strategy = if options.has_key? :strategy
7
+ options[:strategy]
8
+ elsif options.has_key? :priority
9
+ :strict
10
+ else
11
+ DEFAULT_STRATEGY
12
+ end
13
+ const_get(strategy.to_s.camelcase).new(select_manager, characteristics, options)
14
+ end
15
+ end
16
+
17
+ module AlwaysTrue
18
+ def self.to_sql; '1 = 1' end
19
+ end
20
+ module Impossible
21
+ def self.to_sql; '1 = 2' end
22
+ end
23
+
24
+ DEFAULT_STRATEGY = :big
4
25
 
5
- def initialize(active_record_relation, characteristics, options = {})
6
- @active_record_relation = active_record_relation
7
- @characteristics = characteristics
8
- @reduced_characteristics = characteristics.dup
26
+ attr_reader :select_manager
27
+ attr_reader :original
28
+ attr_reader :current
29
+ attr_reader :minimum_size
30
+ attr_reader :table_name
31
+ attr_reader :table
32
+
33
+ def initialize(select_manager, characteristics, options = {})
34
+ @select_manager = select_manager
35
+ @table_name = select_manager.source.left.name
36
+ @table = Arel::Table.new table_name
37
+ @original = characteristics.dup
38
+ @current = characteristics.dup
9
39
  @minimum_size = options.fetch(:minimum_size, 1)
40
+ @final_mutex = ::Mutex.new
41
+ end
42
+
43
+ def final
44
+ @final || if @final_mutex.try_lock
45
+ begin
46
+ @final ||= resolve!
47
+ ensure
48
+ @final_mutex.unlock
49
+ end
50
+ else
51
+ Impossible
52
+ end
10
53
  end
11
54
 
12
55
  def expr
13
- @expr ||= resolve!
56
+ final.to_sql
57
+ end
58
+
59
+ def ==(other) # equal when the table name, minimum size and original characteristics all match
60
+ other.is_a?(Strategy) and
61
+ table_name == other.table_name and
62
+ minimum_size == other.minimum_size and # compare with ==; a single = only assigned a local and never checked the size
63
+ original == other.original
14
64
  end
15
- alias :to_sql :expr
16
65
 
17
66
  private
18
67
 
19
68
  # Recursively look for a scope that meets the characteristics and is at least <tt>minimum_size</tt>.
20
69
  def resolve!
21
- if @reduced_characteristics.empty?
22
- IMPOSSIBLE
23
- elsif (current = @active_record_relation.where(CohortAnalysis.conditions_for(@reduced_characteristics))).count >= @minimum_size
24
- current.constraints.inject(:and).to_sql
70
+ if original.empty?
71
+ AlwaysTrue
72
+ elsif current.empty?
73
+ Impossible
74
+ elsif count(current) >= minimum_size
75
+ Arel::Nodes::Grouping.new grasp(current).inject(:and)
25
76
  else
26
77
  reduce!
27
78
  resolve!
28
79
  end
29
80
  end
81
+
82
+ def grasp(subset)
83
+ subset.map do |k, v|
84
+ case v
85
+ when Array
86
+ table[k].in v
87
+ else
88
+ table[k].eq v
89
+ end
90
+ end
91
+ end
92
+
93
+ def count(subset)
94
+ constraints = grasp subset
95
+
96
+ select_manager.constraints.each do |constraint|
97
+ if self == constraint
98
+ next
99
+ end
100
+ if constraint.is_a? String
101
+ constraint = Arel::Nodes::Grouping.new constraint
102
+ end
103
+ constraints << constraint
104
+ end
105
+
106
+ relation = constraints.inject(nil) do |memo, constraint|
107
+ if memo
108
+ memo.and(constraint)
109
+ else
110
+ constraint
111
+ end
112
+ end
113
+
114
+ sql = table.dup.project('COUNT(*)').where(relation).to_sql
115
+ select_manager.engine.connection.select_value(sql).to_i
116
+ end
30
117
  end
31
118
  end
@@ -1,3 +1,3 @@
1
1
  module CohortAnalysis
2
- VERSION = '0.4.0'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -1,26 +1,26 @@
1
1
  require 'arel'
2
- require 'active_record'
3
2
  require 'active_support/core_ext'
4
3
 
5
4
  require 'cohort_analysis/strategy'
6
5
  require 'cohort_analysis/strategy/big'
7
6
  require 'cohort_analysis/strategy/strict'
8
7
 
9
- require 'cohort_analysis/active_record_base_class_methods'
10
- require 'cohort_analysis/active_record_relation_instance_methods'
11
- require 'cohort_analysis/arel_visitors_visitor_instance_methods'
12
-
13
8
  module CohortAnalysis
14
- def self.conditions_for(characteristics)
15
- case characteristics
16
- when ::Array
17
- characteristics.inject({}) { |memo, (k, v)| memo[k] = v; memo }
18
- else
19
- characteristics
20
- end
21
- end
22
9
  end
23
10
 
24
- ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
25
- ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
11
+ require 'cohort_analysis/arel_select_manager_instance_methods'
12
+ Arel::SelectManager.send :include, CohortAnalysis::ArelSelectManagerInstanceMethods
13
+
14
+ require 'cohort_analysis/arel_table_instance_methods'
15
+ Arel::Table.send :include, CohortAnalysis::ArelTableInstanceMethods
16
+
17
+ require 'cohort_analysis/arel_visitors_visitor_instance_methods'
26
18
  Arel::Visitors::Visitor.send :include, CohortAnalysis::ArelVisitorsVisitorInstanceMethods
19
+
20
+ if defined?(ActiveRecord)
21
+ require 'cohort_analysis/active_record_base_class_methods'
22
+ ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
23
+
24
+ require 'cohort_analysis/active_record_relation_instance_methods'
25
+ ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
26
+ end
data/test/helper.rb CHANGED
@@ -9,20 +9,65 @@ MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
9
9
 
10
10
  require 'factory_girl'
11
11
 
12
- $LOAD_PATH.unshift(File.dirname(__FILE__))
13
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
12
+ require 'active_record'
13
+ require 'active_record_inline_schema'
14
+
14
15
  require 'cohort_analysis'
15
16
 
16
- if ::Bundler.definition.specs['ruby-debug19'].first or ::Bundler.definition.specs['ruby-debug'].first
17
+ if ::Bundler.definition.specs['debugger'].first
18
+ require 'debugger'
19
+ elsif ::Bundler.definition.specs['ruby-debug'].first
17
20
  require 'ruby-debug'
18
21
  end
19
22
 
20
23
  # require 'logger'
21
24
  # ActiveRecord::Base.logger = Logger.new($stdout)
22
25
 
23
- ActiveRecord::Base.establish_connection(
24
- 'adapter' => 'mysql2',
25
- 'database' => 'test_cohort_analysis',
26
- 'username' => 'root',
27
- 'password' => 'password'
28
- )
26
+ ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
27
+
28
+ Arel::Table.engine = ActiveRecord::Base
29
+
30
+ # https://gist.github.com/1560208 - shared examples in minispec
31
+
32
+ MiniTest::Spec.class_eval do
33
+ # start transaction
34
+ before do
35
+ # activerecord-3.2.3/lib/active_record/fixtures.rb
36
+ @fixture_connections = ActiveRecord::Base.connection_handler.connection_pools.values.map(&:connection)
37
+ @fixture_connections.each do |connection|
38
+ connection.increment_open_transactions
39
+ connection.transaction_joinable = false
40
+ connection.begin_db_transaction
41
+ end
42
+ end
43
+
44
+ # rollback
45
+ after do
46
+ @fixture_connections.each do |connection|
47
+ if connection.open_transactions != 0
48
+ connection.rollback_db_transaction
49
+ connection.decrement_open_transactions
50
+ end
51
+ end
52
+ @fixture_connections.clear
53
+ ActiveRecord::Base.clear_active_connections!
54
+ end
55
+
56
+ def self.shared_examples
57
+ @shared_examples ||= {}
58
+ end
59
+ end
60
+
61
+ module MiniTest::Spec::SharedExamples
62
+ def shared_examples_for(desc, &block)
63
+ MiniTest::Spec.shared_examples[desc] = block
64
+ end
65
+
66
+ def it_behaves_like(desc)
67
+ self.instance_eval do
68
+ MiniTest::Spec.shared_examples[desc].call
69
+ end
70
+ end
71
+ end
72
+
73
+ Object.class_eval { include(MiniTest::Spec::SharedExamples) }