cohort_analysis 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +26 -0
- data/CHANGELOG +19 -0
- data/Gemfile +12 -0
- data/LICENSE +20 -0
- data/README.markdown +87 -0
- data/Rakefile +15 -0
- data/cohort_analysis.gemspec +21 -0
- data/lib/cohort_analysis.rb +26 -0
- data/lib/cohort_analysis/active_record_base_class_methods.rb +11 -0
- data/lib/cohort_analysis/active_record_relation_instance_methods.rb +13 -0
- data/lib/cohort_analysis/arel_visitors_visitor_instance_methods.rb +7 -0
- data/lib/cohort_analysis/strategy.rb +31 -0
- data/lib/cohort_analysis/strategy/big.rb +20 -0
- data/lib/cohort_analysis/strategy/strict.rb +30 -0
- data/lib/cohort_analysis/version.rb +3 -0
- data/test/helper.rb +28 -0
- data/test/test_cohort_analysis.rb +222 -0
- metadata +118 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
22
|
+
Gemfile.lock
|
23
|
+
test/test.log
|
24
|
+
*.gem
|
25
|
+
.yardoc
|
26
|
+
doc/
|
data/CHANGELOG
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
0.4.0 / 2012-02-28
|
2
|
+
|
3
|
+
* renamed to cohort_analysis
|
4
|
+
|
5
|
+
* new, simplified syntax - see README.markdown
|
6
|
+
|
7
|
+
0.3.0 / 2012-02-27
|
8
|
+
|
9
|
+
* Now my_strategy = Person.strategy({:favorite_color => 'heliotrope', :birthdate => @date_range}, :importance => [:birthdate, :favorite_color]) will return an Arel::Nodes::Node which can be combined like Person.where(my_strategy.and("gender = 'male'")) - it does NOT return a "scope" like before.
|
10
|
+
|
11
|
+
* Refactor to take advantage of ARel.
|
12
|
+
|
13
|
+
0.2.0
|
14
|
+
|
15
|
+
* No longer "flattens" or "sanitizes" characteristics by turning records into integer IDs, etc. You should pass in exactly what you would pass into a normal ActiveRecord relation/scope.
|
16
|
+
|
17
|
+
0.1.0
|
18
|
+
|
19
|
+
* First version!
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Seamus Abshere
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# cohort_analysis
|
2
|
+
|
3
|
+
Lets you do cohort analysis based on two strategies: "big", which discards characteristics for the maximum cohort result, and "strict", which discards characteristics in order until a minimum cohort size is reached.
|
4
|
+
|
5
|
+
Replaces [`cohort_scope`](https://github.com/seamusabshere/cohort_scope).
|
6
|
+
|
7
|
+
## Where it's used
|
8
|
+
|
9
|
+
* [Brighter Planet CM1 Impact Estimate web service](http://impact.brighterplanet.com)
|
10
|
+
* [Flight environmental impact model](https://github.com/brighterplanet/flight)
|
11
|
+
|
12
|
+
## Strategies
|
13
|
+
|
14
|
+
<dl>
|
15
|
+
<dt><code>:big</code></dt>
|
16
|
+
<dd>Default. Iteratively discards the characteristic that is most "restrictive," yielding the largest possible cohort. Note that it stops discarding after the minimum cohort size is reached.</dd>
|
17
|
+
<dt><code>:strict</code></dt>
|
18
|
+
<dd>Discards characteristics according to <code>:priority</code>, starting with the last (least important) entry, until the minimum cohort size is reached.</dd>
|
19
|
+
</dl>
|
20
|
+
|
21
|
+
### `:big` example
|
22
|
+
|
23
|
+
This is straight from the tests:
|
24
|
+
|
25
|
+
# make some fixtures
|
26
|
+
1_000.times { FactoryGirl.create(:lax) }
|
27
|
+
100.times { FactoryGirl.create(:lax_sfo) }
|
28
|
+
10.times { FactoryGirl.create(:lax_sfo_co) }
|
29
|
+
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
30
|
+
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
31
|
+
|
32
|
+
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
33
|
+
# don't discard anything
|
34
|
+
Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
|
35
|
+
# discard airline
|
36
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
|
37
|
+
# discard plane and airline
|
38
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
|
39
|
+
# discard plane and airline and dest
|
40
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
|
41
|
+
|
42
|
+
lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
|
43
|
+
# don't discard anything
|
44
|
+
Flight.cohort(lax_sfo_a320).count.must_equal 4
|
45
|
+
# discard plane
|
46
|
+
Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
|
47
|
+
# discard plane and dest
|
48
|
+
Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
|
49
|
+
|
50
|
+
# off the rails here a bit
|
51
|
+
woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
|
52
|
+
# discard plane
|
53
|
+
Flight.cohort(woah_lax_co_a320).count.must_equal 10
|
54
|
+
# discard plane and airline
|
55
|
+
Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
|
56
|
+
|
57
|
+
### `:strict` example
|
58
|
+
|
59
|
+
Also from the tests...
|
60
|
+
|
61
|
+
# make some fixtures
|
62
|
+
1_000.times { FactoryGirl.create(:lax) }
|
63
|
+
100.times { FactoryGirl.create(:lax_sfo) }
|
64
|
+
10.times { FactoryGirl.create(:lax_sfo_co) }
|
65
|
+
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
66
|
+
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
67
|
+
|
68
|
+
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
69
|
+
priority = [:origin, :dest, :airline, :plane]
|
70
|
+
# discard nothing
|
71
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
72
|
+
# (force) discard plane, then (force) discard airline
|
73
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
|
74
|
+
# (force) discard plane, then (force) discard airline, then (force) discard dest
|
75
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
|
76
|
+
|
77
|
+
priority = [:plane, :airline, :dest, :origin]
|
78
|
+
# discard nothing
|
79
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
80
|
+
# (force) discard origin, then (force) discard dest, then (force) discard airline
|
81
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
|
82
|
+
# gives up!
|
83
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
|
84
|
+
|
85
|
+
## Copyright
|
86
|
+
|
87
|
+
Copyright (c) 2012 Brighter Planet, Inc.
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/testtask'
|
6
|
+
Rake::TestTask.new(:test) do |test|
|
7
|
+
test.libs << 'lib' << 'test'
|
8
|
+
test.pattern = 'test/**/test_*.rb'
|
9
|
+
test.verbose = true
|
10
|
+
end
|
11
|
+
|
12
|
+
task :default => :test
|
13
|
+
|
14
|
+
require 'yard'
|
15
|
+
YARD::Rake::YardocTask.new
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/cohort_analysis/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner", "Ian Hough"]
|
6
|
+
gem.email = ["seamus@abshere.net", 'andy@rossmeissl.net', 'dkastner@gmail.com', 'ijhough@gmail.com']
|
7
|
+
desc = %q{Lets you do cohort analysis based on two strategies: "big", which discards characteristics for the maximum cohort result, and "strict", which discards characteristics in order until a minimum cohort size is reached.}
|
8
|
+
gem.description = desc
|
9
|
+
gem.summary = desc
|
10
|
+
gem.homepage = "https://github.com/seamusabshere/cohort_analysis"
|
11
|
+
|
12
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
13
|
+
gem.files = `git ls-files`.split("\n")
|
14
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
gem.name = "cohort_analysis"
|
16
|
+
gem.require_paths = ["lib"]
|
17
|
+
gem.version = CohortAnalysis::VERSION
|
18
|
+
|
19
|
+
gem.add_runtime_dependency "activesupport", '>=3'
|
20
|
+
gem.add_runtime_dependency "activerecord", '>=3'
|
21
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'arel'
|
2
|
+
require 'active_record'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
|
5
|
+
require 'cohort_analysis/strategy'
|
6
|
+
require 'cohort_analysis/strategy/big'
|
7
|
+
require 'cohort_analysis/strategy/strict'
|
8
|
+
|
9
|
+
require 'cohort_analysis/active_record_base_class_methods'
|
10
|
+
require 'cohort_analysis/active_record_relation_instance_methods'
|
11
|
+
require 'cohort_analysis/arel_visitors_visitor_instance_methods'
|
12
|
+
|
13
|
+
module CohortAnalysis
  # Normalizes a set of characteristics into query conditions.
  #
  # An Array is read as a list of <tt>[key, value]</tt> pairs and folded into
  # a new Hash (a later pair overwrites an earlier pair with the same key).
  # Anything that is not an Array is handed back untouched.
  def self.conditions_for(characteristics)
    return characteristics unless ::Array === characteristics

    characteristics.each_with_object({}) do |(key, value), conditions|
      conditions[key] = value
    end
  end
end
|
23
|
+
|
24
|
+
ActiveRecord::Base.extend CohortAnalysis::ActiveRecordBaseClassMethods
|
25
|
+
ActiveRecord::Relation.send :include, CohortAnalysis::ActiveRecordRelationInstanceMethods
|
26
|
+
Arel::Visitors::Visitor.send :include, CohortAnalysis::ArelVisitorsVisitorInstanceMethods
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module CohortAnalysis
  # Cohort helpers that get included into ActiveRecord::Relation.
  module ActiveRecordRelationInstanceMethods
    # Narrows this relation to the cohort described by +characteristics+.
    #
    # Takes the same arguments as #cohort_constraint and passes the resulting
    # constraint to +where+.
    def cohort(characteristics, options = {})
      constraint = cohort_constraint(characteristics, options)
      where(constraint)
    end

    # Builds the cohort constraint without applying it to the relation.
    #
    # +options+ may be keyed by strings or symbols. <tt>:strategy</tt> names
    # the class under CohortAnalysis::Strategy to instantiate (camelcased,
    # <tt>:big</tt> when omitted); every other option is handed to that
    # strategy's constructor along with this relation and +characteristics+.
    def cohort_constraint(characteristics, options = {})
      strategy_options = options.symbolize_keys
      strategy_name = strategy_options.delete(:strategy) || :big
      strategy_class = Strategy.const_get(strategy_name.to_s.camelcase)
      strategy_class.new(self, characteristics, strategy_options)
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module CohortAnalysis
  # Base class for cohort strategies.
  #
  # A strategy starts from the full set of characteristics and keeps
  # discarding them (via <tt>reduce!</tt>, which subclasses must provide)
  # until the relation constrained by the remaining characteristics contains
  # at least <tt>minimum_size</tt> records.
  class Strategy < ::Arel::Nodes::Node
    # SQL fragment that can never be true; used when no cohort could be found.
    IMPOSSIBLE = '1 = 2'

    # active_record_relation:: relation the cohort is searched in
    # characteristics:: Hash of conditions, or an Array of [key, value] pairs
    # options:: <tt>:minimum_size</tt> (symbol key) - smallest acceptable
    #           cohort, defaults to 1
    def initialize(active_record_relation, characteristics, options = {})
      @active_record_relation = active_record_relation
      @characteristics = characteristics
      # Work on a Hash copy: the reduce! implementations rely on Hash methods
      # (keys, except, has_key?, delete), so Array-of-pairs input is converted
      # up front, and the caller's object is never mutated.
      @reduced_characteristics = CohortAnalysis.conditions_for(characteristics).dup
      @minimum_size = options.fetch(:minimum_size, 1)
    end

    # The resolved constraint as an SQL string. Computed on first use and
    # memoized, so the database is only queried once per strategy instance.
    def expr
      @expr ||= resolve!
    end
    alias :to_sql :expr

    private

    # Look for a scope that meets the characteristics and is at least
    # <tt>minimum_size</tt>, discarding one characteristic per round.
    #
    # Returns the SQL of the first relation that is large enough, or
    # IMPOSSIBLE once every characteristic has been discarded.
    def resolve!
      until @reduced_characteristics.empty?
        candidate = @active_record_relation.where(CohortAnalysis.conditions_for(@reduced_characteristics))
        return candidate.constraints.inject(:and).to_sql if candidate.count >= @minimum_size
        reduce!
      end
      IMPOSSIBLE
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module CohortAnalysis
  class Strategy
    # Strategy that always gives up the characteristic whose removal yields
    # the largest cohort.
    class Big < Strategy
      # Drops one characteristic from the working set.
      #
      # Each remaining characteristic is removed in turn and the resulting
      # relation is counted; the one whose removal gives the highest count is
      # discarded. With fewer than two characteristics left the set is simply
      # emptied. Returns the new working set.
      def reduce!
        keys = @reduced_characteristics.keys
        @reduced_characteristics =
          if keys.length >= 2
            discarded = keys.max_by { |key| cohort_size_without(key) }
            @reduced_characteristics.except(discarded)
          else
            {}
          end
      end

      private

      # Number of records matched once +key+ is left out of the working set.
      def cohort_size_without(key)
        remaining = @reduced_characteristics.except(key)
        @active_record_relation.where(CohortAnalysis.conditions_for(remaining)).count
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module CohortAnalysis
  class Strategy
    # Strategy that gives up characteristics in a fixed order, least
    # important first.
    class Strict < Strategy
      # Accepts the same arguments as Strategy#initialize plus the
      # <tt>:priority</tt> option: a list of characteristic keys ordered from
      # most to least important.
      #
      # Without <tt>:priority</tt> the order of +characteristics+ itself is
      # used, which on Ruby 1.8 is only allowed for an
      # ActiveSupport::OrderedHash; anything else raises ArgumentError.
      def initialize(active_record_relation, characteristics, options = {})
        super
        priority = options[:priority]
        if priority
          # Stored reversed so the least important key is found first.
          @reverse_priority = priority.reverse
        elsif ::RUBY_VERSION < '1.9' && !characteristics.is_a?(::ActiveSupport::OrderedHash)
          raise ::ArgumentError, "[cohort_analysis] Since Ruby 1.8 hashes are not ordered, please use :priority => [...] or pass characteristics as an ActiveSupport::OrderedHash (not recommended)"
        end
      end

      # Removes the least important remaining characteristic from the working
      # set and returns its value.
      #
      # Raises RuntimeError when no remaining characteristic can be selected,
      # e.g. because the priority list names none of them.
      def reduce!
        victim =
          if @reverse_priority
            @reverse_priority.detect { |key| @reduced_characteristics.has_key?(key) }
          else
            @reduced_characteristics.keys.last
          end
        unless victim
          raise ::RuntimeError, "[cohort_analysis] Priority improperly specified"
        end
        @reduced_characteristics.delete victim
      end
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
require 'minitest/spec'
|
5
|
+
require 'minitest/autorun'
|
6
|
+
require 'minitest/reporters'
|
7
|
+
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
8
|
+
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
9
|
+
|
10
|
+
require 'factory_girl'
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
require 'cohort_analysis'
|
15
|
+
|
16
|
+
if ::Bundler.definition.specs['ruby-debug19'].first or ::Bundler.definition.specs['ruby-debug'].first
|
17
|
+
require 'ruby-debug'
|
18
|
+
end
|
19
|
+
|
20
|
+
# require 'logger'
|
21
|
+
# ActiveRecord::Base.logger = Logger.new($stdout)
|
22
|
+
|
23
|
+
ActiveRecord::Base.establish_connection(
|
24
|
+
'adapter' => 'mysql2',
|
25
|
+
'database' => 'test_cohort_analysis',
|
26
|
+
'username' => 'root',
|
27
|
+
'password' => 'password'
|
28
|
+
)
|
@@ -0,0 +1,222 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
c = ActiveRecord::Base.connection
|
4
|
+
c.create_table 'flights', :force => true do |t|
|
5
|
+
t.string 'origin'
|
6
|
+
t.string 'dest'
|
7
|
+
t.string 'airline'
|
8
|
+
t.string 'plane'
|
9
|
+
end
|
10
|
+
|
11
|
+
class Flight < ActiveRecord::Base
|
12
|
+
end
|
13
|
+
|
14
|
+
FactoryGirl.define do
|
15
|
+
factory :lax, :class => Flight do
|
16
|
+
origin 'LAX'
|
17
|
+
end
|
18
|
+
factory :lax_sfo, :class => Flight do
|
19
|
+
origin 'LAX'
|
20
|
+
dest 'SFO'
|
21
|
+
end
|
22
|
+
factory :lax_sfo_co, :class => Flight do
|
23
|
+
origin 'LAX'
|
24
|
+
dest 'SFO'
|
25
|
+
airline 'Continental'
|
26
|
+
end
|
27
|
+
factory :lax_sfo_a320, :class => Flight do
|
28
|
+
origin 'LAX'
|
29
|
+
dest 'SFO'
|
30
|
+
plane 'A320'
|
31
|
+
end
|
32
|
+
factory :lax_sfo_aa_a320, :class => Flight do
|
33
|
+
origin 'LAX'
|
34
|
+
dest 'SFO'
|
35
|
+
airline 'American'
|
36
|
+
plane 'A320'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe CohortAnalysis do
|
41
|
+
before do
|
42
|
+
Flight.delete_all
|
43
|
+
end
|
44
|
+
|
45
|
+
describe 'ActiveRecordBaseClassMethods' do
|
46
|
+
describe :cohort do
|
47
|
+
it "defaults to :minimum_size => 1" do
|
48
|
+
FactoryGirl.create(:lax)
|
49
|
+
Flight.cohort({:origin => 'LAX'}).count.must_equal 1
|
50
|
+
Flight.cohort({:origin => 'LAX'}, :minimum_size => 2).count.must_equal 0
|
51
|
+
end
|
52
|
+
|
53
|
+
it "doesn't discard characteristics if it doesn't need to" do
|
54
|
+
FactoryGirl.create(:lax)
|
55
|
+
FactoryGirl.create(:lax_sfo)
|
56
|
+
Flight.cohort(:origin => 'LAX', :dest => 'SFO').count.must_equal 1
|
57
|
+
end
|
58
|
+
|
59
|
+
it "discards characteristics until it can fulfil the minimum size" do
|
60
|
+
FactoryGirl.create(:lax)
|
61
|
+
FactoryGirl.create(:lax_sfo)
|
62
|
+
drops_dest = Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :minimum_size => 2)
|
63
|
+
drops_dest.count.must_equal 2
|
64
|
+
drops_dest.one? { |flight| flight.dest != 'SFO' }.must_equal true
|
65
|
+
end
|
66
|
+
|
67
|
+
it "defaults to :strategy => :big" do
|
68
|
+
FactoryGirl.create(:lax)
|
69
|
+
Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :big).count.must_equal Flight.cohort(:origin => 'LAX', :dest => 'SFO').count
|
70
|
+
Flight.cohort({:dest => 'SFO', :origin => 'LAX'}, :strategy => :big).count.must_equal Flight.cohort(:dest => 'SFO', :origin => 'LAX').count
|
71
|
+
end
|
72
|
+
|
73
|
+
it "offers :strategy => :strict" do
|
74
|
+
FactoryGirl.create(:lax)
|
75
|
+
if RUBY_VERSION >= '1.9'
|
76
|
+
# native ordered hashes
|
77
|
+
Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :strict).count.must_equal 1
|
78
|
+
Flight.cohort({:dest => 'SFO', :origin => 'LAX'}, :strategy => :strict).count.must_equal 0
|
79
|
+
else
|
80
|
+
# activesupport provides ActiveSupport::OrderedHash
|
81
|
+
origin_important = ActiveSupport::OrderedHash.new
|
82
|
+
origin_important[:origin] = 'LAX'
|
83
|
+
origin_important[:dest] = 'SFO'
|
84
|
+
dest_important = ActiveSupport::OrderedHash.new
|
85
|
+
dest_important[:dest] = 'SFO'
|
86
|
+
dest_important[:origin] = 'LAX'
|
87
|
+
Flight.cohort(origin_important, :strategy => :strict).count.must_equal 1
|
88
|
+
Flight.cohort(dest_important, :strategy => :strict).count.must_equal 0
|
89
|
+
|
90
|
+
lambda {
|
91
|
+
Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :strict).count
|
92
|
+
}.must_raise(ArgumentError, 'hash')
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
it "lets you pick :priority of keys when using :strict strategy" do
|
97
|
+
FactoryGirl.create(:lax)
|
98
|
+
Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :strict, :priority => [:origin, :dest]).count.must_equal 1
|
99
|
+
Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :strict, :priority => [:dest, :origin]).count.must_equal 0
|
100
|
+
Flight.cohort({:dest => 'SFO', :origin => 'LAX'}, :strategy => :strict, :priority => [:origin, :dest]).count.must_equal 1
|
101
|
+
Flight.cohort({:dest => 'SFO', :origin => 'LAX'}, :strategy => :strict, :priority => [:dest, :origin]).count.must_equal 0
|
102
|
+
end
|
103
|
+
|
104
|
+
it "lets you play with more than 1 or 2 characteristics" do
|
105
|
+
ActiveRecord::Base.silence do
|
106
|
+
# make some fixtures
|
107
|
+
1_000.times { FactoryGirl.create(:lax) }
|
108
|
+
100.times { FactoryGirl.create(:lax_sfo) }
|
109
|
+
10.times { FactoryGirl.create(:lax_sfo_co) }
|
110
|
+
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
111
|
+
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
112
|
+
end
|
113
|
+
Flight.count.must_equal 1_114 # sanity check
|
114
|
+
|
115
|
+
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
116
|
+
# don't discard anything
|
117
|
+
Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
|
118
|
+
# discard airline
|
119
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
|
120
|
+
# discard plane and airline
|
121
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
|
122
|
+
# discard plane and airline and dest
|
123
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
|
124
|
+
|
125
|
+
lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
|
126
|
+
# don't discard anything
|
127
|
+
Flight.cohort(lax_sfo_a320).count.must_equal 4
|
128
|
+
# discard plane
|
129
|
+
Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
|
130
|
+
# discard plane and dest
|
131
|
+
Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
|
132
|
+
|
133
|
+
# off the rails here a bit
|
134
|
+
woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
|
135
|
+
# discard plane
|
136
|
+
Flight.cohort(woah_lax_co_a320).count.must_equal 10
|
137
|
+
# discard plane and airline
|
138
|
+
Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
|
139
|
+
end
|
140
|
+
|
141
|
+
it "lets you play with multiple characteristics in :strategy => :strict" do
|
142
|
+
ActiveRecord::Base.silence do
|
143
|
+
# make some fixtures
|
144
|
+
1_000.times { FactoryGirl.create(:lax) }
|
145
|
+
100.times { FactoryGirl.create(:lax_sfo) }
|
146
|
+
10.times { FactoryGirl.create(:lax_sfo_co) }
|
147
|
+
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
148
|
+
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
149
|
+
end
|
150
|
+
|
151
|
+
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
152
|
+
priority = [:origin, :dest, :airline, :plane]
|
153
|
+
# discard nothing
|
154
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
155
|
+
# (force) discard plane, then (force) discard airline
|
156
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
|
157
|
+
# (force) discard plane, then (force) discard airline, then (force) discard dest
|
158
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
|
159
|
+
|
160
|
+
priority = [:plane, :airline, :dest, :origin]
|
161
|
+
# discard nothing
|
162
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
163
|
+
# (force) discard origin, then (force) discard dest, then (force) discard airline
|
164
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
|
165
|
+
# gives up!
|
166
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
describe :cohort_constraint do
|
171
|
+
it "can be used like other ARel constraints" do
|
172
|
+
FactoryGirl.create(:lax)
|
173
|
+
Flight.where(Flight.cohort_constraint(:origin => 'LAX')).count.must_equal 1
|
174
|
+
Flight.where(Flight.cohort_constraint({:origin => 'LAX'}, :minimum_size => 2)).count.must_equal 0
|
175
|
+
end
|
176
|
+
|
177
|
+
it "can be combined with other ARel constraints" do
|
178
|
+
FactoryGirl.create(:lax)
|
179
|
+
FactoryGirl.create(:lax_sfo)
|
180
|
+
origin_lax_constraint = Flight.cohort_constraint(:origin => 'LAX')
|
181
|
+
dest_sfo_constraint = Flight.arel_table[:dest].eq('SFO')
|
182
|
+
Flight.where(dest_sfo_constraint.and(origin_lax_constraint)).count.must_equal 1
|
183
|
+
Flight.where(dest_sfo_constraint.or(origin_lax_constraint)).count.must_equal 2
|
184
|
+
Flight.where(origin_lax_constraint.and(dest_sfo_constraint)).count.must_equal 1
|
185
|
+
Flight.where(origin_lax_constraint.or(dest_sfo_constraint)).count.must_equal 2
|
186
|
+
end
|
187
|
+
|
188
|
+
# Caution!
|
189
|
+
it "is NOT smart enough to enforce minimum size when composed" do
|
190
|
+
FactoryGirl.create(:lax)
|
191
|
+
FactoryGirl.create(:lax_sfo)
|
192
|
+
origin_lax_constraint = Flight.cohort_constraint({:origin => 'LAX'}, :minimum_size => 2)
|
193
|
+
dest_sfo_constraint = Flight.arel_table[:dest].eq('SFO')
|
194
|
+
Flight.where(dest_sfo_constraint.and(origin_lax_constraint)).count.must_equal 1 # see how minimum_size is ignored?
|
195
|
+
Flight.where(origin_lax_constraint.and(dest_sfo_constraint)).count.must_equal 1 # it's because the cohort constraint resolves itself before allowing the ARel visitor to continue
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
describe 'ActiveRecordRelationInstanceMethods' do
|
201
|
+
describe :cohort do
|
202
|
+
it "is the proper way to compose when other ARel constraints are present" do
|
203
|
+
FactoryGirl.create(:lax)
|
204
|
+
FactoryGirl.create(:lax_sfo)
|
205
|
+
Flight.where(:dest => 'SFO').cohort(:origin => 'LAX').count.must_equal 1
|
206
|
+
Flight.where(:dest => 'SFO').cohort({:origin => 'LAX'}, :minimum_size => 2).count.must_equal 0
|
207
|
+
end
|
208
|
+
end
|
209
|
+
describe :cohort_constraint do
|
210
|
+
it "can also be used (carefully) to compose with other ARel constraints" do
|
211
|
+
FactoryGirl.create(:lax)
|
212
|
+
FactoryGirl.create(:lax_sfo)
|
213
|
+
dest_sfo_relation = Flight.where(:dest => 'SFO')
|
214
|
+
origin_lax_constraint_from_dest_sfo_relation = dest_sfo_relation.cohort_constraint(:origin => 'LAX')
|
215
|
+
Flight.where(origin_lax_constraint_from_dest_sfo_relation).count.must_equal 1
|
216
|
+
dest_sfo_relation = Flight.where(:dest => 'SFO')
|
217
|
+
origin_lax_constraint_from_dest_sfo_relation = dest_sfo_relation.cohort_constraint({:origin => 'LAX'}, :minimum_size => 2)
|
218
|
+
Flight.where(origin_lax_constraint_from_dest_sfo_relation).count.must_equal 0
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
metadata
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cohort_analysis
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 15
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 0.4.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Seamus Abshere
|
14
|
+
- Andy Rossmeissl
|
15
|
+
- Derek Kastner
|
16
|
+
- Ian Hough
|
17
|
+
autorequire:
|
18
|
+
bindir: bin
|
19
|
+
cert_chain: []
|
20
|
+
|
21
|
+
date: 2012-02-29 00:00:00 Z
|
22
|
+
dependencies:
|
23
|
+
- !ruby/object:Gem::Dependency
|
24
|
+
name: activesupport
|
25
|
+
prerelease: false
|
26
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
hash: 5
|
32
|
+
segments:
|
33
|
+
- 3
|
34
|
+
version: "3"
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: activerecord
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 5
|
46
|
+
segments:
|
47
|
+
- 3
|
48
|
+
version: "3"
|
49
|
+
type: :runtime
|
50
|
+
version_requirements: *id002
|
51
|
+
description: "Lets you do cohort analysis based on two strategies: \"big\", which discards characteristics for the maximum cohort result, and \"strict\", which discards characteristics in order until a minimum cohort size is reached."
|
52
|
+
email:
|
53
|
+
- seamus@abshere.net
|
54
|
+
- andy@rossmeissl.net
|
55
|
+
- dkastner@gmail.com
|
56
|
+
- ijhough@gmail.com
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .document
|
65
|
+
- .gitignore
|
66
|
+
- CHANGELOG
|
67
|
+
- Gemfile
|
68
|
+
- LICENSE
|
69
|
+
- README.markdown
|
70
|
+
- Rakefile
|
71
|
+
- cohort_analysis.gemspec
|
72
|
+
- lib/cohort_analysis.rb
|
73
|
+
- lib/cohort_analysis/active_record_base_class_methods.rb
|
74
|
+
- lib/cohort_analysis/active_record_relation_instance_methods.rb
|
75
|
+
- lib/cohort_analysis/arel_visitors_visitor_instance_methods.rb
|
76
|
+
- lib/cohort_analysis/strategy.rb
|
77
|
+
- lib/cohort_analysis/strategy/big.rb
|
78
|
+
- lib/cohort_analysis/strategy/strict.rb
|
79
|
+
- lib/cohort_analysis/version.rb
|
80
|
+
- test/helper.rb
|
81
|
+
- test/test_cohort_analysis.rb
|
82
|
+
homepage: https://github.com/seamusabshere/cohort_analysis
|
83
|
+
licenses: []
|
84
|
+
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
96
|
+
segments:
|
97
|
+
- 0
|
98
|
+
version: "0"
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
none: false
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
hash: 3
|
105
|
+
segments:
|
106
|
+
- 0
|
107
|
+
version: "0"
|
108
|
+
requirements: []
|
109
|
+
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 1.8.15
|
112
|
+
signing_key:
|
113
|
+
specification_version: 3
|
114
|
+
summary: "Lets you do cohort analysis based on two strategies: \"big\", which discards characteristics for the maximum cohort result, and \"strict\", which discards characteristics in order until a minimum cohort size is reached."
|
115
|
+
test_files:
|
116
|
+
- test/helper.rb
|
117
|
+
- test/test_cohort_analysis.rb
|
118
|
+
has_rdoc:
|