cohort_analysis 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +26 -0
- data/CHANGELOG +19 -0
- data/Gemfile +12 -0
- data/LICENSE +20 -0
- data/README.markdown +87 -0
- data/Rakefile +15 -0
- data/cohort_analysis.gemspec +21 -0
- data/lib/cohort_analysis.rb +26 -0
- data/lib/cohort_analysis/active_record_base_class_methods.rb +11 -0
- data/lib/cohort_analysis/active_record_relation_instance_methods.rb +13 -0
- data/lib/cohort_analysis/arel_visitors_visitor_instance_methods.rb +7 -0
- data/lib/cohort_analysis/strategy.rb +31 -0
- data/lib/cohort_analysis/strategy/big.rb +20 -0
- data/lib/cohort_analysis/strategy/strict.rb +30 -0
- data/lib/cohort_analysis/version.rb +3 -0
- data/test/helper.rb +28 -0
- data/test/test_cohort_analysis.rb +222 -0
- metadata +118 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
22
|
+
Gemfile.lock
|
23
|
+
test/test.log
|
24
|
+
*.gem
|
25
|
+
.yardoc
|
26
|
+
doc/
|
data/CHANGELOG
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
0.4.0 / 2012-02-28
|
2
|
+
|
3
|
+
* renamed to cohort_analysis
|
4
|
+
|
5
|
+
* new, simplified syntax - see README.markdown
|
6
|
+
|
7
|
+
0.3.0 / 2012-02-27
|
8
|
+
|
9
|
+
* Now my_strategy = Person.strategy({:favorite_color => 'heliotrope', :birthdate => @date_range}, :importance => [:birthdate, :favorite_color]) will return an Arel::Nodes::Node which can be combined like Person.where(my_strategy.and("gender = 'male'")) - it does NOT return a "scope" like before.
|
10
|
+
|
11
|
+
* Refactor to take advantage of ARel.
|
12
|
+
|
13
|
+
0.2.0
|
14
|
+
|
15
|
+
* No longer "flattens" or "sanitizes" characteristics by turning records into integer IDs, etc. You should pass in exactly what you would pass into a normal ActiveRecord relation/scope.
|
16
|
+
|
17
|
+
0.1.0
|
18
|
+
|
19
|
+
* First version!
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Seamus Abshere
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# cohort_analysis
|
2
|
+
|
3
|
+
Lets you do cohort analysis based on two strategies: "big", which discards characteristics for the maximum cohort result, and "strict", which discards characteristics in order until a minimum cohort size is reached.
|
4
|
+
|
5
|
+
Replaces [`cohort_scope`](https://github.com/seamusabshere/cohort_scope).
|
6
|
+
|
7
|
+
## Where it's used
|
8
|
+
|
9
|
+
* [Brighter Planet CM1 Impact Estimate web service](http://impact.brighterplanet.com)
|
10
|
+
* [Flight environmental impact model](https://github.com/brighterplanet/flight)
|
11
|
+
|
12
|
+
## Strategies
|
13
|
+
|
14
|
+
<dl>
|
15
|
+
<dt><code>:big</code></dt>
|
16
|
+
<dd>Default. Iteratively discards the characteristic that is most "restrictive," yielding the largest possible cohort. Note that it stops discarding after the minimum cohort size is reached.</dd>
|
17
|
+
<dt><code>:strict</code></dt>
|
18
|
+
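<dd>Discards characteristics in reverse <code>:priority</code> order (least important first) until the minimum cohort size is reached. Without <code>:priority</code>, the last key of the characteristics hash is discarded first.</dd>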
|
19
|
+
</dl>
|
20
|
+
|
21
|
+
### `:big` example
|
22
|
+
|
23
|
+
This is straight from the tests:
|
24
|
+
|
25
|
+
# make some fixtures
|
26
|
+
1_000.times { FactoryGirl.create(:lax) }
|
27
|
+
100.times { FactoryGirl.create(:lax_sfo) }
|
28
|
+
10.times { FactoryGirl.create(:lax_sfo_co) }
|
29
|
+
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
30
|
+
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
31
|
+
|
32
|
+
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
33
|
+
# don't discard anything
|
34
|
+
Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
|
35
|
+
# discard airline
|
36
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
|
37
|
+
# discard plane and airline
|
38
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
|
39
|
+
# discard plane and airline and dest
|
40
|
+
Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114
|
41
|
+
|
42
|
+
lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
|
43
|
+
# don't discard anything
|
44
|
+
Flight.cohort(lax_sfo_a320).count.must_equal 4
|
45
|
+
# discard plane
|
46
|
+
Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
|
47
|
+
# discard plane and dest
|
48
|
+
Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114
|
49
|
+
|
50
|
+
# off the rails here a bit
|
51
|
+
woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
|
52
|
+
# discard plane
|
53
|
+
Flight.cohort(woah_lax_co_a320).count.must_equal 10
|
54
|
+
# discard plane and airline
|
55
|
+
Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
|
56
|
+
|
57
|
+
### `:strict` example
|
58
|
+
|
59
|
+
Also from the tests...
|
60
|
+
|
61
|
+
# make some fixtures
|
62
|
+
1_000.times { FactoryGirl.create(:lax) }
|
63
|
+
100.times { FactoryGirl.create(:lax_sfo) }
|
64
|
+
10.times { FactoryGirl.create(:lax_sfo_co) }
|
65
|
+
3.times { FactoryGirl.create(:lax_sfo_a320) }
|
66
|
+
1.times { FactoryGirl.create(:lax_sfo_aa_a320) }
|
67
|
+
|
68
|
+
lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
|
69
|
+
priority = [:origin, :dest, :airline, :plane]
|
70
|
+
# discard nothing
|
71
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
72
|
+
# (force) discard plane, then (force) discard airline
|
73
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
|
74
|
+
# (force) discard plane, then (force) discard airline, then (force) discard dest
|
75
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114
|
76
|
+
|
77
|
+
priority = [:plane, :airline, :dest, :origin]
|
78
|
+
# discard nothing
|
79
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
|
80
|
+
# (force) discard origin, then (force) discard dest, then (force) discard airline
|
81
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
|
82
|
+
# gives up!
|
83
|
+
Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
|
84
|
+
|
85
|
+
## Copyright
|
86
|
+
|
87
|
+
Copyright (c) 2012 Brighter Planet, Inc.
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/testtask'
|
6
|
+
# `rake test`: run every test/**/test_*.rb file with lib/ and test/ added to
# the task's library paths.
Rake::TestTask.new(:test) do |t|
  t.verbose = true
  t.pattern = 'test/**/test_*.rb'
  t.libs.push('lib', 'test')
end

# Running bare `rake` runs the test suite.
task :default => :test

# Register YARD's Rake task for generating the API documentation.
require 'yard'
YARD::Rake::YardocTask.new
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/cohort_analysis/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem packaging metadata. File lists are taken from what git currently tracks.
Gem::Specification.new do |spec|
  # Identity
  spec.name          = "cohort_analysis"
  spec.version       = CohortAnalysis::VERSION
  spec.homepage      = "https://github.com/seamusabshere/cohort_analysis"
  spec.authors       = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner", "Ian Hough"]
  spec.email         = ["seamus@abshere.net", 'andy@rossmeissl.net', 'dkastner@gmail.com', 'ijhough@gmail.com']

  # The same text serves as both the summary and the long description.
  blurb = %q{Lets you do cohort analysis based on two strategies: "big", which discards characteristics for the maximum cohort result, and "strict", which discards characteristics in order until a minimum cohort size is reached.}
  spec.summary       = blurb
  spec.description   = blurb

  # Contents
  spec.require_paths = ["lib"]
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }

  # Runtime dependencies
  spec.add_runtime_dependency "activesupport", '>=3'
  spec.add_runtime_dependency "activerecord", '>=3'
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'arel'
|
2
|
+
require 'active_record'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
|
5
|
+
require 'cohort_analysis/strategy'
|
6
|
+
require 'cohort_analysis/strategy/big'
|
7
|
+
require 'cohort_analysis/strategy/strict'
|
8
|
+
|
9
|
+
require 'cohort_analysis/active_record_base_class_methods'
|
10
|
+
require 'cohort_analysis/active_record_relation_instance_methods'
|
11
|
+
require 'cohort_analysis/arel_visitors_visitor_instance_methods'
|
12
|
+
|
13
|
+
module CohortAnalysis
  # Normalizes a set of characteristics into something that can be handed to
  # +where+.
  #
  # An Array is treated as a list of [key, value] pairs and folded into a Hash
  # (a later pair overwrites an earlier one with the same key). Anything else
  # is returned untouched - the very same object, not a copy.
  #
  # @param characteristics [Array, Object] [key, value] pairs, or conditions
  #   that are already in their final form (typically a Hash)
  # @return [Hash, Object] a new Hash for Array input, otherwise the argument itself
  def self.conditions_for(characteristics)
    case characteristics
    when ::Array
      characteristics.each_with_object({}) { |(key, value), conditions| conditions[key] = value }
    else
      characteristics
    end
  end
end
|
23
|
+
|
24
|
+
# Install the cohort API: class-level methods on every model, instance methods
# on relations, and the visitor extension on Arel's visitors.
ActiveRecord::Base.extend(CohortAnalysis::ActiveRecordBaseClassMethods)
ActiveRecord::Relation.send(:include, CohortAnalysis::ActiveRecordRelationInstanceMethods)
Arel::Visitors::Visitor.send(:include, CohortAnalysis::ArelVisitorsVisitorInstanceMethods)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module CohortAnalysis
  module ActiveRecordRelationInstanceMethods
    # Narrows the receiver to the cohort described by +characteristics+.
    #
    # Takes the same arguments as #cohort_constraint and passes the resulting
    # constraint to +where+.
    def cohort(characteristics, options = {})
      constraint = cohort_constraint(characteristics, options)
      where(constraint)
    end

    # Builds the strategy object that represents the cohort condition.
    #
    # +options+ may use string or symbol keys. <tt>:strategy</tt> selects the
    # class under Strategy (default <tt>:big</tt>); every other option is
    # passed through to that strategy's constructor along with the receiver.
    def cohort_constraint(characteristics, options = {})
      strategy_options = options.symbolize_keys
      chosen = strategy_options.delete(:strategy) || :big
      strategy_class = Strategy.const_get(chosen.to_s.camelcase)
      strategy_class.new(self, characteristics, strategy_options)
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module CohortAnalysis
  # Base class for cohort strategies. It is an Arel node whose SQL is worked
  # out the first time #expr (alias #to_sql) is called and then memoized.
  #
  # Subclasses provide +reduce!+, which must shrink @reduced_characteristics.
  class Strategy < ::Arel::Nodes::Node
    # SQL condition that matches nothing; used once every characteristic has
    # been discarded without reaching the minimum size.
    IMPOSSIBLE = '1 = 2'

    # +options+ is read with symbol keys; <tt>:minimum_size</tt> defaults to 1.
    def initialize(active_record_relation, characteristics, options = {})
      @minimum_size = options.fetch(:minimum_size, 1)
      @active_record_relation = active_record_relation
      @characteristics = characteristics
      # Work on a copy so reductions never touch the caller's object.
      @reduced_characteristics = characteristics.dup
    end

    # The resolved SQL condition (computed once).
    def expr
      @expr ||= resolve!
    end
    alias_method :to_sql, :expr

    private

    # Keep discarding characteristics until the remaining ones match at least
    # <tt>minimum_size</tt> records; give up with IMPOSSIBLE when none are left.
    def resolve!
      until @reduced_characteristics.empty?
        candidate = @active_record_relation.where(CohortAnalysis.conditions_for(@reduced_characteristics))
        return candidate.constraints.inject(:and).to_sql if candidate.count >= @minimum_size
        reduce!
      end
      IMPOSSIBLE
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module CohortAnalysis
  class Strategy
    class Big < Strategy
      # Drops one characteristic: the one whose removal leaves the largest
      # number of matching records. Each candidate is tried with its own
      # count query.
      #
      # With fewer than two characteristics left, everything is discarded.
      def reduce!
        names = @reduced_characteristics.keys
        return @reduced_characteristics = {} if names.length < 2

        most_restrictive = names.max_by do |name|
          without_name = CohortAnalysis.conditions_for(@reduced_characteristics.except(name))
          @active_record_relation.where(without_name).count
        end
        @reduced_characteristics = @reduced_characteristics.except(most_restrictive)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module CohortAnalysis
  class Strategy
    # Discards characteristics in a fixed order, least important first.
    class Strict < Strategy
      # Accepts the same arguments as Strategy#initialize, plus
      # <tt>options[:priority]</tt>: characteristic keys listed from most to
      # least important.
      #
      # Without <tt>:priority</tt> the order of the characteristics themselves
      # is used, which is only meaningful when that order is stable.
      #
      # Raises ArgumentError on Ruby < 1.9 when no <tt>:priority</tt> is given
      # and +characteristics+ is not an ActiveSupport::OrderedHash.
      def initialize(active_record_relation, characteristics, options = {})
        super
        priority = options[:priority]
        if priority
          # Stored reversed so the least important key is found first.
          @reverse_priority = priority.reverse
        elsif ::RUBY_VERSION < '1.9' && !characteristics.is_a?(::ActiveSupport::OrderedHash)
          raise ::ArgumentError, "[cohort_analysis] Since Ruby 1.8 hashes are not ordered, please use :priority => [...] or pass characteristics as an ActiveSupport::OrderedHash (not recommended)"
        end
      end

      # Reduce characteristics by removing the least important one.
      #
      # With a priority list, that is the lowest-priority key still present;
      # otherwise it is the last remaining key. Returns the removed value.
      #
      # Raises RuntimeError when no key can be chosen, e.g. the priority list
      # names none of the remaining characteristics.
      def reduce!
        least_important_key =
          if @reverse_priority
            @reverse_priority.find { |key| @reduced_characteristics.key?(key) }
          else
            @reduced_characteristics.keys.last
          end

        unless least_important_key
          raise ::RuntimeError, "[cohort_analysis] Priority improperly specified"
        end

        @reduced_characteristics.delete(least_important_key)
      end
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Shared test bootstrap: loads the test libraries and the gem itself, then
# opens the database connection used by the suite.
require 'rubygems'
require 'bundler/setup'

require 'minitest/spec'
require 'minitest/autorun'
require 'minitest/reporters'
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new

require 'factory_girl'

# Make both test/ and lib/ requirable without installing the gem.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'cohort_analysis'

# Only load the debugger when the bundle actually contains one.
if ::Bundler.definition.specs['ruby-debug19'].first || ::Bundler.definition.specs['ruby-debug'].first
  require 'ruby-debug'
end

# require 'logger'
# ActiveRecord::Base.logger = Logger.new($stdout)

# The database name and credentials can be overridden through the
# environment; the defaults are the values this suite has always used.
ActiveRecord::Base.establish_connection(
  'adapter' => 'mysql2',
  'database' => ENV.fetch('COHORT_ANALYSIS_TEST_DATABASE', 'test_cohort_analysis'),
  'username' => ENV.fetch('COHORT_ANALYSIS_TEST_USERNAME', 'root'),
  'password' => ENV.fetch('COHORT_ANALYSIS_TEST_PASSWORD', 'password')
)
|
@@ -0,0 +1,222 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Recreate the flights table from scratch (:force => true) with one string
# column per characteristic used in the tests.
ActiveRecord::Base.connection.create_table('flights', :force => true) do |table|
  table.string('origin')
  table.string('dest')
  table.string('airline')
  table.string('plane')
end

# Minimal model backed by the flights table.
class Flight < ActiveRecord::Base; end

# Each factory is named after the attributes it sets; anything not listed is
# left unset by the factory.
FactoryGirl.define do
  factory(:lax, :class => Flight) do
    origin('LAX')
  end

  factory(:lax_sfo, :class => Flight) do
    origin('LAX')
    dest('SFO')
  end

  factory(:lax_sfo_co, :class => Flight) do
    origin('LAX')
    dest('SFO')
    airline('Continental')
  end

  factory(:lax_sfo_a320, :class => Flight) do
    origin('LAX')
    dest('SFO')
    plane('A320')
  end

  factory(:lax_sfo_aa_a320, :class => Flight) do
    origin('LAX')
    dest('SFO')
    airline('American')
    plane('A320')
  end
end
|
39
|
+
|
40
|
+
# Fixture set shared by the multi-characteristic examples, in creation order.
# It adds up to 1_114 flights: all depart LAX and 114 of them land at SFO.
sample_flights = [
  [:lax,             1_000],
  [:lax_sfo,         100],
  [:lax_sfo_co,      10],
  [:lax_sfo_a320,    3],
  [:lax_sfo_aa_a320, 1]
]
create_sample_flights = lambda do
  ActiveRecord::Base.silence do
    sample_flights.each do |factory_name, copies|
      copies.times { FactoryGirl.create(factory_name) }
    end
  end
end

describe CohortAnalysis do
  # Every example starts from an empty flights table.
  before { Flight.delete_all }

  describe 'ActiveRecordBaseClassMethods' do
    describe :cohort do
      it "defaults to :minimum_size => 1" do
        FactoryGirl.create(:lax)
        from_lax = {:origin => 'LAX'}
        Flight.cohort(from_lax).count.must_equal 1
        Flight.cohort(from_lax, :minimum_size => 2).count.must_equal 0
      end

      it "doesn't discard characteristics if it doesn't need to" do
        [:lax, :lax_sfo].each { |factory_name| FactoryGirl.create(factory_name) }
        Flight.cohort(:origin => 'LAX', :dest => 'SFO').count.must_equal 1
      end

      it "discards characteristics until it can fulfil the minimum size" do
        [:lax, :lax_sfo].each { |factory_name| FactoryGirl.create(factory_name) }
        relaxed = Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :minimum_size => 2)
        relaxed.count.must_equal 2
        relaxed.one? { |flight| flight.dest != 'SFO' }.must_equal true
      end

      it "defaults to :strategy => :big" do
        FactoryGirl.create(:lax)
        # Same characteristics in both key orders.
        [{:origin => 'LAX', :dest => 'SFO'}, {:dest => 'SFO', :origin => 'LAX'}].each do |route|
          explicit = Flight.cohort(route, :strategy => :big).count
          implicit = Flight.cohort(route).count
          explicit.must_equal implicit
        end
      end

      it "offers :strategy => :strict" do
        FactoryGirl.create(:lax)
        if RUBY_VERSION >= '1.9'
          # native ordered hashes: the last key is the first to be discarded
          Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :strict).count.must_equal 1
          Flight.cohort({:dest => 'SFO', :origin => 'LAX'}, :strategy => :strict).count.must_equal 0
        else
          # activesupport provides ActiveSupport::OrderedHash
          origin_first = ActiveSupport::OrderedHash.new
          origin_first[:origin] = 'LAX'
          origin_first[:dest] = 'SFO'
          dest_first = ActiveSupport::OrderedHash.new
          dest_first[:dest] = 'SFO'
          dest_first[:origin] = 'LAX'
          Flight.cohort(origin_first, :strategy => :strict).count.must_equal 1
          Flight.cohort(dest_first, :strategy => :strict).count.must_equal 0

          # a plain (unordered) hash is rejected
          lambda {
            Flight.cohort({:origin => 'LAX', :dest => 'SFO'}, :strategy => :strict).count
          }.must_raise(ArgumentError, 'hash')
        end
      end

      it "lets you pick :priority of keys when using :strict strategy" do
        FactoryGirl.create(:lax)
        # The outcome follows :priority, whatever the key order of the hash.
        [{:origin => 'LAX', :dest => 'SFO'}, {:dest => 'SFO', :origin => 'LAX'}].each do |route|
          Flight.cohort(route, :strategy => :strict, :priority => [:origin, :dest]).count.must_equal 1
          Flight.cohort(route, :strategy => :strict, :priority => [:dest, :origin]).count.must_equal 0
        end
      end

      it "lets you play with more than 1 or 2 characteristics" do
        create_sample_flights.call
        Flight.count.must_equal 1_114 # sanity check

        lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}
        # don't discard anything
        Flight.cohort(lax_sfo_aa_a320).count.must_equal 1
        # discard airline
        Flight.cohort(lax_sfo_aa_a320, :minimum_size => 2).count.must_equal 4
        # discard plane and airline
        Flight.cohort(lax_sfo_aa_a320, :minimum_size => 5).count.must_equal 114
        # discard plane and airline and dest
        Flight.cohort(lax_sfo_aa_a320, :minimum_size => 115).count.must_equal 1_114

        lax_sfo_a320 = {:origin => 'LAX', :dest => 'SFO', :plane => 'A320'}
        # don't discard anything
        Flight.cohort(lax_sfo_a320).count.must_equal 4
        # discard plane
        Flight.cohort(lax_sfo_a320, :minimum_size => 5).count.must_equal 114
        # discard plane and dest
        Flight.cohort(lax_sfo_a320, :minimum_size => 115).count.must_equal 1_114

        # off the rails here a bit: no fixture matches all three
        woah_lax_co_a320 = {:origin => 'LAX', :airline => 'Continental', :plane => 'A320'}
        # discard plane
        Flight.cohort(woah_lax_co_a320).count.must_equal 10
        # discard plane and airline
        Flight.cohort(woah_lax_co_a320, :minimum_size => 11).count.must_equal 1_114
      end

      it "lets you play with multiple characteristics in :strategy => :strict" do
        create_sample_flights.call

        lax_sfo_aa_a320 = {:origin => 'LAX', :dest => 'SFO', :airline => 'American', :plane => 'A320'}

        priority = [:origin, :dest, :airline, :plane]
        # discard nothing
        Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
        # (force) discard plane, then (force) discard airline
        Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 114
        # (force) discard plane, then (force) discard airline, then (force) discard dest
        Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 115).count.must_equal 1_114

        priority = [:plane, :airline, :dest, :origin]
        # discard nothing
        Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority).count.must_equal 1
        # (force) discard origin, then (force) discard dest, then (force) discard airline
        Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 2).count.must_equal 4
        # gives up!
        Flight.cohort(lax_sfo_aa_a320, :strategy => :strict, :priority => priority, :minimum_size => 5).count.must_equal 0
      end
    end

    describe :cohort_constraint do
      it "can be used like other ARel constraints" do
        FactoryGirl.create(:lax)
        Flight.where(Flight.cohort_constraint(:origin => 'LAX')).count.must_equal 1
        Flight.where(Flight.cohort_constraint({:origin => 'LAX'}, :minimum_size => 2)).count.must_equal 0
      end

      it "can be combined with other ARel constraints" do
        [:lax, :lax_sfo].each { |factory_name| FactoryGirl.create(factory_name) }
        from_lax = Flight.cohort_constraint(:origin => 'LAX')
        to_sfo = Flight.arel_table[:dest].eq('SFO')
        Flight.where(to_sfo.and(from_lax)).count.must_equal 1
        Flight.where(to_sfo.or(from_lax)).count.must_equal 2
        Flight.where(from_lax.and(to_sfo)).count.must_equal 1
        Flight.where(from_lax.or(to_sfo)).count.must_equal 2
      end

      # Caution!
      it "is NOT smart enough to enforce minimum size when composed" do
        [:lax, :lax_sfo].each { |factory_name| FactoryGirl.create(factory_name) }
        from_lax = Flight.cohort_constraint({:origin => 'LAX'}, :minimum_size => 2)
        to_sfo = Flight.arel_table[:dest].eq('SFO')
        Flight.where(to_sfo.and(from_lax)).count.must_equal 1 # see how minimum_size is ignored?
        Flight.where(from_lax.and(to_sfo)).count.must_equal 1 # it's because the cohort constraint resolves itself before allowing the ARel visitor to continue
      end
    end
  end

  describe 'ActiveRecordRelationInstanceMethods' do
    describe :cohort do
      it "is the proper way to compose when other ARel constraints are present" do
        [:lax, :lax_sfo].each { |factory_name| FactoryGirl.create(factory_name) }
        Flight.where(:dest => 'SFO').cohort(:origin => 'LAX').count.must_equal 1
        Flight.where(:dest => 'SFO').cohort({:origin => 'LAX'}, :minimum_size => 2).count.must_equal 0
      end
    end

    describe :cohort_constraint do
      it "can also be used (carefully) to compose with other ARel constraints" do
        [:lax, :lax_sfo].each { |factory_name| FactoryGirl.create(factory_name) }

        to_sfo = Flight.where(:dest => 'SFO')
        from_lax_within_sfo = to_sfo.cohort_constraint(:origin => 'LAX')
        Flight.where(from_lax_within_sfo).count.must_equal 1

        to_sfo = Flight.where(:dest => 'SFO')
        from_lax_within_sfo = to_sfo.cohort_constraint({:origin => 'LAX'}, :minimum_size => 2)
        Flight.where(from_lax_within_sfo).count.must_equal 0
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cohort_analysis
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 15
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 0.4.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Seamus Abshere
|
14
|
+
- Andy Rossmeissl
|
15
|
+
- Derek Kastner
|
16
|
+
- Ian Hough
|
17
|
+
autorequire:
|
18
|
+
bindir: bin
|
19
|
+
cert_chain: []
|
20
|
+
|
21
|
+
date: 2012-02-29 00:00:00 Z
|
22
|
+
dependencies:
|
23
|
+
- !ruby/object:Gem::Dependency
|
24
|
+
name: activesupport
|
25
|
+
prerelease: false
|
26
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
hash: 5
|
32
|
+
segments:
|
33
|
+
- 3
|
34
|
+
version: "3"
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: activerecord
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 5
|
46
|
+
segments:
|
47
|
+
- 3
|
48
|
+
version: "3"
|
49
|
+
type: :runtime
|
50
|
+
version_requirements: *id002
|
51
|
+
description: "Lets you do cohort analysis based on two strategies: \"big\", which discards characteristics for the maximum cohort result, and \"strict\", which discards characteristics in order until a minimum cohort size is reached."
|
52
|
+
email:
|
53
|
+
- seamus@abshere.net
|
54
|
+
- andy@rossmeissl.net
|
55
|
+
- dkastner@gmail.com
|
56
|
+
- ijhough@gmail.com
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .document
|
65
|
+
- .gitignore
|
66
|
+
- CHANGELOG
|
67
|
+
- Gemfile
|
68
|
+
- LICENSE
|
69
|
+
- README.markdown
|
70
|
+
- Rakefile
|
71
|
+
- cohort_analysis.gemspec
|
72
|
+
- lib/cohort_analysis.rb
|
73
|
+
- lib/cohort_analysis/active_record_base_class_methods.rb
|
74
|
+
- lib/cohort_analysis/active_record_relation_instance_methods.rb
|
75
|
+
- lib/cohort_analysis/arel_visitors_visitor_instance_methods.rb
|
76
|
+
- lib/cohort_analysis/strategy.rb
|
77
|
+
- lib/cohort_analysis/strategy/big.rb
|
78
|
+
- lib/cohort_analysis/strategy/strict.rb
|
79
|
+
- lib/cohort_analysis/version.rb
|
80
|
+
- test/helper.rb
|
81
|
+
- test/test_cohort_analysis.rb
|
82
|
+
homepage: https://github.com/seamusabshere/cohort_analysis
|
83
|
+
licenses: []
|
84
|
+
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
96
|
+
segments:
|
97
|
+
- 0
|
98
|
+
version: "0"
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
none: false
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
hash: 3
|
105
|
+
segments:
|
106
|
+
- 0
|
107
|
+
version: "0"
|
108
|
+
requirements: []
|
109
|
+
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 1.8.15
|
112
|
+
signing_key:
|
113
|
+
specification_version: 3
|
114
|
+
summary: "Lets you do cohort analysis based on two strategies: \"big\", which discards characteristics for the maximum cohort result, and \"strict\", which discards characteristics in order until a minimum cohort size is reached."
|
115
|
+
test_files:
|
116
|
+
- test/helper.rb
|
117
|
+
- test/test_cohort_analysis.rb
|
118
|
+
has_rdoc:
|