cohort_scope 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Seamus Abshere
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,47 @@
1
+ = cohort_scope
2
+
3
+ Provides cohorts (in the form of ActiveRecord scopes) that dynamically widen until they contain a certain number of records.
4
+
5
+ * <tt>big_cohort</tt> widens by finding the constraint that eliminates the most records and removing it.
6
+ * <tt>strict_cohort</tt> widens by eliminating constraints in order.
7
+
8
+ = Real-world use
9
+
10
+ This has been in use at http://carbon.brighterplanet.com since April 2010, where it helps sift through climate data to come up with meaningful emissions calculations.
11
+
12
+ = Quick start
13
+
14
+ Let's pretend the U.S. Census provided information about birthday and favorite color:
15
+
16
+ class Citizen < ActiveRecord::Base
17
+ extend CohortScope
18
+ self.minimum_cohort_size = 1_000
19
+ end
20
+
21
+ Now I need to run a calculation that ideally uses birthday and favorite color, but most importantly looks at a large cohort:
22
+
23
+ Citizen.big_cohort :birthdate => (Date.parse('1980-01-01')..Date.parse('1990-01-01')), :favorite_color => 'heliotrope'
24
+ # => [... a cohort of at least 1,000 records (otherwise it's empty),
25
+ where everybody's favorite color MAY be heliotrope
26
+ and everybody's birthday MAY be between 1980 and 1990
27
+ (at least one of those constraints will hold) ...]
28
+
29
+ What if my calculation privileges favorite color? In other words, if you can't give me a cohort of minimum size within the birthday constraint, at least give me one where everybody loves heliotrope:
30
+
31
+ ordered_constraints = ActiveSupport::OrderedHash.new
32
+ ordered_constraints[:favorite_color] = 'heliotrope'
33
+ ordered_constraints[:birthdate] = (Date.parse('1980-01-01')..Date.parse('1990-01-01'))
34
+
35
+ Citizen.strict_cohort ordered_constraints
36
+ # => [... a cohort of at least 1,000 records (otherwise it's empty),
37
+ where everybody's favorite color IS heliotrope
38
+ and everybody's birthday MAY be between 1980 and 1990 ...]
39
+
40
+ = Wishlist
41
+
42
+ * support for ruby 1.9's implicitly ordered hashes
43
+ * support for constraining on <tt>IS NULL</tt> or <tt>IS NOT NULL</tt>
44
+
45
+ == Copyright
46
+
47
+ Copyright (c) 2010 Seamus Abshere and Andy Rossmeissl. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,55 @@
1
require 'rubygems'
require 'rake'

# Gem packaging and release tasks. Jeweler is optional: when it cannot be
# loaded we print a hint and carry on so the remaining tasks still work.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "cohort_scope"
    gem.summary = %Q{Provides cohorts (in the form of ActiveRecord scopes) that dynamically widen until they contain a certain number of records.}
    gem.description = %Q{Provides big_cohort, which widens by finding the constraint that eliminates the most records and removing it. Also provides strict_cohort, which widens by eliminating constraints in order.}
    gem.email = "seamus@abshere.net"
    gem.homepage = "http://github.com/seamusabshere/cohort_scope"
    gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
    gem.add_dependency "activesupport", ">=3.0.0.beta2"
    gem.add_dependency "activerecord", ">=3.0.0.beta2"
    gem.add_development_dependency "shoulda", ">= 2.10.3"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage. When rcov is missing, define a placeholder :rcov task that
# explains how to install it instead of leaving the task undefined.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this Rakefile (it is expected to come
# from the Jeweler tasks above). Only hook it in when it actually exists, so
# that `rake test` keeps working when Jeweler could not be loaded.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation, titled with the contents of the VERSION file when present.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline so it does not end up inside the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "cohort_scope #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,159 @@
1
+ require 'active_support'
2
+ require 'active_record'
3
+ require 'active_support/core_ext/module/delegation'
4
+
5
module ActiveRecord
  module NamedScope
    module ClassMethods
      # Build a MassiveScope for this class from the given scope options.
      #
      # A MassiveScope reports only its member count when inspected or
      # serialized to JSON, instead of dumping every record.
      #
      # Raises a RuntimeError when +options+ is blank.
      def massive_scoped(options = {}, &block)
        raise "MassiveScopes should be created with options" unless options.present?

        MassiveScope.init(self, options, &block)
      end
    end

    class MassiveScope < Scope
      # Short summary string; avoids printing the whole result set.
      def inspect
        member_total = count
        "<Massive scope: #{member_total} members>"
      end

      # JSON summary holding only the member count. Any arguments are ignored.
      def to_json(*args)
        summary = { :members => count }
        summary.to_json
      end
    end
  end
end
29
+
30
# Class-level helpers that build "cohorts": scopes whose constraints are
# progressively relaxed until at least <tt>minimum_cohort_size</tt> records match.
#
# Extend a model class with this module and set <tt>minimum_cohort_size</tt>.
module CohortScope
  # Adds the <tt>minimum_cohort_size</tt> class-level accessor to the extending class.
  def self.extended(base)
    base.class_eval do
      cattr_accessor :minimum_cohort_size, :instance_writer => false
    end
  end

  # Find the biggest scope possible by removing constraints <b>in any order</b>.
  # Returns an empty scope if it can't meet the minimum scope size.
  #
  # Raises ArgumentError if <tt>constraints</tt> is an <tt>ActiveSupport::OrderedHash</tt>.
  def big_cohort(constraints)
    raise ArgumentError, "You can't give a big_cohort an OrderedHash; do you want strict_cohort?" if constraints.is_a?(ActiveSupport::OrderedHash)
    _cohort_massive_scope constraints
  end

  # Find the first acceptable scope by removing constraints <b>in strict order</b>, starting with the last constraint.
  # Returns an empty scope if it can't meet the minimum scope size.
  #
  # <tt>constraints</tt> must be an <tt>ActiveSupport::OrderedHash</tt> (no support for ruby 1.9's natively ordered hashes yet);
  # anything else raises ArgumentError.
  #
  # Note that the first constraint is implicitly required.
  #
  # Take this example, where favorite color is considered to be "more important" than birthdate:
  #
  #   ordered_constraints = ActiveSupport::OrderedHash.new
  #   ordered_constraints[:favorite_color] = 'heliotrope'
  #   ordered_constraints[:birthdate] = '1999-01-01'
  #   Citizen.strict_cohort(ordered_constraints) #=> [...]
  #
  # If the original constraints don't meet the minimum scope size, then the only constraint that can be removed is birthdate.
  # In other words, this would never return a scope that was constrained on birthdate but not on favorite_color.
  def strict_cohort(constraints)
    raise ArgumentError, "You need to give strict_cohort an OrderedHash" unless constraints.is_a?(ActiveSupport::OrderedHash)
    _cohort_massive_scope constraints
  end

  protected

  # Recursively look for a scope that meets the constraints and is at least <tt>minimum_cohort_size</tt>.
  #
  # Raises RuntimeError when <tt>minimum_cohort_size</tt> has not been set.
  def _cohort_massive_scope(constraints)
    raise RuntimeError, "You need to set #{name}.minimum_cohort_size = X" unless minimum_cohort_size.present?

    # failing base case: nothing (or only nil/false values) left to constrain on,
    # so answer with a scope that matches no rows
    if constraints.values.none?
      return massive_scoped(:conditions => 'false')
    end

    this_hash = _cohort_constraints constraints
    this_count = scoped(this_hash).count

    if this_count >= minimum_cohort_size # successful base case
      massive_scoped this_hash
    else
      _cohort_massive_scope _cohort_reduce_constraints(constraints)
    end
  end

  # Sanitize constraints by
  # * removing nil constraints (so constraints like "X IS NULL" are impossible, sorry)
  # * converting ActiveRecord::Base objects into foreign key constraints (using the record's <tt>to_param</tt>)
  #
  # Returns <tt>{ :conditions => hash }</tt>, where the hash is an OrderedHash
  # whenever the incoming constraints were one.
  def _cohort_constraints(constraints)
    new_hash = constraints.is_a?(ActiveSupport::OrderedHash) ? ActiveSupport::OrderedHash.new : Hash.new
    conditions = constraints.inject(new_hash) do |memo, (key, value)|
      if value.kind_of?(ActiveRecord::Base)
        memo[_cohort_association_primary_key(key)] = value.to_param
      elsif !value.nil?
        memo[key] = value
      end
      memo
    end
    { :conditions => conditions }
  end

  # Look up the key column that a constraint given as an ActiveRecord::Base object should be matched on.
  #
  # Only works for <tt>belongs_to</tt> relationships.
  #
  # For example, :car => <#Car> might get translated into :car_id => 44.
  #
  # Results are memoized per association name. Raises RuntimeError when the
  # association is unknown, is a :through association, or reports no key name.
  def _cohort_association_primary_key(name)
    @_cohort_association_primary_keys ||= {}
    return @_cohort_association_primary_keys[name] if @_cohort_association_primary_keys.has_key? name
    a = reflect_on_association name
    # reflect_on_association answers nil for unknown names; fail with a clear message instead of a NoMethodError
    raise "can't find association #{name} on #{self.name} to build a cohort constraint" if a.nil?
    raise "can't use cohort scope on :through associations (#{self.name} #{name})" if a.options.has_key? :through
    if !a.primary_key_name.blank?
      @_cohort_association_primary_keys[name] = a.primary_key_name
    else
      raise "we need some other way to find primary key"
    end
  end

  # Choose how to reduce constraints based on whether we're looking for a big cohort or a strict cohort.
  def _cohort_reduce_constraints(constraints)
    case constraints
    when ActiveSupport::OrderedHash # must come first: an OrderedHash is also a Hash
      _cohort_reduce_constraints_in_order constraints
    when Hash
      _cohort_reduce_constraints_seeking_maximum_count constraints
    else
      raise "what did you pass me? #{constraints}"
    end
  end

  # (Used by <tt>big_cohort</tt>)
  #
  # Reduce constraints by removing them one by one and counting the results.
  #
  # The constraint whose removal leads to the highest record count is removed from the overall constraint set.
  # On a tie, the key encountered first wins.
  def _cohort_reduce_constraints_seeking_maximum_count(constraints)
    highest_count_after_removal = nil
    losing_key = nil
    constraints.keys.each do |key|
      test_constraints = constraints.except(key)
      count_after_removal = scoped(_cohort_constraints(test_constraints)).count
      if highest_count_after_removal.nil? || count_after_removal > highest_count_after_removal
        highest_count_after_removal = count_after_removal
        losing_key = key
      end
    end
    constraints.except losing_key
  end

  # (Used by <tt>strict_cohort</tt>)
  #
  # Reduce constraints by removing the least important one (the last key).
  # Works on a copy; the hash passed in is left untouched.
  def _cohort_reduce_constraints_in_order(constraints)
    reduced_constraints = constraints.dup
    reduced_constraints.delete constraints.keys.last
    reduced_constraints
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ require 'ruby-debug'
5
+ require 'logger'
6
+
7
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
8
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
9
+ require 'cohort_scope'
10
+
11
# Reopened with no additions.
class Test::Unit::TestCase
end

# Send every ActiveSupport notification (name, duration, SQL) to a STDOUT logger.
$logger = Logger.new(STDOUT) #'test/test.log'
ActiveSupport::Notifications.subscribe do |*event_args|
  notification = ActiveSupport::Notifications::Event.new(*event_args)
  $logger.debug "#{notification.payload[:name]} (#{notification.duration}) #{notification.payload[:sql]}"
end

# Connection settings for the test database.
connection_settings = {
  'adapter' => 'mysql',
  'database' => 'cohort_scope_test',
  'username' => 'root',
  'password' => ''
}
ActiveRecord::Base.establish_connection(connection_settings)

# (Re)create the citizens table; :force => true is passed to create_table.
ActiveRecord::Schema.define(:version => 20090819143429) do
  create_table('citizens', :force => true) do |table|
    table.date 'birthdate'
    table.string 'favorite_color'
    table.integer 'teeth'
  end
end

# Model under test: cohorts need at least three members.
class Citizen < ActiveRecord::Base
  extend CohortScope
  self.minimum_cohort_size = 3
  validates_presence_of :birthdate
end

# Fixture rows: birthdate, favorite color (may be nil), number of teeth.
seed_rows = [
  [ '1982-09-29', 'blue', 31 ],
  [ '1954-12-20', 'heliotrope', 32 ],
  [ '1983-10-28', 'green', 24 ],
  [ '1984-02-14', 'firetruck red', 27 ],
  [ '1955-07-21', 'blue', 27 ],
  [ '1983-06-08', 'purple', 42 ],
  [ '1982-04-27', 'black', 24 ],
  [ '1984-07-16', 'blue', 29 ],
  [ '1975-02-18', 'green', 18 ],
  [ '1988-02-01', nil, 31 ],
  [ '1985-03-02', nil, 27 ],
  [ '1982-05-01', nil, 28 ]
]
seed_rows.each do |born_on, color, tooth_count|
  Citizen.create!(:birthdate => born_on, :favorite_color => color, :teeth => tooth_count)
end
@@ -0,0 +1,70 @@
1
+ require 'helper'
2
+
3
# Exercises CohortScope through the Citizen model and fixture rows set up in the helper.
class TestCohortScope < Test::Unit::TestCase
  # Restore the minimum size (one test clears it) and build the shared 1980-1990 birthdate range.
  def setup
    Citizen.minimum_cohort_size = 3
    @date_range = Date.parse('1980-01-01')..Date.parse('1990-01-01')
  end

  should "raise if no minimum_cohort_size is specified" do
    Citizen.minimum_cohort_size = nil
    assert_raises(RuntimeError) do
      Citizen.big_cohort(Hash.new)
    end
    assert_raises(RuntimeError) do
      Citizen.strict_cohort(ActiveSupport::OrderedHash.new)
    end
  end

  context "big_cohort" do
    should "return an empty cohort if it can't find one that meets size requirements" do
      members = Citizen.big_cohort(:favorite_color => 'heliotrope')
      assert_equal 0, members.count
    end

    should "seek a cohort of maximum size" do
      members = Citizen.big_cohort(:birthdate => @date_range, :favorite_color => 'heliotrope')
      assert_equal 9, members.count
      # the color constraint was dropped, the birthdate constraint still holds
      assert(members.any? { |citizen| citizen.favorite_color != 'heliotrope' })
      assert(members.all? { |citizen| @date_range.include? citizen.birthdate })
    end

    should "raise if an OrderedHash is given to big_cohort" do
      assert_raises(ArgumentError) do
        Citizen.big_cohort(ActiveSupport::OrderedHash.new)
      end
    end
  end

  context "strict_cohort" do
    should "raise if a non-OrderedHash is given to strict_cohort" do
      assert_raises(ArgumentError) do
        Citizen.strict_cohort(Hash.new)
      end
    end

    should "return an empty (strict) cohort if it can't find one that meets size requirements" do
      only_color = ActiveSupport::OrderedHash.new
      only_color[:favorite_color] = 'heliotrope'

      members = Citizen.strict_cohort(only_color)
      assert_equal 0, members.count
    end

    should "seek a cohort by discarding attributes in order" do
      color_then_birthdate = ActiveSupport::OrderedHash.new
      color_then_birthdate[:favorite_color] = 'heliotrope'
      color_then_birthdate[:birthdate] = @date_range

      birthdate_then_color = ActiveSupport::OrderedHash.new
      birthdate_then_color[:birthdate] = @date_range
      birthdate_then_color[:favorite_color] = 'heliotrope'

      # favorite color is mandatory here, and too few citizens like heliotrope
      members = Citizen.strict_cohort(color_then_birthdate)
      assert_equal 0, members.count

      # birthdate is mandatory here; dropping the color constraint is enough
      members = Citizen.strict_cohort(birthdate_then_color)
      assert_equal 9, members.count
    end
  end
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cohort_scope
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Seamus Abshere
13
+ - Andy Rossmeissl
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-04-07 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: activesupport
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 3
30
+ - 0
31
+ - 0
32
+ - beta2
33
+ version: 3.0.0.beta2
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: activerecord
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 3
45
+ - 0
46
+ - 0
47
+ - beta2
48
+ version: 3.0.0.beta2
49
+ type: :runtime
50
+ version_requirements: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ name: shoulda
53
+ prerelease: false
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 2
60
+ - 10
61
+ - 3
62
+ version: 2.10.3
63
+ type: :development
64
+ version_requirements: *id003
65
+ description: Provides big_cohort, which widens by finding the constraint that eliminates the most records and removing it. Also provides strict_cohort, which widens by eliminating constraints in order.
66
+ email: seamus@abshere.net
67
+ executables: []
68
+
69
+ extensions: []
70
+
71
+ extra_rdoc_files:
72
+ - LICENSE
73
+ - README.rdoc
74
+ files:
75
+ - .document
76
+ - .gitignore
77
+ - LICENSE
78
+ - README.rdoc
79
+ - Rakefile
80
+ - VERSION
81
+ - lib/cohort_scope.rb
82
+ - test/helper.rb
83
+ - test/test_cohort_scope.rb
84
+ has_rdoc: true
85
+ homepage: http://github.com/seamusabshere/cohort_scope
86
+ licenses: []
87
+
88
+ post_install_message:
89
+ rdoc_options:
90
+ - --charset=UTF-8
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ segments:
98
+ - 0
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ requirements: []
108
+
109
+ rubyforge_project:
110
+ rubygems_version: 1.3.6
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: Provides cohorts (in the form of ActiveRecord scopes) that dynamically widen until they contain a certain number of records.
114
+ test_files:
115
+ - test/helper.rb
116
+ - test/test_cohort_scope.rb