cohort_scope 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Seamus Abshere
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,47 @@
1
+ = cohort_scope
2
+
3
+ Provides cohorts (in the form of ActiveRecord scopes) that dynamically widen until they contain a certain number of records.
4
+
5
+ * <tt>big_cohort</tt> widens by finding the constraint that eliminates the most records and removing it.
6
+ * <tt>strict_cohort</tt> widens by eliminating constraints in order.
7
+
8
+ = Real-world use
9
+
10
+ This has been in use at http://carbon.brighterplanet.com since April 2010, where it helps sift through climate data to come up with meaningful emissions calculations.
11
+
12
+ = Quick start
13
+
14
+ Let's pretend the U.S. Census provided information about birthday and favorite color:
15
+
16
+ class Citizen < ActiveRecord::Base
17
+ extend CohortScope
18
+ self.minimum_cohort_size = 1_000
19
+ end
20
+
21
+ Now I need to run a calculation that ideally uses birthday and favorite color, but most importantly looks at a large cohort:
22
+
23
+ Citizen.big_cohort :birthdate => (Date.parse('1980-01-01')..Date.parse('1990-01-01')), :favorite_color => 'heliotrope'
24
+ # => [... a cohort of at least 1,000 records (otherwise it's empty),
25
+ where everybody's favorite color MAY be heliotrope
26
+ and everybody's birthday MAY be between 1980 and 1990
27
+ (at least one of those constraints will hold) ...]
28
+
29
+ What if my calculation privileges favorite color? In other words, if you can't give me a cohort of minimum size within the birthday constraint, at least give me one where everybody loves heliotrope:
30
+
31
+ ordered_constraints = ActiveSupport::OrderedHash.new
32
+ ordered_constraints[:favorite_color] = 'heliotrope'
33
+ ordered_constraints[:birthdate] = (Date.parse('1980-01-01')..Date.parse('1990-01-01'))
34
+
35
+ Citizen.strict_cohort ordered_constraints
36
+ # => [... a cohort of at least 1,000 records (otherwise it's empty),
37
+ where everybody's favorite color IS heliotrope
38
+ and everybody's birthday MAY be between 1980 and 1990 ...]
39
+
40
+ = Wishlist
41
+
42
+ * support for ruby 1.9's implicitly ordered hashes
43
+ * support for constraining on <tt>IS NULL</tt> or <tt>IS NOT NULL</tt>
44
+
45
+ == Copyright
46
+
47
+ Copyright (c) 2010 Seamus Abshere and Andy Rossmeissl. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,55 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin # Gem packaging tasks; when jeweler cannot be loaded a notice is printed instead
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "cohort_scope"
8
+ gem.summary = %Q{Provides cohorts (in the form of ActiveRecord scopes) that dynamically widen until they contain a certain number of records.}
9
+ gem.description = %Q{Provides big_cohort, which widens by finding the constraint that eliminates the most records and removing it. Also provides strict_cohort, which widens by eliminating constraints in order.}
10
+ gem.email = "seamus@abshere.net"
11
+ gem.homepage = "http://github.com/seamusabshere/cohort_scope"
12
+ gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
+ gem.add_dependency "activesupport", ">=3.0.0.beta2"
14
+ gem.add_dependency "activerecord", ">=3.0.0.beta2"
15
+ gem.add_development_dependency "shoulda", ">= 2.10.3"
16
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
17
+ end
18
+ Jeweler::GemcutterTasks.new
19
+ rescue LoadError
20
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
21
+ end
22
+
23
+ require 'rake/testtask'
24
+ Rake::TestTask.new(:test) do |test| # the :test task runs every test/**/test_*.rb with lib and test on the load path
25
+ test.libs << 'lib' << 'test'
26
+ test.pattern = 'test/**/test_*.rb'
27
+ test.verbose = true
28
+ end
29
+
30
+ begin # Coverage task; without rcov a stub :rcov task aborts with install instructions
31
+ require 'rcov/rcovtask'
32
+ Rcov::RcovTask.new do |test|
33
+ test.libs << 'test'
34
+ test.pattern = 'test/**/test_*.rb'
35
+ test.verbose = true
36
+ end
37
+ rescue LoadError
38
+ task :rcov do
39
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
40
+ end
41
+ end
42
+
43
+ task :test => :check_dependencies # NOTE(review): check_dependencies is not defined in this file - presumably provided by Jeweler::Tasks; confirm `rake test` still works when jeweler is missing
44
+
45
+ task :default => :test
46
+
47
+ require 'rake/rdoctask'
48
+ Rake::RDocTask.new do |rdoc|
49
+ version = File.exist?('VERSION') ? File.read('VERSION') : "" # title carries no version when the VERSION file is absent
50
+
51
+ rdoc.rdoc_dir = 'rdoc'
52
+ rdoc.title = "cohort_scope #{version}"
53
+ rdoc.rdoc_files.include('README*')
54
+ rdoc.rdoc_files.include('lib/**/*.rb')
55
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,159 @@
1
+ require 'active_support'
2
+ require 'active_record'
3
+ require 'active_support/core_ext/module/delegation'
4
+
5
+ module ActiveRecord
6
+ module NamedScope
7
+ module ClassMethods
8
+ # Initialize a MassiveScope, which, when inspected, does not generate a huge string.
9
+ def massive_scoped(options = {}, &block)
10
+ if options.present?
11
+ MassiveScope.init(self, options, &block)
12
+ else
13
+ raise "MassiveScopes should be created with options"
14
+ end
15
+ end
16
+ end
17
+ class MassiveScope < Scope
18
+ # Don't try to output a massive string.
19
+ def inspect
20
+ "<Massive scope: #{count} members>"
21
+ end
22
+ # Don't try to put everything into json.
23
+ def to_json(*args)
24
+ { :members => count }.to_json
25
+ end
26
+ end
27
+ end
28
+ end
29
+
30
+ module CohortScope
31
+ def self.extended(base)
32
+ base.class_eval do
33
+ cattr_accessor :minimum_cohort_size, :instance_writer => false
34
+ end
35
+ end
36
+
37
+ # Find the biggest scope possible by removing constraints <b>in any order</b>.
38
+ # Returns an empty scope if it can't meet the minimum scope size.
39
+ def big_cohort(constraints)
40
+ raise ArgumentError, "You can't give a big_cohort an OrderedHash; do you want strict_cohort?" if constraints.is_a?(ActiveSupport::OrderedHash)
41
+ _cohort_massive_scope constraints
42
+ end
43
+
44
+ # Find the first acceptable scope by removing constraints <b>in strict order</b>, starting with the last constraint.
45
+ # Returns an empty scope if it can't meet the minimum scope size.
46
+ #
47
+ # <tt>constraints</tt> must be an <tt>ActiveSupport::OrderedHash</tt> (no support for ruby 1.9's natively ordered hashes yet).
48
+ #
49
+ # Note that the first constraint is implicitly required.
50
+ #
51
+ # Take this example, where favorite color is considered to be "more important" than birthdate:
52
+ #
53
+ # ordered_constraints = ActiveSupport::OrderedHash.new
54
+ # ordered_constraints[:favorite_color] = 'heliotrope'
55
+ # ordered_constraints[:birthdate] = '1999-01-01'
56
+ # Citizen.strict_cohort(ordered_constraints) #=> [...]
57
+ #
58
+ # If the original constraints don't meet the minimum scope size, then the only constraint that can be removed is birthdate.
59
+ # In other words, this would never return a scope that was constrained on birthdate but not on favorite_color.
60
+ def strict_cohort(constraints)
61
+ raise ArgumentError, "You need to give strict_cohort an OrderedHash" unless constraints.is_a?(ActiveSupport::OrderedHash)
62
+ _cohort_massive_scope constraints
63
+ end
64
+
65
+ protected
66
+
67
+ # Recursively look for a scope that meets the constraints and is at least <tt>minimum_cohort_size</tt>.
68
+ def _cohort_massive_scope(constraints)
69
+ raise RuntimeError, "You need to set #{name}.minimum_cohort_size = X" unless minimum_cohort_size.present?
70
+
71
+ if constraints.values.none? # failing base case
72
+ return massive_scoped(:conditions => 'false')
73
+ end
74
+
75
+ this_hash = _cohort_constraints constraints
76
+ this_count = scoped(this_hash).count
77
+
78
+ if this_count >= minimum_cohort_size # successful base case
79
+ massive_scoped this_hash
80
+ else
81
+ _cohort_massive_scope _cohort_reduce_constraints(constraints)
82
+ end
83
+ end
84
+
85
+ # Sanitize constraints by
86
+ # * removing nil constraints (so constraints like "X IS NULL" are impossible, sorry)
87
+ # * converting ActiveRecord::Base objects into integer foreign key constraints
88
+ def _cohort_constraints(constraints)
89
+ new_hash = constraints.is_a?(ActiveSupport::OrderedHash) ? ActiveSupport::OrderedHash.new : Hash.new
90
+ conditions = constraints.inject(new_hash) do |memo, tuple|
91
+ k, v = tuple
92
+ if v.kind_of?(ActiveRecord::Base)
93
+ condition = { _cohort_association_primary_key(k) => v.to_param }
94
+ elsif !v.nil?
95
+ condition = { k => v }
96
+ end
97
+ memo.merge! condition if condition.is_a? Hash
98
+ memo
99
+ end
100
+ { :conditions => conditions }
101
+ end
102
+
103
+ # Convert constraints that are provided as ActiveRecord::Base objects into their corresponding integer primary keys.
104
+ #
105
+ # Only works for <tt>belongs_to</tt> relationships.
106
+ #
107
+ # For example, :car => <#Car> might get translated into :car_id => 44.
108
+ def _cohort_association_primary_key(name)
109
+ @_cohort_association_primary_keys ||= {}
110
+ return @_cohort_association_primary_keys[name] if @_cohort_association_primary_keys.has_key? name
111
+ a = reflect_on_association name
112
+ raise "can't use cohort scope on :through associations (#{self.name} #{name})" if a.options.has_key? :through
113
+ if !a.primary_key_name.blank?
114
+ @_cohort_association_primary_keys[name] = a.primary_key_name
115
+ else
116
+ raise "we need some other way to find primary key"
117
+ end
118
+ end
119
+
120
+ # Choose how to reduce constraints based on whether we're looking for a big cohort or a strict cohort.
121
+ def _cohort_reduce_constraints(constraints)
122
+ case constraints
123
+ when ActiveSupport::OrderedHash
124
+ _cohort_reduce_constraints_in_order constraints
125
+ when Hash
126
+ _cohort_reduce_constraints_seeking_maximum_count constraints
127
+ else
128
+ raise "what did you pass me? #{constraints}"
129
+ end
130
+ end
131
+
132
+ # (Used by <tt>big_cohort</tt>)
133
+ #
134
+ # Reduce constraints by removing them one by one and counting the results.
135
+ #
136
+ # The constraint whose removal leads to the highest record count is removed from the overall constraint set.
137
+ def _cohort_reduce_constraints_seeking_maximum_count(constraints)
138
+ highest_count_after_removal = nil
139
+ losing_key = nil
140
+ constraints.keys.each do |key|
141
+ test_constraints = constraints.except(key)
142
+ count_after_removal = scoped(_cohort_constraints(test_constraints)).count
143
+ if highest_count_after_removal.nil? or count_after_removal > highest_count_after_removal
144
+ highest_count_after_removal = count_after_removal
145
+ losing_key = key
146
+ end
147
+ end
148
+ constraints.except losing_key
149
+ end
150
+
151
+ # (Used by <tt>strict_cohort</tt>)
152
+ #
153
+ # Reduce constraints by removing the least important one.
154
+ def _cohort_reduce_constraints_in_order(constraints)
155
+ reduced_constraints = constraints.dup
156
+ reduced_constraints.delete constraints.keys.last
157
+ reduced_constraints
158
+ end
159
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ require 'ruby-debug'
5
+ require 'logger'
6
+
7
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) # lets the tests require the library from ../lib
8
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
9
+ require 'cohort_scope'
10
+
11
+ class Test::Unit::TestCase
12
+ end
13
+
14
+ $logger = Logger.new STDOUT #'test/test.log'
15
+ ActiveSupport::Notifications.subscribe do |*args| # write each notification's name, duration and SQL to $logger
16
+ event = ActiveSupport::Notifications::Event.new(*args)
17
+ $logger.debug "#{event.payload[:name]} (#{event.duration}) #{event.payload[:sql]}"
18
+ end
19
+
20
+ ActiveRecord::Base.establish_connection( # connects to a MySQL database named cohort_scope_test as root with an empty password
21
+ 'adapter' => 'mysql',
22
+ 'database' => 'cohort_scope_test',
23
+ 'username' => 'root',
24
+ 'password' => ''
25
+ )
26
+
27
+ ActiveRecord::Schema.define(:version => 20090819143429) do
28
+ create_table 'citizens', :force => true do |t|
29
+ t.date 'birthdate'
30
+ t.string 'favorite_color'
31
+ t.integer 'teeth'
32
+ end
33
+ end
34
+
35
+ class Citizen < ActiveRecord::Base
36
+ extend CohortScope
37
+ self.minimum_cohort_size = 3 # TestCohortScope#setup assigns this same value again before each test
38
+ validates_presence_of :birthdate
39
+ end
40
+
41
+ [ # rows are [birthdate, favorite_color, teeth]; 9 of the 12 birthdates fall between 1980-01-01 and 1990-01-01
42
+ [ '1982-09-29', 'blue', 31 ],
43
+ [ '1954-12-20', 'heliotrope', 32 ], # the only citizen whose favorite color is heliotrope
44
+ [ '1983-10-28', 'green', 24 ],
45
+ [ '1984-02-14', 'firetruck red', 27 ],
46
+ [ '1955-07-21', 'blue', 27 ],
47
+ [ '1983-06-08', 'purple', 42 ],
48
+ [ '1982-04-27', 'black', 24 ],
49
+ [ '1984-07-16', 'blue', 29 ],
50
+ [ '1975-02-18', 'green', 18 ],
51
+ [ '1988-02-01', nil, 31 ],
52
+ [ '1985-03-02', nil, 27 ],
53
+ [ '1982-05-01', nil, 28 ]
54
+ ].each do |birthdate, favorite_color, teeth|
55
+ Citizen.create! :birthdate => birthdate, :favorite_color => favorite_color, :teeth => teeth
56
+ end
@@ -0,0 +1,70 @@
1
+ require 'helper'
2
+
3
+ class TestCohortScope < Test::Unit::TestCase
4
+ def setup
5
+ Citizen.minimum_cohort_size = 3 # restored before every test because one test sets it to nil
6
+ @date_range = (Date.parse('1980-01-01')..Date.parse('1990-01-01'))
7
+ end
8
+
9
+ should "raise if no minimum_cohort_size is specified" do
10
+ Citizen.minimum_cohort_size = nil
11
+ assert_raises(RuntimeError) {
12
+ cohort = Citizen.big_cohort Hash.new
13
+ }
14
+ assert_raises(RuntimeError) {
15
+ cohort = Citizen.strict_cohort ActiveSupport::OrderedHash.new
16
+ }
17
+ end
18
+
19
+ context "big_cohort" do
20
+ should "return an empty cohort if it can't find one that meets size requirements" do
21
+ cohort = Citizen.big_cohort :favorite_color => 'heliotrope'
22
+ assert_equal 0, cohort.count # a single fixture citizen likes heliotrope, below the minimum of 3
23
+ end
24
+
25
+ should "seek a cohort of maximum size" do
26
+ cohort = Citizen.big_cohort :birthdate => @date_range, :favorite_color => 'heliotrope'
27
+ assert_equal 9, cohort.count # favorite_color is dropped; 9 fixture citizens were born within the range
28
+ assert cohort.any? { |m| m.favorite_color != 'heliotrope' }
29
+ assert cohort.all? { |m| @date_range.include? m.birthdate }
30
+ end
31
+
32
+ should "raise if an OrderedHash is given to big_cohort" do
33
+ assert_raises(ArgumentError) {
34
+ cohort = Citizen.big_cohort ActiveSupport::OrderedHash.new
35
+ }
36
+ end
37
+ end
38
+
39
+ context "strict_cohort" do
40
+ should "raise if a non-OrderedHash is given to strict_cohort" do
41
+ assert_raises(ArgumentError) {
42
+ cohort = Citizen.strict_cohort Hash.new
43
+ }
44
+ end
45
+
46
+ should "return an empty (strict) cohort if it can't find one that meets size requirements" do
47
+ ordered_attributes = ActiveSupport::OrderedHash.new
48
+ ordered_attributes[:favorite_color] = 'heliotrope'
49
+
50
+ cohort = Citizen.strict_cohort ordered_attributes
51
+ assert_equal 0, cohort.count
52
+ end
53
+
54
+ should "seek a cohort by discarding attributes in order" do
55
+ favorite_color_matters_most = ActiveSupport::OrderedHash.new
56
+ favorite_color_matters_most[:favorite_color] = 'heliotrope'
57
+ favorite_color_matters_most[:birthdate] = @date_range
58
+
59
+ birthdate_matters_most = ActiveSupport::OrderedHash.new
60
+ birthdate_matters_most[:birthdate] = @date_range
61
+ birthdate_matters_most[:favorite_color] = 'heliotrope'
62
+
63
+ cohort = Citizen.strict_cohort favorite_color_matters_most
64
+ assert_equal 0, cohort.count # birthdate is dropped first, but heliotrope alone matches only one citizen
65
+
66
+ cohort = Citizen.strict_cohort birthdate_matters_most
67
+ assert_equal 9, cohort.count # favorite_color is dropped first, leaving the 9 citizens born within the range
68
+ end
69
+ end
70
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cohort_scope
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Seamus Abshere
13
+ - Andy Rossmeissl
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-04-07 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: activesupport
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 3
30
+ - 0
31
+ - 0
32
+ - beta2
33
+ version: 3.0.0.beta2
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: activerecord
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 3
45
+ - 0
46
+ - 0
47
+ - beta2
48
+ version: 3.0.0.beta2
49
+ type: :runtime
50
+ version_requirements: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ name: shoulda
53
+ prerelease: false
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 2
60
+ - 10
61
+ - 3
62
+ version: 2.10.3
63
+ type: :development
64
+ version_requirements: *id003
65
+ description: Provides big_cohort, which widens by finding the constraint that eliminates the most records and removing it. Also provides strict_cohort, which widens by eliminating constraints in order.
66
+ email: seamus@abshere.net
67
+ executables: []
68
+
69
+ extensions: []
70
+
71
+ extra_rdoc_files:
72
+ - LICENSE
73
+ - README.rdoc
74
+ files:
75
+ - .document
76
+ - .gitignore
77
+ - LICENSE
78
+ - README.rdoc
79
+ - Rakefile
80
+ - VERSION
81
+ - lib/cohort_scope.rb
82
+ - test/helper.rb
83
+ - test/test_cohort_scope.rb
84
+ has_rdoc: true
85
+ homepage: http://github.com/seamusabshere/cohort_scope
86
+ licenses: []
87
+
88
+ post_install_message:
89
+ rdoc_options:
90
+ - --charset=UTF-8
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ segments:
98
+ - 0
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ requirements: []
108
+
109
+ rubyforge_project:
110
+ rubygems_version: 1.3.6
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: Provides cohorts (in the form of ActiveRecord scopes) that dynamically widen until they contain a certain number of records.
114
+ test_files:
115
+ - test/helper.rb
116
+ - test/test_cohort_scope.rb