activerecord-summarize 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0f4063a016e57d85371ba91aa751c223c5830f29bf46a7f693c00af2b808fb48
4
+ data.tar.gz: c63ee2b1ed0e2c7e39f71125f759a4f933cd56281c414b0e694f8f46511b18f4
5
+ SHA512:
6
+ metadata.gz: 3252c9e8bc8eb0e5ca6eef4b3a089d68f5f67f17d42824f5478b02181f7a618a9feee961c9636f1b696acc6661975580ab75aef8c9edcc6a08b480eae11ae188
7
+ data.tar.gz: 062a2d2557969c0ae4fefd6e656dcd5bb8b4981e0b71899a726025fd3c6ef081e085c8c87729c85555cdd3999f58f8daf323f1a2f6b00f59f71b5de6f8a3a78c
data/.standard.yml ADDED
@@ -0,0 +1,5 @@
1
+ # For available configuration options, see:
2
+ # https://github.com/testdouble/standard
3
+ # ignore:
4
+ # - 'lib/**/*':
5
+ # - Layout/DotPosition
data/CHANGELOG.md ADDED
@@ -0,0 +1,16 @@
1
+ ## [0.2.1] - 2022-02-17
2
+
3
+ - Initial public release
4
+ - Wrap existing groups of related `ActiveRecord` calculations in a `summarize` block for an instant 2-5x speedup
5
+ - Supports combining all `.count` and `.sum` called on [descendants of] the summarizing relation in a `summarize` block
6
+ - Supports separate `.where`, `.group`, and custom scopes for any or all calculations in a `summarize` block
7
+ - Calculation methods return placeholder objects that will be replaced with the true calculation result at the end of the block.
8
+ - Supports chaining almost any method on the placeholder calculation results
9
+ - Some methods of `Object` that I haven't tried yet or that are injected into `Object` by other gems may not work, as they won't trigger `method_missing`.
10
+ - Transparently replaces calculation placeholders that have been saved to local variables in the block's scope or instance variables of the block's execution context
11
+ - Supports `pure: true` option to skip the step of looking outside the block return value for placeholders
12
+ - Supports `noop: true` option to disable all `summarize` functionality and just return the original relation
13
+ - `noop: true` and `noop: false` (default) produce the same final results, just `noop: false` is usually faster
14
+ - Build even more complex queries by using `summarize` on a relation that already has `.group` applied.
15
+ - Results are grouped just like a standard `.group(*expressions).count`, but instead of single numbers, the values are whatever set of calculations you return from the block, including further `.group(*more).calculate(:sum|:count,*args)` calculations, in whatever `Array` or `Hash` shape you arrange them.
16
+ - N.b., `pure: true` is implied and required in this mode, and `noop: true` is not possible, since ActiveRecord has no way to do this in the general case without `summarize`.
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in activerecord-summarize.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+
10
+ gem "minitest", "~> 5.0"
11
+
12
+ gem "standard", "~> 1.3"
data/Gemfile.lock ADDED
@@ -0,0 +1,65 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ activerecord-summarize (0.2.1)
5
+ activerecord (>= 5.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activemodel (7.0.2.2)
11
+ activesupport (= 7.0.2.2)
12
+ activerecord (7.0.2.2)
13
+ activemodel (= 7.0.2.2)
14
+ activesupport (= 7.0.2.2)
15
+ activesupport (7.0.2.2)
16
+ concurrent-ruby (~> 1.0, >= 1.0.2)
17
+ i18n (>= 1.6, < 2)
18
+ minitest (>= 5.1)
19
+ tzinfo (~> 2.0)
20
+ ast (2.4.2)
21
+ concurrent-ruby (1.1.9)
22
+ i18n (1.10.0)
23
+ concurrent-ruby (~> 1.0)
24
+ minitest (5.15.0)
25
+ parallel (1.21.0)
26
+ parser (3.1.1.0)
27
+ ast (~> 2.4.1)
28
+ rainbow (3.1.1)
29
+ rake (13.0.6)
30
+ regexp_parser (2.2.1)
31
+ rexml (3.2.5)
32
+ rubocop (1.25.1)
33
+ parallel (~> 1.10)
34
+ parser (>= 3.1.0.0)
35
+ rainbow (>= 2.2.2, < 4.0)
36
+ regexp_parser (>= 1.8, < 3.0)
37
+ rexml
38
+ rubocop-ast (>= 1.15.1, < 2.0)
39
+ ruby-progressbar (~> 1.7)
40
+ unicode-display_width (>= 1.4.0, < 3.0)
41
+ rubocop-ast (1.16.0)
42
+ parser (>= 3.1.1.0)
43
+ rubocop-performance (1.13.2)
44
+ rubocop (>= 1.7.0, < 2.0)
45
+ rubocop-ast (>= 0.4.0)
46
+ ruby-progressbar (1.11.0)
47
+ standard (1.7.2)
48
+ rubocop (= 1.25.1)
49
+ rubocop-performance (= 1.13.2)
50
+ tzinfo (2.0.4)
51
+ concurrent-ruby (~> 1.0)
52
+ unicode-display_width (2.1.0)
53
+
54
+ PLATFORMS
55
+ arm64-darwin-21
56
+ x86_64-linux
57
+
58
+ DEPENDENCIES
59
+ activerecord-summarize!
60
+ minitest (~> 5.0)
61
+ rake (~> 13.0)
62
+ standard (~> 1.3)
63
+
64
+ BUNDLED WITH
65
+ 2.3.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2022 Joshua Paine
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,177 @@
1
+ # ActiveRecord::Summarize
2
+
3
+ ## Why `summarize`?
4
+
5
+ 1. Make existing groups of related `ActiveRecord` calculations twice as fast (or more) with minimal code alteration. It's like a `go_faster` block.
6
+
7
+ 2. For more complex reporting requirements, including nested `.group` calls, use `summarize` for fast, legible code that you just couldn't have written before without unacceptable performance or lengthy custom SQL and data-wrangling.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your Rails application's Gemfile:
12
+
13
+ ```ruby
14
+ gem 'activerecord-summarize'
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ $ bundle install
20
+
21
+ ## Usage
22
+
23
+ #### Suppose your controller method looks like this:
24
+
25
+ ```ruby
26
+ purchases = Purchase.complete
27
+ promotions = purchases.where.not(promotion_id: nil)
28
+ @promotion_sales = promotions.count
29
+ @promotion_revenue = promotions.sum(:amount)
30
+ @by_region = purchases.group(:region_id).count
31
+ ```
32
+
33
+ #### Make it this instead:
34
+
35
+ ```ruby
36
+ Purchase.complete.summarize do |purchases|
37
+ promotions = purchases.where.not(promotion_id: nil)
38
+ @promotion_sales = promotions.count
39
+ @promotion_revenue = promotions.sum(:amount)
40
+ @by_region = purchases.group(:region_id).count
41
+ end
42
+ ```
43
+ #### ...and you'll have exactly the same instance variables set, but only one SQL query will have been executed.
44
+
45
+ You can run as many calculations in a `summarize` block as it makes sense to run, so long as they all chain to the relation on which you called `summarize`. They can use different, possibly-overlapping subsets of the original relation, i.e., they can have their own `where` clauses and even `group`. The final result of each will be exactly as it would have been if you had run each query independently, but only one query will actually be issued to the database.
46
+
47
+ ### Limitations & details
48
+
49
+ The only restriction is that each of the queries must be structurally compatible with the parent relation, in the same sense as is required for `relation.or(other)`. So if you wanted to display the region's name, you'd need to group by a sub-select (ew) or do the join at the top level:
50
+
51
+ ```ruby
52
+ Purchase.complete.left_joins(:region).summarize do |purchases|
53
+ promotions = purchases.where.not(promotion_id: nil)
54
+ @promotion_sales = promotions.count
55
+ @promotion_revenue = promotions.sum(:amount)
56
+ @by_region = purchases.group("regions.name").count
57
+ end
58
+ ```
59
+
60
+ Until the `summarize` block ends, the return values of your calculations are `ChainableResult::Future` instances, a bit like a Promise with a more convenient API. You can call any method you like on a `ChainableResult`, and you'll get back another `ChainableResult`, and they'll all turn out alright in the end—provided you called methods that would have worked if you had run that calculation without `summarize`. OTOH, using a `ChainableResult` as an argument to another method generally will not work.
61
+
62
+ ```ruby
63
+ Purchase.last_quarter.complete.summarize do |purchases|
64
+ @sales = purchases.sum(:amount)
65
+ # x * y is syntactic sugar for x.*(y), so this will work:
66
+ @vc_projection = @sales * 3
67
+ # And this won't:
68
+ @vc_projection = 3 * @sales
69
+ end
70
+ ```
71
+
72
+ If, within a `summarize` block, you want to combine data from more than one `ChainableResult`, you must use the otherwise-optional second argument yielded to the block, a `proc` I like to name `with`. Pass it all the results you want to combine and a block that combines them and returns the new result:
73
+
74
+ ```ruby
75
+ Purchase.complete.left_joins(:promotion).summarize do |purchases, with|
76
+ @all_revenue = purchases.sum(:amount)
77
+ promotions = purchases.where.not(promotions: {id: nil})
78
+ @promotion_sales = promotions.count
79
+ @promotion_discounts = promotions.sum("promotions.discount_amount")
80
+ @avg_discount = with[@promotion_sales, @promotion_discounts] do |sales, discounts|
81
+ sales.zero? ? 0 : discounts / sales
82
+ end
83
+ end
84
+ ```
85
+
86
+ Treat a `with` block as a pure function: i.e., return the value you care about, and don't set or change any other state within the block. Behavior in any other case is undefined.
87
+
88
+ ## Escape hatch
89
+
90
+ The query generated by `summarize` is often much faster than equivalent queries written without it, but for few-query cases where each query is well-served by its own index, `summarize` could possibly be slower.
91
+
92
+ By design, every operation performed with `summarize` is correct and corresponds to normal `ActiveRecord` behavior, and any operations that can't be done correctly this way or aren't yet will raise exceptions. But only imperfect humans have worked on this gem, so you might also wonder if `summarize` is producing correct results.
93
+
94
+ Fortunately, you can easily check both with `summarize(noop: true)`, which causes `summarize` to yield the original relation it was called on and a trivial `with` proc. The block will be executed as though `summarize` were not involved, with each calculation executing separately and immediately returning numbers or hashes.
95
+
96
+ If you do find any case where you get different results with `summarize(noop: true)`, I'd be grateful if you filed an issue.
97
+
98
+ ## How
99
+
100
+ `ActiveRecord::Relation#summarize` yields a lightly-modified copy of the relation that intercepts all calls to `sum` or `count` which, instead of a number or hash, return a `ChainableResult::Future`. A `ChainableResult` accepts any method called on it, returning a new `ChainableResult` that will evaluate to the result of running the method on the eventual result of its parent.
101
+
102
+ At the end of the `summarize` block:
103
+
104
+ 1. All the calculations are combined into a single query.
105
+ 2. The results of the query are collected into the same shapes they would have if they had been called independently. E.g., a bare `.count` returns a number, but `.group(*expressions).count` returns a hash with single value (one group expression) or array (two-plus expressions) keys.
106
+ 3. Any `ChainableResult` in the return value of the block (usually a single `ChainableResult` or an `Array` or `Hash` with `ChainableResult` values) is replaced with its resolved value.
107
+ 4. Any `ChainableResult` in the local scope of the block (i.e., `block.binding`) or an instance variable of the block context (i.e., `block.binding.receiver`) is replaced with its resolved value.
108
+
109
+ N.b., if you are using `summarize` in a more functional style and will return all values you care about, you can let `summarize` know to skip step 4 by invoking it with `summarize(pure: true)`.
110
+
111
+ When the parent relation already has `.group` applied, `pure: true` is implied and step 4 does not take place.
112
+
113
+ ## Power usage with `group`
114
+
115
+ Build even more complex queries by using `summarize` on a relation that already has `.group` applied. Results are grouped just like a standard `.group(*expressions).count`, but instead of single numbers, the values are whatever set of calculations you return from the block, including further `.group(*more).calculate(:sum|:count,*args)` calculations, in whatever `Array` or `Hash` shape you arrange them. For example:
116
+
117
+ ```ruby
118
+ puts(Purchase.last_year.complete.group(:region_id).summarize do |purchases,with|
119
+ total = purchases.count
120
+ by_quarter = purchases.group(CREATED_TO_YEAR_SQL, CREATED_TO_QUARTER_SQL).count.sort.to_h
121
+ target = with[total / 4, by_quarter.values.max] {|avg_q, best_q| [avg_q * 1.25, best_q].max.round }
122
+ {last_year: total, quarters: by_quarter, unit_target: target}
123
+ end)
124
+ # Output:
125
+ # {
126
+ # 1 => {
127
+ # last_year: 2717316,
128
+ # quarters: {
129
+ # [2021, 1] => 634057,
130
+ # [2021, 2] => 590012,
131
+ # [2021, 3] => 659010,
132
+ # [2021, 4] => 834237
133
+ # },
134
+ # unit_target: 849161
135
+ # },
136
+ # 2 => { ... },
137
+ # 3 => { ... }
138
+ # }
139
+ ```
140
+
141
+ The ActiveRecord API has no direct analog for this, so `noop: true` is not allowed when `summarize` is called on a grouped relation.
142
+
143
+ When the relation already has `group` applied, for correct results, `summarize` requires that the block mutate no state and return all values you care about: functional purity, no side effects. `ChainableResult` values referenced by instance variables or local variables not returned from the block won't be evaluated. I.e., `pure: true` is implied and `pure: false` is not allowed. To see why:
144
+
145
+ ```ruby
146
+ # A trivial example:
147
+ Purchase.complete.group(:region_id).summarize {|purchases| purchases.sum(:amount) }
148
+
149
+ # ...is exactly equivalent to:
150
+ Purchase.complete.group(:region_id).sum(:amount)
151
+
152
+ # But if there were three regions, what should the value of @target be in this case?
153
+ region_targets = Purchase.last_quarter.complete.group(:region_id).summarize do |purchases|
154
+ @target = purchases.sum(:amount) * 1.25
155
+ end
156
+ ```
157
+
158
+ To a Rubyist, that last example looks as though the block will be evaluated for each group, so `@target` should keep whatever value it got the last time the block was run. However:
159
+
160
+ 1. This is not often useful.
161
+ 2. The block is not actually linearly evaluated for each group.
162
+
163
+ Instead the block is evaluated once to determine what calculations need to be run, the query is built and evaluated, and then, for each group of the parent relation, the return value of the block is evaluated with respect to just those rows belonging to the group. In practice this is quite powerful and makes a pleasant, legible API for complex reporting.
164
+
165
+ ## Development
166
+
167
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
168
+
169
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
170
+
171
+ ## Contributing
172
+
173
+ Bug reports and pull requests are welcome on GitHub at https://github.com/midnightmonster/activerecord-summarize.
174
+
175
+ ## License
176
+
177
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
# Defines `rake test`: adds test/ and lib/ to the load path and runs every
# file matching test/**/test_*.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/test_*.rb"]
end
11
+
12
+ require "standard/rake"
13
+
14
# `rake` runs the tests and then the Standard linter.
task default: [:test, :standard]

# `rake full` runs the tests and then lets Standard auto-correct offenses.
task full: [:test, :"standard:fix"]
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/activerecord/summarize/version"
4
+
5
Gem::Specification.new do |spec|
  # Tracked paths matching this pattern (tests, VCS and CI configuration)
  # are left out of the packaged gem.
  unpackaged = %r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)}

  # Identity
  spec.name = "activerecord-summarize"
  spec.version = ActiveRecord::Summarize::VERSION
  spec.authors = ["Joshua Paine"]
  spec.email = ["joshua@letterblock.com"]
  spec.license = "MIT"
  spec.homepage = "https://letterblock.com"

  # Descriptions shown on rubygems.org
  spec.summary = "Run many .count and/or .sum queries in a single efficient query with minimal code changes, even with different .group and only-partly-overlapping .where filters. Nearly-free speedups for mature Rails apps."
  spec.description = "Just wrap your existing code in `@relation.summarize do |relation| ... end` and run your queries against relation instead of @relation."

  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata.update(
    "allowed_push_host" => "https://rubygems.org",
    "homepage_uri" => spec.homepage,
    "source_code_uri" => "https://github.com/midnightmonster/activerecord-summarize",
    "changelog_uri" => "https://github.com/midnightmonster/activerecord-summarize/blob/master/CHANGELOG.md"
  )

  # Package every file tracked by git (listed NUL-separated by
  # `git ls-files -z`), minus paths equal to __FILE__ and anything matching
  # `unpackaged`.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| (path == __FILE__) || path.match(unpackaged) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_runtime_dependency "activerecord", ">= 5.0"
  spec.add_development_dependency "rake"

  # For more information and examples about making a new gem, see
  # https://bundler.io/guides/creating_gem.html
end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "activerecord/summarize"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap: installs the gems listed in the Gemfile.

# Stop on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Print each line as it is read and each command as it is executed.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
# Nested `module` statements (rather than `module ActiveRecord::Summarize`)
# so this file can be loaded on its own, e.g. by the gemspec, before
# ActiveRecord itself has been required.
module ActiveRecord
  module Summarize
    # Version string of the activerecord-summarize gem.
    VERSION = "0.2.1"
  end
end
@@ -0,0 +1,288 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "summarize/version"
4
+ require_relative "../chainable_result"
5
+
6
+ module ActiveRecord::Summarize
7
# Raised when a calculation or option cannot be combined into the single
# summarize query.
class Unsummarizable < StandardError
end
8
+
9
+ class Summarize
10
+ attr_reader :current_result_row, :pure, :noop, :from_where
11
+ alias_method :pure?, :pure
12
+ alias_method :noop?, :noop
13
+
14
+ # noop: true
15
+ # causes `summarize` simply to yield the original relation and a trivial,
16
+ # synchronous `with` proc. It is meant as a convenient way to test/prove
17
+ # the correctness of `summarize` and to compare performance of the single
18
+ # combined query vs the original individual queries.
19
+ # N.b., if `relation` already has a grouping applied, there is no direct
20
+ # ActiveRecord translation for what `summarize` does, so noop: true is
21
+ # impossible and raises an exception.
22
+ # pure: true
23
+ # lets `summarize` know that you're not mutating state within the block,
24
+ # so it doesn't need to go spelunking in the block binding for
25
+ # ChainableResults. See `if !pure?` section below.
26
+ # N.b., if `relation` already has a grouping applied, pure: true is
27
+ # implied and pure: false throws an exception, as the impure behavior
28
+ # would be non-obvious and of doubtful value.
29
# relation - the relation `summarize` was called on.
# pure     - nil/false/true; forced to true when `relation` is grouped.
# noop     - when true, `process` yields the original relation untouched.
#
# Raises Unsummarizable for `pure: false` on a grouped relation and
# ArgumentError for `noop: true` on a grouped relation.
def initialize(relation, pure: nil, noop: false)
  grouped = relation.group_values.any?
  if grouped
    raise Unsummarizable, "`summarize` must be pure when called on a grouped relation" if pure == false
    raise ArgumentError, "`summarize(noop: true)` is impossible on a grouped relation" if noop
  end
  @relation = relation
  @noop = noop
  @pure = grouped ? true : !!pure
  @calculations = []
end
38
+
39
# Runs the `summarize` block and returns its return value with every
# ChainableResult replaced by a real value.
#
# The block is yielded two arguments: the relation to run calculations
# against, and a `with` callable for combining several results.
def process(&block)
  # For noop, just yield the original relation and a transparent `with` proc.
  # Like ChainableResult.with, a single result is handed to the combining
  # block as itself and several results as one array (which block parameters
  # destructure), so a `with` block sees the same arguments with and without
  # noop.
  if noop?
    transparent_with = lambda do |*results, &combine|
      ((results.size == 1) ? results.first : results).then(&combine)
    end
    return yield(@relation, transparent_with)
  end

  # Within the block, the relation and its future clones intercept calls to
  # `count` and `sum`, registering them and returning a ChainableResult via
  # summarize.add_calculation.
  summarizing_relation = @relation.unscope(:group).tap do |r|
    r.instance_variable_set(:@summarize, self)
    class << r
      include InstanceMethods
    end
  end
  future_block_result = ChainableResult.wrap(yield(summarizing_relation, ChainableResult::WITH))

  ChainableResult.with_cache(!pure?) do
    # `resolve` builds the single query that answers all collected calculations,
    # executes it, and aggregates the results by the values of
    # `@relation.group_values`. In the common case of no `@relation.group_values`,
    # the result is just `{[]=>[*final_value_for_each_calculation]}`
    values_by_group = resolve.transform_values! do |row|
      # Each row (in the common case, only one) is used to resolve any
      # ChainableResults returned by the block. These may be a one-to-one mapping,
      # or the block return may have combined some results via `with` or chained
      # additional methods on results, etc..
      @current_result_row = row
      future_block_result.value
    end

    # Change ungrouped result from `{[]=>v}` to `v` and grouped-by-one-column
    # result from `{[k1]=>v1,[k2]=>v2,...}` to `{k1=>v1,k2=>v2,...}`.
    # (Those are both probably more common than multiple-column base grouping.)
    result = case @relation.group_values.size
    when 0 then values_by_group.values.first
    when 1 then values_by_group.transform_keys! { |k| k.first }
    else values_by_group
    end

    if !pure?
      # Check block scope's local vars and block's self's instance vars for
      # any ChainableResult, and replace it with its resolved value.
      #
      # Also check the values of any of those vars that are Hashes, since IME
      # it's not rare to assign counts to hashes, and it is rare to have giant
      # hashes that would be particularly wasteful to traverse. Do not do the
      # same for Arrays, since IME pushing counts to arrays is rare, and large
      # arrays, e.g., of many eagerly-fetched ActiveRecord objects, are not
      # rare in controllers.
      #
      # Preconditions:
      # - @current_result_row is still set to the single result row
      # - we are within a ChainableResult.with_cache(true) block
      block_binding = block.binding
      block_self = block_binding.receiver
      block_binding.local_variables.each do |name|
        held = block_binding.local_variable_get(name)
        if held.is_a?(ChainableResult)
          block_binding.local_variable_set(name, held.value)
        elsif held.is_a?(Hash)
          lightly_touch_impure_hash(held)
        end
      end
      block_self.instance_variables.each do |name|
        held = block_self.instance_variable_get(name)
        if held.is_a?(ChainableResult)
          block_self.instance_variable_set(name, held.value)
        elsif held.is_a?(Hash)
          lightly_touch_impure_hash(held)
        end
      end
    end
    @current_result_row = nil
    result
  end
end
107
+
108
# Registers one intercepted `count`/`sum` and returns a ChainableResult
# standing in for its value.
#
# The placeholder resolves by reading this calculation's slot (its
# registration position) from `current_result_row`.
def add_calculation(relation, operation, column_name)
  merge_from_where!(relation)
  slot = @calculations.length
  registered = CalculationResult.new(relation, operation, column_name)
  @calculations.push(registered)
  ChainableResult.wrap(registered) { current_result_row[slot] }
end
115
+
116
# Builds and runs the single combined query, then folds its rows into one
# array of final values per base group.
#
# Returns a Hash keyed by the array of base-group values (`[]` when the
# summarized relation is ungrouped). Each value is an Array holding, in
# registration order, the result of every calculation: a number for a
# calculation without its own `.group`, otherwise a Hash of group key(s) to
# number.
def resolve
  # Build & execute query
  groups = all_groups
  selects = value_selects
  # MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
  # where the numbers are 1-indexed references to SELECT values. It makes these
  # generated queries much shorter and more readable, and it avoids the
  # ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
  # from underlying tables) even where those are supported. But in case we find
  # a database that doesn't support numeric references, the fully-explicit
  # grouping code is commented out below.
  #
  # grouped_query = groups.any? ? from_where.group(*groups) : from_where
  grouped_query = groups.any? ? from_where.group(*1..groups.size) : from_where
  data = grouped_query.pluck(*groups, *selects)
  # With exactly one column, `pluck` returns a flat array of values instead of
  # an array of one-element rows. Everything below indexes into rows, so wrap
  # each lone value.
  data = data.map { |value| [value] } if groups.size + selects.size == 1

  # Aggregate & assign results
  group_idx = groups.each_with_index.to_h
  starting_values, reducers = @calculations.each_with_index.map do |f, i|
    value_column = groups.size + i
    group_columns = f.relation.group_values.map { |k| group_idx[k] }
    # SQL SUM is NULL (nil here) when it saw no non-NULL input; treat that as
    # 0, which is what an independent `.sum` would return.
    case group_columns.size
    when 0 then [
      0,
      ->(memo, row) { memo + (row[value_column] || 0) }
    ]
    when 1 then [
      Hash.new(0), # Default 0 makes the reducer much cleaner, but we have to clean it up later
      ->(memo, row) {
        amount = row[value_column] || 0
        memo[row[group_columns[0]]] += amount unless amount.zero?
        memo
      }
    ]
    else [
      Hash.new(0),
      ->(memo, row) {
        amount = row[value_column] || 0
        memo[group_columns.map { |column| row[column] }] += amount unless amount.zero?
        memo
      }
    ]
    end
  end.transpose # For an array of pairs, `transpose` is the reverse of `zip`
  cols = (0...reducers.size)
  base_group_columns = (0...base_groups.size)
  data
    .group_by { |row| row[base_group_columns] }
    .tap { |h| h[[]] = [] if h.empty? && base_groups.size.zero? }
    .transform_values! do |rows|
      values = starting_values.map(&:dup) # map(&:dup) since some are hashes and we don't want to mutate starting_values
      rows.each do |row|
        cols.each do |i|
          values[i] = reducers[i].call(values[i], row)
        end
      end
      # Set any hash's default back to nil, since callers will expect a normal hash
      values.each { |v| v.default = nil if v.is_a? Hash }
    end
end
173
+
174
+ private
175
+
176
# The summarized relation without its select and group values, memoized.
# Every calculation's relation is checked against it for structural
# compatibility.
def compatible_base
  return @compatible_base if @compatible_base
  @compatible_base = @relation.except(:select, :group)
end
179
+
180
# Folds another calculation's relation into `@from_where`, the relation the
# combined query is eventually run on.
#
# Raises Unsummarizable when `other` is not structurally compatible with the
# base relation.
def merge_from_where!(other)
  candidate = other.except(:select, :group)
  conflicts = compatible_base.send(:structurally_incompatible_values_for, candidate)
  if !conflicts.empty?
    raise Unsummarizable, "Within a `summarize` block, each calculation must be structurally compatible. Incompatible values: #{conflicts}"
  end
  # Logical OR the criteria of all calculations. Most often this is equivalent
  # to `compatible_base`, since usually one is a total or grouped count without
  # additional `where` criteria, but that needn't necessarily be so.
  @from_where = @from_where.nil? ? candidate : @from_where.or(candidate)
end
195
+
196
# A fresh copy of the group expressions of the summarized relation, so
# callers may append to it without touching the relation.
def base_groups
  [*@relation.group_values]
end
199
+
200
# Every group expression the combined query needs: the base groups first
# (kept verbatim, even if one is repeated), followed by each not-yet-seen
# group expression from the registered calculations, in first-seen order.
def all_groups
  collected = base_groups
  seen = Set.new(collected)
  from_calculations = @calculations.map { |calculation| calculation.relation.group_values }.flatten
  # Set#add? returns nil for a key that is already present, so only
  # first occurrences are appended.
  from_calculations.each { |key| collected.push(key) if seen.add?(key) }
  collected
end
212
+
213
# One select expression per registered calculation, in registration order.
def value_selects
  @calculations.map do |calculation|
    calculation.select_value(@relation)
  end
end
216
+
217
# Replaces, in place, every top-level value of `h` that is a
# ChainableResult with its resolved value. Nested collections are not
# traversed. Returns `h`.
def lightly_touch_impure_hash(h)
  h.each_pair do |key, entry|
    next unless entry.is_a?(ChainableResult)
    h[key] = entry.value
  end
end
222
+ end
223
+
224
# One registered calculation: the relation it was called on, the operation
# name ("sum" or "count"), and the column/expression being aggregated.
class CalculationResult
  attr_reader :relation, :method, :column

  def initialize(relation, method, column)
    @relation = relation
    @method = method
    @column = column
  end

  # Builds the Arel aggregate node selected for this calculation in the
  # combined query.
  #
  # Conditions this calculation has beyond those of `base_relation` are moved
  # into a CASE expression, so rows that don't match contribute
  # `unmatch_value` instead of the column.
  def select_value(base_relation)
    where = relation.where_clause - base_relation.where_clause
    for_select = column
    unless where.empty?
      for_select = Arel::Nodes::Case.new(where.ast).when(true, for_select)
      # Go through `else` so the fallback is wrapped in an Else node; the
      # second argument of Case.new is stored as the default node as-is. A
      # CASE without ELSE already yields NULL, so nothing is added for nil.
      fallback = unmatch_value
      for_select = for_select.else(fallback) unless fallback.nil?
    end
    function.new([for_select]).tap { |f| f.distinct = relation.distinct_value }
  end

  # Value a non-matching row contributes inside the CASE expression.
  def unmatch_value
    case method
    when "sum" then 0 # adding zero leaves a sum unchanged
    when "count" then nil # NULL is not counted
    else raise "Unknown calculation method"
    end
  end

  # Arel node class implementing the aggregate.
  def function
    case method
    when "sum" then Arel::Nodes::Sum
    when "count" then Arel::Nodes::Count
    else raise "Unknown calculation method"
    end
  end
end
256
+
257
# Adds `summarize` to relations.
module RelationMethods
  # Runs the block through a new Summarize for this relation; `opts` are
  # passed on to Summarize.new.
  #
  # Raises Unsummarizable when this relation carries @summarize, i.e. when
  # called on a relation that is already being summarized.
  def summarize(**opts, &block)
    if @summarize
      raise Unsummarizable, "Cannot summarize within a summarize block"
    end
    summarizer = ActiveRecord::Summarize::Summarize.new(self, **opts)
    summarizer.process(&block)
  end
end
263
+
264
# Mixed into the singleton class of the relation yielded to a `summarize`
# block. Intercepts `count` and `sum` so they are registered with the
# Summarize instance instead of being executed immediately.
module InstanceMethods
  private

  # Registers "count"/"sum" with @summarize and returns its placeholder; any
  # other operation falls through to the regular implementation.
  def perform_calculation(operation, column_name)
    case operation = operation.to_s.downcase
    when "count", "sum"
      if [nil, "*", :all].include? column_name
        # No specific column requested: aggregate over the model's primary key
        # rather than assuming it is named `id`. Fall back to :id when the
        # model reports no single-column key.
        key = klass.primary_key
        column_name = key.is_a?(::String) ? key : :id
      end
      @summarize.add_calculation(self, operation, aggregate_column(column_name))
    else super
    end
  end
end
276
+ end
277
+
278
class ActiveRecord::Base
  # Model-level entry point: summarizes the relation returned by `all`.
  # `opts` are passed on to Summarize.new.
  def self.summarize(**opts, &block)
    summarizer = ActiveRecord::Summarize::Summarize.new(all, **opts)
    summarizer.process(&block)
  end
end
285
+
286
# Make `summarize` available on every relation.
ActiveRecord::Relation.include(ActiveRecord::Summarize::RelationMethods)
@@ -0,0 +1,111 @@
1
# A placeholder for a value that will only be known later.
#
# Calling (almost) any method on a ChainableResult does not run it; it returns
# a new ChainableResult::Future that remembers the call. Calling #value
# resolves the source and replays the recorded call on it.
#
# Instances are normally built through ChainableResult.wrap / .with, which
# choose the subclass that knows how to resolve the given source. The base
# class itself defines no +resolve_source+.
#
# NOTE: inside this class body the names Array and Hash refer to the nested
# subclasses, so the core classes are always written ::Array and ::Hash.
class ChainableResult
  # source - what the recorded call will eventually be applied to
  # method - name of the method to replay; defaults to :then when a block is
  #          given and :itself otherwise
  # args   - positional arguments for the replayed call
  # opts   - keyword arguments for the replayed call
  # block  - block for the replayed call
  def initialize(source, method = nil, args = [], opts = {}, &block)
    @source = source
    @method = method || (block ? :then : :itself)
    @args = args
    @opts = opts
    @block = block
    @cached = false
  end

  # Resolves the source and replays the recorded call on it.
  #
  # Inside a ChainableResult.with_cache block the result is computed once and
  # remembered; outside of one it is recomputed on every call.
  def value
    return compute_value unless use_cache?
    return @value if @cached

    # Flag the cache only after the computation succeeded, so that a call
    # which raised is retried next time instead of being remembered as nil.
    @value = compute_value
    @cached = true
    @value
  end

  # The following methods already exist on every Object, so they would never
  # reach method_missing; they are defined explicitly to defer them as well.

  def to_json(**opts)
    ChainableResult::Future.new(self, :to_json, [], opts)
  end

  def then(&block)
    ChainableResult::Future.new(self, :then, &block)
  end

  def yield_self(&block)
    ChainableResult::Future.new(self, :yield_self, &block)
  end

  def tap(&block)
    ChainableResult::Future.new(self, :tap, &block)
  end

  # Records any other call as a Future to be replayed on the resolved value.
  def method_missing(method, *args, **opts, &block)
    ChainableResult::Future.new(self, method, args, opts, &block)
  end

  # Every method name is accepted, since every call is recorded.
  def respond_to_missing?(method_name, include_private = false)
    true
  end

  # A call recorded on another ChainableResult: resolves by asking it for its value.
  class Future < self
    def resolve_source
      @source.value
    end
  end

  # Wraps a core Array: resolves each element through RESOLVE_ITEM.
  class Array < self
    def resolve_source
      @source.map(&RESOLVE_ITEM)
    end
  end

  # Wraps a core Hash: resolves each value (keys are untouched) through RESOLVE_ITEM.
  class Hash < self
    def resolve_source
      @source.transform_values(&RESOLVE_ITEM)
    end
  end

  # Wraps any other object: the source is used as is.
  class Other < self
    def resolve_source
      @source
    end
  end

  # Wraps +v+ in the subclass matching its type and records the given call.
  # A +v+ that is already a ChainableResult is returned unchanged, in which
  # case +method+, +args+, +opts+ and the block are ignored.
  def self.wrap(v, method = nil, *args, **opts, &block)
    return v if v.is_a?(ChainableResult) # don't wrap, exit early

    method ||= block ? :then : :itself
    klass = case v
            when ::Array then ChainableResult::Array
            when ::Hash then ChainableResult::Hash
            else ChainableResult::Other
            end
    klass.new(v, method, args, opts, &block)
  end

  # Defers +block+ until the given results can be resolved. A single result
  # is passed to the block as is; several are passed as one array.
  def self.with(*results, &block)
    ChainableResult.wrap(results.size == 1 ? results.first : results, :then, &block)
  end

  WITH = method(:with)

  # Resolves +item+: a ChainableResult yields its value, core arrays and
  # hashes are resolved element-wise, and anything else is returned unchanged.
  def self.resolve_item(item)
    case item
    when ChainableResult then item.value
    when ::Array then ChainableResult::Array.new(item).value
    when ::Hash then ChainableResult::Hash.new(item).value
    else item
    end
  end

  RESOLVE_ITEM = method(:resolve_item)
  CACHE_MODE_KEY = :"ChainableResult::USE_CACHE"

  # Runs the block with the cache mode stored under Thread.current set to
  # +mode+ and returns the block's result. The previous mode is restored
  # afterwards, including when the block raises.
  def self.with_cache(mode = true)
    prev = Thread.current[CACHE_MODE_KEY]
    Thread.current[CACHE_MODE_KEY] = mode
    yield
  ensure
    Thread.current[CACHE_MODE_KEY] = prev
  end

  private

  # Replays the recorded call on the freshly resolved source.
  def compute_value
    resolve_source.send(@method, *@args, **@opts, &@block)
  end

  # True while inside a with_cache block whose mode is truthy.
  def use_cache?
    !!Thread.current[CACHE_MODE_KEY]
  end
end
@@ -0,0 +1,6 @@
1
# Type signatures for ActiveRecord::Summarize.
# See the writing guide of rbs: https://github.com/ruby/rbs#guides
module ActiveRecord
  module Summarize
    # Declared as a String constant.
    VERSION: String
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-summarize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ platform: ruby
6
+ authors:
7
+ - Joshua Paine
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-03-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Just wrap your existing code in `@relation.summarize do |relation| ...
42
+ end` and run your queries against relation instead of @relation.
43
+ email:
44
+ - joshua@letterblock.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".standard.yml"
50
+ - CHANGELOG.md
51
+ - Gemfile
52
+ - Gemfile.lock
53
+ - LICENSE.txt
54
+ - README.md
55
+ - Rakefile
56
+ - activerecord-summarize.gemspec
57
+ - bin/console
58
+ - bin/setup
59
+ - lib/activerecord/summarize.rb
60
+ - lib/activerecord/summarize/version.rb
61
+ - lib/chainable_result.rb
62
+ - sig/activerecord/summarize.rbs
63
+ homepage: https://letterblock.com
64
+ licenses:
65
+ - MIT
66
+ metadata:
67
+ allowed_push_host: https://rubygems.org
68
+ homepage_uri: https://letterblock.com
69
+ source_code_uri: https://github.com/midnightmonster/activerecord-summarize
70
+ changelog_uri: https://github.com/midnightmonster/activerecord-summarize/blob/master/CHANGELOG.md
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 2.6.0
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.3.3
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Run many .count and/or .sum queries in a single efficient query with minimal
90
+ code changes, even with different .group and only-partly-overlapping .where filters.
91
+ Nearly-free speedups for mature Rails apps.
92
+ test_files: []