activerecord-summarize 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.standard.yml +5 -0
- data/CHANGELOG.md +16 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/Rakefile +15 -0
- data/activerecord-summarize.gemspec +41 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/activerecord/summarize/version.rb +7 -0
- data/lib/activerecord/summarize.rb +288 -0
- data/lib/chainable_result.rb +111 -0
- data/sig/activerecord/summarize.rbs +6 -0
- metadata +92 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0f4063a016e57d85371ba91aa751c223c5830f29bf46a7f693c00af2b808fb48
|
4
|
+
data.tar.gz: c63ee2b1ed0e2c7e39f71125f759a4f933cd56281c414b0e694f8f46511b18f4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3252c9e8bc8eb0e5ca6eef4b3a089d68f5f67f17d42824f5478b02181f7a618a9feee961c9636f1b696acc6661975580ab75aef8c9edcc6a08b480eae11ae188
|
7
|
+
data.tar.gz: 062a2d2557969c0ae4fefd6e656dcd5bb8b4981e0b71899a726025fd3c6ef081e085c8c87729c85555cdd3999f58f8daf323f1a2f6b00f59f71b5de6f8a3a78c
|
data/.standard.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
## [0.2.1] - 2022-02-17
|
2
|
+
|
3
|
+
- Initial public release
|
4
|
+
- Wrap existing groups of related `ActiveRecord` calculations in a `summarize` block for an instant 2-5x speedup
|
5
|
+
- Supports combining all `.count` and `.sum` called on [descendants of] the summarizing relation in a `summarize` block
|
6
|
+
- Supports separate `.where`, `.group`, and custom scopes for any or all calculations in a `summarize` block
|
7
|
+
- Calculation methods return placeholder objects that will be replaced with the true calculation result at the end of the block.
|
8
|
+
- Supports chaining almost any method on the placeholder calculation results
|
9
|
+
- Some methods of `Object` that I haven't tried yet or that are injected into `Object` by other gems may not work, as they won't trigger `method_missing`.
|
10
|
+
- Transparently replaces calculation placeholders that have been saved to local variables in the block's scope or instance variables of the block's execution context
|
11
|
+
- Supports `pure: true` option to skip the step of looking outside the block return value for placeholders
|
12
|
+
- Supports `noop: true` option to disable all `summarize` functionality and just return the original relation
|
13
|
+
- `noop: true` and `noop: false` (default) produce the same final results, just `noop: false` is usually faster
|
14
|
+
- Build even more complex queries by using `summarize` on a relation that already has `.group` applied.
|
15
|
+
- Results are grouped just like a standard `.group(*expressions).count`, but instead of single numbers, the values are whatever set of calculations you return from the block, including further `.group(*more).calculate(:sum|:count,*args)` calculations, in whatever `Array` or `Hash` shape you arrange them.
|
16
|
+
- N.b., `pure: true` is implied and required in this mode, and `noop: true` is not possible, since ActiveRecord has no way to do this in the general case without `summarize`.
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
activerecord-summarize (0.2.1)
|
5
|
+
activerecord (>= 5.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activemodel (7.0.2.2)
|
11
|
+
activesupport (= 7.0.2.2)
|
12
|
+
activerecord (7.0.2.2)
|
13
|
+
activemodel (= 7.0.2.2)
|
14
|
+
activesupport (= 7.0.2.2)
|
15
|
+
activesupport (7.0.2.2)
|
16
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
17
|
+
i18n (>= 1.6, < 2)
|
18
|
+
minitest (>= 5.1)
|
19
|
+
tzinfo (~> 2.0)
|
20
|
+
ast (2.4.2)
|
21
|
+
concurrent-ruby (1.1.9)
|
22
|
+
i18n (1.10.0)
|
23
|
+
concurrent-ruby (~> 1.0)
|
24
|
+
minitest (5.15.0)
|
25
|
+
parallel (1.21.0)
|
26
|
+
parser (3.1.1.0)
|
27
|
+
ast (~> 2.4.1)
|
28
|
+
rainbow (3.1.1)
|
29
|
+
rake (13.0.6)
|
30
|
+
regexp_parser (2.2.1)
|
31
|
+
rexml (3.2.5)
|
32
|
+
rubocop (1.25.1)
|
33
|
+
parallel (~> 1.10)
|
34
|
+
parser (>= 3.1.0.0)
|
35
|
+
rainbow (>= 2.2.2, < 4.0)
|
36
|
+
regexp_parser (>= 1.8, < 3.0)
|
37
|
+
rexml
|
38
|
+
rubocop-ast (>= 1.15.1, < 2.0)
|
39
|
+
ruby-progressbar (~> 1.7)
|
40
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
41
|
+
rubocop-ast (1.16.0)
|
42
|
+
parser (>= 3.1.1.0)
|
43
|
+
rubocop-performance (1.13.2)
|
44
|
+
rubocop (>= 1.7.0, < 2.0)
|
45
|
+
rubocop-ast (>= 0.4.0)
|
46
|
+
ruby-progressbar (1.11.0)
|
47
|
+
standard (1.7.2)
|
48
|
+
rubocop (= 1.25.1)
|
49
|
+
rubocop-performance (= 1.13.2)
|
50
|
+
tzinfo (2.0.4)
|
51
|
+
concurrent-ruby (~> 1.0)
|
52
|
+
unicode-display_width (2.1.0)
|
53
|
+
|
54
|
+
PLATFORMS
|
55
|
+
arm64-darwin-21
|
56
|
+
x86_64-linux
|
57
|
+
|
58
|
+
DEPENDENCIES
|
59
|
+
activerecord-summarize!
|
60
|
+
minitest (~> 5.0)
|
61
|
+
rake (~> 13.0)
|
62
|
+
standard (~> 1.3)
|
63
|
+
|
64
|
+
BUNDLED WITH
|
65
|
+
2.3.3
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2022 Joshua Paine
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
# ActiveRecord::Summarize
|
2
|
+
|
3
|
+
## Why `summarize`?
|
4
|
+
|
5
|
+
1. Make existing groups of related `ActiveRecord` calculations twice as fast (or more) with minimal code alteration. It's like a `go_faster` block.
|
6
|
+
|
7
|
+
2. For more complex reporting requirements, including nested `.group` calls, use `summarize` for fast, legible code that you just couldn't have written before without unacceptable performance or lengthy custom SQL and data-wrangling.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your Rails application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'activerecord-summarize'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle install
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
#### Suppose your controller method looks like this:
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
purchases = Purchase.complete
|
27
|
+
promotions = purchases.where.not(promotion_id: nil)
|
28
|
+
@promotion_sales = promotions.count
|
29
|
+
@promotion_revenue = promotions.sum(:amount)
|
30
|
+
@by_region = purchases.group(:region_id).count
|
31
|
+
```
|
32
|
+
|
33
|
+
#### Make it this instead:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
Purchase.complete.summarize do |purchases|
|
37
|
+
promotions = purchases.where.not(promotion_id: nil)
|
38
|
+
@promotion_sales = promotions.count
|
39
|
+
@promotion_revenue = promotions.sum(:amount)
|
40
|
+
@by_region = purchases.group(:region_id).count
|
41
|
+
end
|
42
|
+
```
|
43
|
+
#### ...and you'll have exactly the same instance variables set, but only one SQL query will have been executed.
|
44
|
+
|
45
|
+
You can run as many calculations in a `summarize` block as it makes sense to run, so long as they all chain to the relation on which you called `summarize`. They can use different, possibly-overlapping subsets of the original relation, i.e., they can have their own `where` clauses and even `group`. The final result of each will be exactly as it would have been if you had run each query independently, but only one query will actually be issued to the database.
|
46
|
+
|
47
|
+
### Limitations & details
|
48
|
+
|
49
|
+
The only restriction is that each of the queries must be structurally compatible with the parent relation, in the same sense as is required for `relation.or(other)`. So if you wanted to display the region's name, you'd need to group by a sub-select (ew) or do the join at the top level:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
Purchase.complete.left_joins(:region).summarize do |purchases|
|
53
|
+
promotions = purchases.where.not(promotion_id: nil)
|
54
|
+
@promotion_sales = promotions.count
|
55
|
+
@promotion_revenue = promotions.sum(:amount)
|
56
|
+
@by_region = purchases.group("regions.name").count
|
57
|
+
end
|
58
|
+
```
|
59
|
+
|
60
|
+
Until the `summarize` block ends, the return values of your calculations are `ChainableResult::Future` instances, a bit like a Promise with a more convenient API. You can call any method you like on a `ChainableResult`, and you'll get back another `ChainableResult`, and they'll all turn out alright in the end—provided you called methods that would have worked if you had run that calculation without `summarize`. On the other hand, using a `ChainableResult` as an argument to another method generally will not work.
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
Purchase.last_quarter.complete.summarize do |purchases|
|
64
|
+
@sales = purchases.sum(:amount)
|
65
|
+
# x * y is syntactic sugar for x.*(y), so this will work:
|
66
|
+
@vc_projection = @sales * 3
|
67
|
+
# And this won't:
|
68
|
+
@vc_projection = 3 * @sales
|
69
|
+
end
|
70
|
+
```
|
71
|
+
|
72
|
+
If, within a `summarize` block, you want to combine data from more than one `ChainableResult`, you must use the otherwise-optional second argument yielded to the block, a `proc` I like to name `with`. Pass it all the results you want to combine and a block that combines them and returns the new result:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
Purchase.complete.left_joins(:promotion).summarize do |purchases, with|
|
76
|
+
@all_revenue = purchases.sum(:amount)
|
77
|
+
promotions = purchases.where.not(promotions: {id: nil})
|
78
|
+
@promotion_sales = promotions.count
|
79
|
+
@promotion_discounts = promotions.sum("promotions.discount_amount")
|
80
|
+
@avg_discount = with[@promotion_sales, @promotion_discounts] do |sales, discounts|
|
81
|
+
sales.zero? ? 0 : discounts / sales
|
82
|
+
end
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
Treat a `with` block as a pure function: i.e., return the value you care about, and don't set or change any other state within the block. Behavior in any other case is undefined.
|
87
|
+
|
88
|
+
## Escape hatch
|
89
|
+
|
90
|
+
The query generated by `summarize` is often much faster than equivalent queries written without it, but for few-query cases where each query is well-served by its own index, `summarize` could possibly be slower.
|
91
|
+
|
92
|
+
By design, every operation performed with `summarize` is correct and corresponds to normal `ActiveRecord` behavior, and any operations that can't be done correctly this way, or aren't supported yet, will raise exceptions. But only imperfect humans have worked on this gem, so you might also wonder if `summarize` is producing correct results.
|
93
|
+
|
94
|
+
Fortunately, you can easily check both with `summarize(noop: true)`, which causes `summarize` to yield the original relation it was called on and a trivial `with` proc. The block will be executed as though `summarize` were not involved, with each calculation executing separately and immediately returning numbers or hashes.
|
95
|
+
|
96
|
+
If you do find any case where you get different results with `summarize(noop: true)`, I'd be grateful if you filed an issue.
|
97
|
+
|
98
|
+
## How
|
99
|
+
|
100
|
+
`ActiveRecord::Relation#summarize` yields a lightly-modified copy of the relation that intercepts all calls to `sum` or `count` which, instead of a number or hash, return a `ChainableResult::Future`. A `ChainableResult` accepts any method called on it, returning a new `ChainableResult` that will evaluate to the result of running the method on the eventual result of its parent.
|
101
|
+
|
102
|
+
At the end of the `summarize` block:
|
103
|
+
|
104
|
+
1. All the calculations are combined into a single query.
|
105
|
+
2. The results of the query are collected into the same shapes they would have if they had been called independently. E.g., a bare `.count` returns a number, but `.group(*expressions).count` returns a hash with single value (one group expression) or array (two-plus expressions) keys.
|
106
|
+
3. Any `ChainableResult` in the return value of the block (usually a single `ChainableResult` or an `Array` or `Hash` with `ChainableResult` values) is replaced with its resolved value.
|
107
|
+
4. Any `ChainableResult` in the local scope of the block (i.e., `block.binding`) or an instance variable of the block context (i.e., `block.binding.receiver`) is replaced with its resolved value.
|
108
|
+
|
109
|
+
N.b., if you are using `summarize` in a more functional style and will return all values you care about, you can let `summarize` know to skip step 4 by invoking it with `summarize(pure: true)`.
|
110
|
+
|
111
|
+
When the parent relation already has `.group` applied, `pure: true` is implied and step 4 does not take place.
|
112
|
+
|
113
|
+
## Power usage with `group`
|
114
|
+
|
115
|
+
Build even more complex queries by using `summarize` on a relation that already has `.group` applied. Results are grouped just like a standard `.group(*expressions).count`, but instead of single numbers, the values are whatever set of calculations you return from the block, including further `.group(*more).calculate(:sum|:count,*args)` calculations, in whatever `Array` or `Hash` shape you arrange them. For example:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
puts(Purchase.last_year.complete.group(:region_id).summarize do |purchases,with|
|
119
|
+
total = purchases.count
|
120
|
+
by_quarter = purchases.group(CREATED_TO_YEAR_SQL, CREATED_TO_QUARTER_SQL).count.sort.to_h
|
121
|
+
target = with[total / 4, by_quarter.values.max] {|avg_q, best_q| [avg_q * 1.25, best_q].max.round }
|
122
|
+
{last_year: total, quarters: by_quarter, unit_target: target}
|
123
|
+
end)
|
124
|
+
# Output:
|
125
|
+
# {
|
126
|
+
# 1 => {
|
127
|
+
# last_year: 2717316,
|
128
|
+
# quarters: {
|
129
|
+
# [2021, 1] => 634057,
|
130
|
+
# [2021, 2] => 590012,
|
131
|
+
# [2021, 3] => 659010,
|
132
|
+
# [2021, 4] => 834237
|
133
|
+
# },
|
134
|
+
# unit_target: 849161
|
135
|
+
# },
|
136
|
+
# 2 => { ... },
|
137
|
+
# 3 => { ... }
|
138
|
+
# }
|
139
|
+
```
|
140
|
+
|
141
|
+
The ActiveRecord API has no direct analog for this, so `noop: true` is not allowed when `summarize` is called on a grouped relation.
|
142
|
+
|
143
|
+
When the relation already has `group` applied, for correct results, `summarize` requires that the block mutate no state and return all values you care about: functional purity, no side effects. `ChainableResult` values referenced by instance variables or local variables not returned from the block won't be evaluated. I.e., `pure: true` is implied and `pure: false` is not allowed. To see why:
|
144
|
+
|
145
|
+
```ruby
|
146
|
+
# A trivial example:
|
147
|
+
Purchase.complete.group(:region_id).summarize {|purchases| purchases.sum(:amount) }
|
148
|
+
|
149
|
+
# ...is exactly equivalent to:
|
150
|
+
Purchase.complete.group(:region_id).sum(:amount)
|
151
|
+
|
152
|
+
# But if there were three regions, what should the value of @target be in this case?
|
153
|
+
region_targets = Purchase.last_quarter.complete.group(:region_id).summarize do |purchases|
|
154
|
+
@target = purchases.sum(:amount) * 1.25
|
155
|
+
end
|
156
|
+
```
|
157
|
+
|
158
|
+
To a Rubyist, that last example looks like the block will be evaluated for each group, so `@target` should keep whatever value it got the last time the block was run. However:
|
159
|
+
|
160
|
+
1. This is not often useful.
|
161
|
+
2. The block is not actually linearly evaluated for each group.
|
162
|
+
|
163
|
+
Instead the block is evaluated once to determine what calculations need to be run, the query is built and evaluated, and then, for each group of the parent relation, the return value of the block is evaluated with respect to just those rows belonging to the group. In practice this is quite powerful and makes a pleasant, legible API for complex reporting.
|
164
|
+
|
165
|
+
## Development
|
166
|
+
|
167
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
168
|
+
|
169
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
170
|
+
|
171
|
+
## Contributing
|
172
|
+
|
173
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/midnightmonster/activerecord-summarize.
|
174
|
+
|
175
|
+
## License
|
176
|
+
|
177
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rake/testtask"
|
5
|
+
|
6
|
+
Rake::TestTask.new(:test) do |t|
|
7
|
+
t.libs << "test"
|
8
|
+
t.libs << "lib"
|
9
|
+
t.test_files = FileList["test/**/test_*.rb"]
|
10
|
+
end
|
11
|
+
|
12
|
+
require "standard/rake"
|
13
|
+
|
14
|
+
task default: %i[test standard]
|
15
|
+
task full: %i[test standard:fix]
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/activerecord/summarize/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "activerecord-summarize"
|
7
|
+
spec.version = ActiveRecord::Summarize::VERSION
|
8
|
+
spec.authors = ["Joshua Paine"]
|
9
|
+
spec.email = ["joshua@letterblock.com"]
|
10
|
+
|
11
|
+
spec.summary = "Run many .count and/or .sum queries in a single efficient query with minimal code changes, even with different .group and only-partly-overlapping .where filters. Nearly-free speedups for mature Rails apps."
|
12
|
+
spec.description = "Just wrap your existing code in `@relation.summarize do |relation| ... end` and run your queries against relation instead of @relation."
|
13
|
+
spec.homepage = "https://letterblock.com"
|
14
|
+
spec.license = "MIT"
|
15
|
+
spec.required_ruby_version = ">= 2.6.0"
|
16
|
+
|
17
|
+
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
18
|
+
|
19
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
20
|
+
spec.metadata["source_code_uri"] = "https://github.com/midnightmonster/activerecord-summarize"
|
21
|
+
spec.metadata["changelog_uri"] = "https://github.com/midnightmonster/activerecord-summarize/blob/master/CHANGELOG.md"
|
22
|
+
|
23
|
+
# Specify which files should be added to the gem when it is released.
|
24
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
25
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
26
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
27
|
+
(f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
|
28
|
+
end
|
29
|
+
end
|
30
|
+
spec.bindir = "exe"
|
31
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
32
|
+
spec.require_paths = ["lib"]
|
33
|
+
|
34
|
+
# Uncomment to register a new dependency of your gem
|
35
|
+
# spec.add_dependency "example-gem", "~> 1.0"
|
36
|
+
spec.add_runtime_dependency "activerecord", ">= 5.0"
|
37
|
+
spec.add_development_dependency "rake"
|
38
|
+
|
39
|
+
# For more information and examples about making a new gem, check out our
|
40
|
+
# guide at: https://bundler.io/guides/creating_gem.html
|
41
|
+
end
|
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "activerecord/summarize"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,288 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "summarize/version"
|
4
|
+
require_relative "../chainable_result"
|
5
|
+
|
6
|
+
module ActiveRecord::Summarize
|
7
|
+
class Unsummarizable < StandardError; end
|
8
|
+
|
9
|
+
class Summarize
|
10
|
+
attr_reader :current_result_row, :pure, :noop, :from_where
|
11
|
+
alias_method :pure?, :pure
|
12
|
+
alias_method :noop?, :noop
|
13
|
+
|
14
|
+
# noop: true
|
15
|
+
# causes `summarize` simply to yield the original relation and a trivial,
|
16
|
+
# synchronous `with` proc. It is meant as a convenient way to test/prove
|
17
|
+
# the correctness of `summarize` and to compare performance of the single
|
18
|
+
# combined query vs the original individual queries.
|
19
|
+
# N.b., if `relation` already has a grouping applied, there is no direct
|
20
|
+
# ActiveRecord translation for what `summarize` does, so noop: true is
|
21
|
+
# impossible and raises an exception.
|
22
|
+
# pure: true
|
23
|
+
# lets `summarize` know that you're not mutating state within the block,
|
24
|
+
# so it doesn't need to go spelunking in the block binding for
|
25
|
+
# ChainableResults. See `if !pure?` section below.
|
26
|
+
# N.b., if `relation` already has a grouping applied, pure: true is
|
27
|
+
# implied and pure: false throws an exception, as the impure behavior
|
28
|
+
# would be non-obvious and of doubtful value.
|
29
|
+
# Captures the relation to summarize and validates the option combination.
#
# relation - the relation `summarize` was called on; its `group_values`
#            decide whether grouped-mode restrictions apply.
# pure:    - nil (default), true or false. Forced to true when the relation
#            is already grouped; an explicit `false` is rejected in that case.
# noop:    - when truthy, `process` yields the original relation unchanged.
#            Rejected when the relation is already grouped.
#
# Raises Unsummarizable for `pure: false` on a grouped relation and
# ArgumentError for `noop: true` on a grouped relation.
def initialize(relation, pure: nil, noop: false)
  grouped = relation.group_values.any?
  if grouped
    raise Unsummarizable, "`summarize` must be pure when called on a grouped relation" if pure == false
    raise ArgumentError, "`summarize(noop: true)` is impossible on a grouped relation" if noop
  end

  @relation = relation
  @noop = noop
  @calculations = []
  # A grouped relation is always pure; otherwise coerce the option to a boolean.
  @pure = grouped ? true : !!pure
end
|
38
|
+
|
39
|
+
# Runs the caller's block and resolves every calculation it registered.
#
# Yields two arguments: the relation (see below) and a `with` proc for
# combining several results. Returns the block's return value with any
# ChainableResult replaced by its resolved value:
#   - ungrouped base relation:      the bare value
#   - one base group expression:    `{k1=>v1, k2=>v2, ...}`
#   - several base group exprs:     `{[k1a, k1b]=>v1, ...}`
def process(&block)
  # For noop, just yield the original relation and a transparent `with` proc
  # that hands its arguments, as one array, to the combining block.
  if noop?
    passthrough_with = ->(*results, &combine) { [*results].then(&combine) }
    return yield(@relation, passthrough_with)
  end

  # Within the block, the relation and its future clones intercept calls to
  # `count` and `sum`, registering them and returning a ChainableResult via
  # summarize.add_calculation.
  intercepting_relation = @relation.unscope(:group)
  intercepting_relation.instance_variable_set(:@summarize, self)
  intercepting_relation.singleton_class.include(InstanceMethods)
  future_block_result = ChainableResult.wrap(yield(intercepting_relation, ChainableResult::WITH))

  ChainableResult.with_cache(!pure?) do
    # `resolve` builds the single query that answers all collected calculations,
    # executes it, and aggregates the results by the values of
    # `@relation.group_values`. In the common case of no `@relation.group_values`,
    # the result is just `{[]=>[*final_value_for_each_calculation]}`.
    #
    # Each row (in the common case, only one) is used to resolve any
    # ChainableResults returned by the block. These may be a one-to-one mapping,
    # or the block return may have combined some results via `with` or chained
    # additional methods on results, etc.
    per_group = resolve.transform_values! do |row|
      @current_result_row = row
      future_block_result.value
    end

    # Change ungrouped result from `{[]=>v}` to `v` and grouped-by-one-column
    # result from `{[k1]=>v1,[k2]=>v2,...}` to `{k1=>v1,k2=>v2,...}`.
    # (Those are both probably more common than multiple-column base grouping.)
    result =
      case @relation.group_values.size
      when 0 then per_group.values.first
      when 1 then per_group.transform_keys! { |key| key.first }
      else per_group
      end

    unless pure?
      # Check block scope's local vars and block's self's instance vars for
      # any ChainableResult, and replace it with its resolved value.
      #
      # Hash values held by those vars are checked too (one level deep, via
      # lightly_touch_impure_hash); Arrays are deliberately not traversed,
      # since large arrays of eagerly-fetched records are common in
      # controllers while counts pushed onto arrays are rare.
      #
      # Preconditions:
      # - @current_result_row is still set to the single result row
      # - we are within a ChainableResult.with_cache(true) block
      scope = block.binding
      owner = scope.receiver

      scope.local_variables.each do |name|
        held = scope.local_variable_get(name)
        if held.is_a?(ChainableResult)
          scope.local_variable_set(name, held.value)
        elsif held.is_a?(Hash)
          lightly_touch_impure_hash(held)
        end
      end

      owner.instance_variables.each do |name|
        held = owner.instance_variable_get(name)
        if held.is_a?(ChainableResult)
          owner.instance_variable_set(name, held.value)
        elsif held.is_a?(Hash)
          lightly_touch_impure_hash(held)
        end
      end
    end

    @current_result_row = nil
    result
  end
end
|
107
|
+
|
108
|
+
# Registers one intercepted `count`/`sum` and returns its placeholder.
#
# relation    - the relation the calculation was called on; its criteria are
#               OR-ed into the combined query via merge_from_where!.
# operation   - the calculation name handed to CalculationResult.
# column_name - the column expression handed to CalculationResult.
#
# Returns a ChainableResult whose block reads this calculation's slot from
# `current_result_row` when it is eventually evaluated.
def add_calculation(relation, operation, column_name)
  merge_from_where!(relation)
  entry = CalculationResult.new(relation, operation, column_name)
  # The slot is the entry's position in @calculations, i.e. the size before pushing.
  slot = @calculations.length
  @calculations.push(entry)
  ChainableResult.wrap(entry) { current_result_row[slot] }
end
|
115
|
+
|
116
|
+
# Builds and executes the single combined query, then aggregates its rows.
#
# Returns a Hash keyed by the base relation's group values (an Array per key;
# `[]` when the base relation is ungrouped) whose values are Arrays holding
# one final value per registered calculation, in registration order. A
# calculation without its own `.group` yields a number; one with a single
# group expression yields `{key=>number}`; one with several yields
# `{[k1, k2]=>number}`.
def resolve
  # Build & execute query
  groups = all_groups
  selects = value_selects
  # MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
  # where the numbers are 1-indexed references to SELECT values. It makes these
  # generated queries much shorter and more readable, and it avoids the
  # ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
  # from underlying tables) even where those are supported. But in case we find
  # a database that doesn't support numeric references, the fully-explicit
  # grouping code is commented out below.
  #
  # grouped_query = groups.any? ? from_where.group(*groups) : from_where
  grouped_query = groups.any? ? from_where.group(*1..groups.size) : from_where
  data = grouped_query.pluck(*groups, *selects)
  # `pluck` with exactly one column returns a flat array of values rather than
  # an array of rows. Everything below indexes into rows, so re-wrap each value
  # (this is the case for a single calculation with no grouping anywhere).
  data = data.map { |value| [value] } if groups.size + selects.size == 1

  # Aggregate & assign results
  group_idx = groups.each_with_index.to_h
  starting_values, reducers = @calculations.each_with_index.map do |f, i|
    value_column = groups.size + i
    group_columns = f.relation.group_values.map { |k| group_idx[k] }
    # SQL SUM is NULL for an empty or all-NULL set, which arrives here as nil.
    # Treat it as 0, as ActiveRecord's own `sum` does, instead of raising on
    # `0 + nil` / `nil.zero?`.
    case group_columns.size
    when 0 then [
      0,
      ->(memo, row) { memo + (row[value_column] || 0) }
    ]
    when 1 then [
      Hash.new(0), # Default 0 makes the reducer much cleaner, but we have to clean it up later
      ->(memo, row) {
        amount = row[value_column] || 0
        memo[row[group_columns[0]]] += amount unless amount.zero?
        memo
      }
    ]
    else [
      Hash.new(0),
      ->(memo, row) {
        amount = row[value_column] || 0
        memo[group_columns.map { |c| row[c] }] += amount unless amount.zero?
        memo
      }
    ]
    end
  end.transpose # For an array of pairs, `transpose` is the reverse of `zip`
  cols = (0...reducers.size)
  base_group_columns = (0...base_groups.size)
  data
    .group_by { |row| row[base_group_columns] }
    # With no base groups and no rows at all, still produce the single `[]` bucket
    # so every calculation resolves to its starting value.
    .tap { |h| h[[]] = [] if h.empty? && base_groups.size.zero? }
    .transform_values! do |rows|
      values = starting_values.map(&:dup) # map(&:dup) since some are hashes and we don't want to mutate starting_values
      rows.each do |row|
        cols.each do |i|
          values[i] = reducers[i].call(values[i], row)
        end
      end
      # Set any hash's default back to nil, since callers will expect a normal hash
      values.each { |v| v.default = nil if v.is_a? Hash }
    end
end
|
173
|
+
|
174
|
+
private
|
175
|
+
|
176
|
+
# The base relation stripped of its select and group clauses, memoized.
# merge_from_where! compares every calculation's relation against it.
def compatible_base
  return @compatible_base if @compatible_base

  @compatible_base = @relation.except(:select, :group)
end
|
179
|
+
|
180
|
+
# Folds one calculation's criteria into the combined query's FROM/WHERE.
#
# other - the relation a calculation was called on.
#
# Raises Unsummarizable when `other` (minus select/group) is not structurally
# compatible with the base relation. Returns the updated `@from_where`.
def merge_from_where!(other)
  other_from_where = other.except(:select, :group)
  # This is a private ActiveRecord helper, so its name is not stable.
  # NOTE(review): before Rails 6.1 it appears to be called
  # `structurally_incompatible_values_for_or`; the gemspec allows
  # activerecord >= 5.0, so use whichever name this version defines and fall
  # back to the current name (failing exactly as before) if neither exists.
  checker = %i[structurally_incompatible_values_for structurally_incompatible_values_for_or]
    .find { |name| compatible_base.respond_to?(name, true) } || :structurally_incompatible_values_for
  incompatible_values = compatible_base.send(checker, other_from_where)
  unless incompatible_values.empty?
    raise Unsummarizable, "Within a `summarize` block, each calculation must be structurally compatible. Incompatible values: #{incompatible_values}"
  end
  # Logical OR the criteria of all calculations. Most often this is equivalent
  # to `compatible_base`, since usually one is a total or grouped count without
  # additional `where` criteria, but that needn't necessarily be so.
  @from_where = if @from_where.nil?
    other_from_where
  else
    @from_where.or(other_from_where)
  end
end
|
195
|
+
|
196
|
+
# A fresh copy of the base relation's group expressions, so callers
# (all_groups appends to it) never mutate the relation's own array.
def base_groups
  own_groups = @relation.group_values
  own_groups.dup
end
|
199
|
+
|
200
|
+
# Every group expression the combined query needs: the base relation's
# groups first, exactly as given (duplicates included), followed by each
# calculation's groups in registration order with repeats dropped.
def all_groups
  ordered = base_groups
  seen = Set.new(ordered)
  requested = @calculations.map { |calculation| calculation.relation.group_values }.flatten
  requested.each do |expression|
    # Set#add? returns nil when the expression was already present.
    ordered << expression if seen.add?(expression)
  end
  ordered
end
|
212
|
+
|
213
|
+
# One SELECT expression per registered calculation, in registration order,
# each built relative to the base relation.
def value_selects
  @calculations.map do |calculation|
    calculation.select_value(@relation)
  end
end
|
216
|
+
|
217
|
+
# Replaces, in place, any ChainableResult stored directly as a value of `h`
# with its resolved value. Only the top level is inspected; nested
# collections are left alone. Returns `h`.
def lightly_touch_impure_hash(h)
  h.each_pair do |key, entry|
    next unless entry.is_a?(ChainableResult)

    # Reassigning an existing key is safe while iterating.
    h[key] = entry.value
  end
end
|
222
|
+
end
|
223
|
+
|
224
|
+
# Describes one calculation requested inside a `summarize` block: the relation
# it was called on, the calculation name ("sum" or "count"), and the column
# expression to aggregate.
#
# N.b., the `method` reader replaces Object#method on instances of this class.
class CalculationResult
  attr_reader :relation, :method, :column

  def initialize(relation, method, column)
    @relation, @method, @column = relation, method, column
  end

  # Builds the aggregate expression for this calculation within the combined
  # query. Criteria this calculation has beyond `base_relation`'s are moved
  # into a CASE so that non-matching rows contribute `unmatch_value` instead of
  # the column. The relation's `distinct_value` is carried onto the aggregate.
  def select_value(base_relation)
    extra_where = relation.where_clause - base_relation.where_clause
    expression =
      if extra_where.empty?
        column
      else
        Arel::Nodes::Case.new(extra_where.ast, unmatch_value).when(true, column)
      end
    aggregate = function.new([expression])
    aggregate.distinct = relation.distinct_value
    aggregate
  end

  # The value a non-matching row contributes: 0 for "sum", nil for "count".
  # Raises RuntimeError for any other calculation name.
  def unmatch_value
    return 0 if method == "sum"
    return nil if method == "count"

    raise "Unknown calculation method"
  end

  # The Arel aggregate node class for this calculation.
  # Raises RuntimeError for any calculation name other than "sum" or "count".
  def function
    return Arel::Nodes::Sum if method == "sum"
    return Arel::Nodes::Count if method == "count"

    raise "Unknown calculation method"
  end
end
|
256
|
+
|
257
|
+
# Adds `summarize` to relations.
module RelationMethods
  # Runs `block` through a new Summarize built for this relation, forwarding
  # `opts` to its constructor, and returns whatever `process` returns.
  #
  # Raises Unsummarizable when this relation already has `@summarize` set,
  # i.e. when called from inside another `summarize` block.
  def summarize(**opts, &block)
    if @summarize
      raise Unsummarizable, "Cannot summarize within a summarize block"
    end

    summarizer = ActiveRecord::Summarize::Summarize.new(self, **opts)
    summarizer.process(&block)
  end
end
|
263
|
+
|
264
|
+
# Overrides the calculation hook so that "count" and "sum" are registered
# with the active summarize instead of being run on their own.
module InstanceMethods
  private

  # Registers a count or sum with `@summarize`; every other operation is
  # passed on to `super` (with the operation name already downcased).
  #
  # A column of nil, "*" or :all is replaced with :id before registration.
  # NOTE(review): presumably `@summarize` is set on relations handed to a
  # `summarize` block - confirm against the code that extends relations
  # with this module.
  def perform_calculation(operation, column_name)
    operation = operation.to_s.downcase
    return super unless ["count", "sum"].include?(operation)

    column_name = :id if [nil, "*", :all].include?(column_name)
    @summarize.add_calculation(self, operation, aggregate_column(column_name))
  end
end
|
276
|
+
end
|
277
|
+
|
278
|
+
class ActiveRecord::Base
  # Class-level entry point: summarizes over `all`, forwarding `opts` to the
  # Summarize constructor and `block` to `process`.
  def self.summarize(**opts, &block)
    summarizer = ActiveRecord::Summarize::Summarize.new(all, **opts)
    summarizer.process(&block)
  end
end
|
285
|
+
|
286
|
+
# Make `summarize` available on every relation.
ActiveRecord::Relation.include(ActiveRecord::Summarize::RelationMethods)
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# A placeholder for a value that is computed later.
#
# A ChainableResult remembers a source plus a method call (name, positional
# args, keyword opts, block) to make on that source. Nothing is evaluated
# until `value` is called. Any method that is not defined here is captured by
# `method_missing` and returns another placeholder, so calls can be chained
# before the underlying value exists.
class ChainableResult
  # source - the object (or placeholder/container, depending on subclass)
  #          the deferred call is made on
  # method - method name to send; defaults to :then when a block is given,
  #          otherwise :itself
  # args   - positional arguments for the deferred call
  # opts   - keyword arguments for the deferred call
  # block  - block for the deferred call
  def initialize(source, method = nil, args = [], opts = {}, &block)
    @source = source
    @method = method || (block ? :then : :itself)
    @args = args
    @opts = opts
    @block = block
    @cached = false
  end

  # Resolves the source and performs the deferred call.
  #
  # When cache mode is on (see `with_cache`) the result is memoized. It is
  # marked as cached only after the call succeeds, so a call that raises is
  # retried on the next `value` instead of being remembered as nil.
  def value
    return invoke_on_source unless use_cache?
    return @value if @cached

    @value = invoke_on_source
    @cached = true
    @value
  end

  # The methods below are already defined on every object (`to_json` once a
  # JSON library is loaded), so they would never reach `method_missing`;
  # each is redefined to return a placeholder for the deferred call.

  def to_json(**opts)
    ChainableResult::Future.new(self, :to_json, [], opts)
  end

  def then(&block)
    ChainableResult::Future.new(self, :then, &block)
  end

  def yield_self(&block)
    ChainableResult::Future.new(self, :yield_self, &block)
  end

  def tap(&block)
    ChainableResult::Future.new(self, :tap, &block)
  end

  # Captures any other call as a deferred call on this result's value.
  def method_missing(method, *args, **opts, &block)
    ChainableResult::Future.new(self, method, args, opts, &block)
  end

  # Every method name is accepted, because `method_missing` defers them all.
  def respond_to_missing?(method_name, include_private = false)
    true
  end

  # Source is itself a placeholder: resolve it first.
  class Future < self
    def resolve_source
      @source.value
    end
  end

  # Source is an array that may contain placeholders.
  class Array < self
    def resolve_source
      @source.map(&RESOLVE_ITEM)
    end
  end

  # Source is a hash whose values may contain placeholders.
  class Hash < self
    def resolve_source
      @source.transform_values(&RESOLVE_ITEM)
    end
  end

  # Source is a plain value, used as-is.
  class Other < self
    def resolve_source
      @source
    end
  end

  # Wraps `v` in the matching ChainableResult subclass together with an
  # optional deferred call. An existing ChainableResult is returned
  # unchanged (any method, args or block given are ignored in that case).
  def self.wrap(v, method = nil, *args, **opts, &block)
    method ||= block ? :then : :itself
    klass = case v
    when ChainableResult then return v # don't wrap, exit early
    when ::Array then ChainableResult::Array
    when ::Hash then ChainableResult::Hash
    else ChainableResult::Other
    end
    klass.new(v, method, args, opts, &block)
  end

  # Returns a placeholder whose value is `block` applied to the resolved
  # results: a single result is passed as-is, several are passed as one
  # array.
  def self.with(*results, &block)
    ChainableResult.wrap(results.size == 1 ? results.first : results, :then, &block)
  end

  WITH = method(:with)

  # Resolves one item: placeholders via `value`, arrays and hashes
  # recursively, anything else unchanged.
  def self.resolve_item(item)
    case item
    when ChainableResult then item.value
    when ::Array then ChainableResult::Array.new(item).value
    when ::Hash then ChainableResult::Hash.new(item).value
    else item
    end
  end

  RESOLVE_ITEM = method(:resolve_item)
  CACHE_MODE_KEY = :"ChainableResult::USE_CACHE"

  # Runs the block with cache mode set to `mode` (stored in
  # `Thread.current[CACHE_MODE_KEY]`) and returns the block's result.
  #
  # The previous mode is restored in `ensure`, so an exception raised by the
  # block cannot leave cache mode switched on (or off) for later code.
  def self.with_cache(mode = true)
    prev = Thread.current[CACHE_MODE_KEY]
    Thread.current[CACHE_MODE_KEY] = mode
    yield
  ensure
    Thread.current[CACHE_MODE_KEY] = prev
  end

  private

  def use_cache?
    !!Thread.current[CACHE_MODE_KEY]
  end

  # Performs the deferred call on the resolved source.
  #
  # Keyword arguments are only splatted when there are some: on Ruby 2.6,
  # splatting an empty hash passes a stray positional `{}` to the target.
  def invoke_on_source
    target = resolve_source
    if @opts.nil? || @opts.empty?
      target.send(@method, *@args, &@block)
    else
      target.send(@method, *@args, **@opts, &@block)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: activerecord-summarize
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Joshua Paine
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-03-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Just wrap your existing code in `@relation.summarize do |relation| ...
|
42
|
+
end` and run your queries against relation instead of @relation.
|
43
|
+
email:
|
44
|
+
- joshua@letterblock.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".standard.yml"
|
50
|
+
- CHANGELOG.md
|
51
|
+
- Gemfile
|
52
|
+
- Gemfile.lock
|
53
|
+
- LICENSE.txt
|
54
|
+
- README.md
|
55
|
+
- Rakefile
|
56
|
+
- activerecord-summarize.gemspec
|
57
|
+
- bin/console
|
58
|
+
- bin/setup
|
59
|
+
- lib/activerecord/summarize.rb
|
60
|
+
- lib/activerecord/summarize/version.rb
|
61
|
+
- lib/chainable_result.rb
|
62
|
+
- sig/activerecord/summarize.rbs
|
63
|
+
homepage: https://letterblock.com
|
64
|
+
licenses:
|
65
|
+
- MIT
|
66
|
+
metadata:
|
67
|
+
allowed_push_host: https://rubygems.org
|
68
|
+
homepage_uri: https://letterblock.com
|
69
|
+
source_code_uri: https://github.com/midnightmonster/activerecord-summarize
|
70
|
+
changelog_uri: https://github.com/midnightmonster/activerecord-summarize/blob/master/CHANGELOG.md
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 2.6.0
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.3.3
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Run many .count and/or .sum queries in a single efficient query with minimal
|
90
|
+
code changes, even with different .group and only-partly-overlapping .where filters.
|
91
|
+
Nearly-free speedups for mature Rails apps.
|
92
|
+
test_files: []
|