activerecord-summarize 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +5 -2
- data/README.md +1 -0
- data/lib/activerecord/summarize/calculation_implementation.rb +129 -0
- data/lib/activerecord/summarize/version.rb +1 -1
- data/lib/activerecord/summarize.rb +46 -64
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9a4373f45ac68b039a480eabff7429cf8c0459c3ca43ab1d2e44ac7fce5987cd
|
|
4
|
+
data.tar.gz: da0aee342ea440116ffb2e557f628d5f18781de3c143e23a2b6b3350a607aea7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b8812147378b9d2c1b4069b9074f126cd20e37f763c6b4df374ada72bec3b2d354505cf67470cc6708198f4d997c14d8d0b402b71efe7274ef2ce096c4ac73d1
|
|
7
|
+
data.tar.gz: d7a893afa09e883a5a9db77f802d0f3a9ab85ff9ad744cb30f30247c0b2d85fea30147ea15ca866f9e4498538dc7d5b68c191a7219050cc4ef35cebe2474ed47
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
## [0.6.0] - 2026-05-20
|
|
2
|
+
|
|
3
|
+
- **FEATURE:** Support for `.average`, `.minimum`, and `.maximum` inside `summarize` blocks. All three work with the same flexibility as `.count` and `.sum`: they can be combined freely with each other and with other calculation methods, support `.where` and `.group` sub-scopes, and their results are chainable just like any other `ChainableResult`.
|
|
4
|
+
|
|
1
5
|
## [0.5.1] - 2023-08-16
|
|
2
6
|
|
|
3
7
|
- **BUGFIX:** Starting with version 7.0.5, the behavior of ActiveRecord's `pluck` changed: when you pluck multiple values with the same aggregate function (e.g., `sum`), in PostgreSQL, the data type of the last such value is now applied to all such values, though they used to be inferred correctly. Our solution is to add an explicit alias to each result column.
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
activerecord-summarize (0.5.1)
|
|
4
|
+
activerecord-summarize (0.6.0)
|
|
5
5
|
activerecord (>= 5.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -24,6 +24,7 @@ GEM
|
|
|
24
24
|
json (2.6.3)
|
|
25
25
|
language_server-protocol (3.17.0.3)
|
|
26
26
|
lint_roller (1.1.0)
|
|
27
|
+
mini_portile2 (2.8.7)
|
|
27
28
|
minitest (5.19.0)
|
|
28
29
|
parallel (1.23.0)
|
|
29
30
|
parser (3.2.2.3)
|
|
@@ -51,8 +52,9 @@ GEM
|
|
|
51
52
|
rubocop (>= 1.7.0, < 2.0)
|
|
52
53
|
rubocop-ast (>= 0.4.0)
|
|
53
54
|
ruby-progressbar (1.13.0)
|
|
55
|
+
sqlite3 (1.6.3)
|
|
56
|
+
mini_portile2 (~> 2.8.0)
|
|
54
57
|
sqlite3 (1.6.3-arm64-darwin)
|
|
55
|
-
sqlite3 (1.6.3-x86_64-linux)
|
|
56
58
|
standard (1.30.1)
|
|
57
59
|
language_server-protocol (~> 3.17.0.2)
|
|
58
60
|
lint_roller (~> 1.0)
|
|
@@ -71,6 +73,7 @@ GEM
|
|
|
71
73
|
|
|
72
74
|
PLATFORMS
|
|
73
75
|
arm64-darwin-21
|
|
76
|
+
arm64-darwin-23
|
|
74
77
|
x86_64-linux
|
|
75
78
|
|
|
76
79
|
DEPENDENCIES
|
data/README.md
CHANGED
|
@@ -127,6 +127,7 @@ puts Purchase.last_year.complete.group(:region_id).summarize do |purchases,with_
|
|
|
127
127
|
target = with_resolved[total / 4, by_quarter.values.max] {|avg_q, best_q| [avg_q * 1.25, best_q].max.round }
|
|
128
128
|
{last_year: total, quarters: by_quarter, unit_target: target}
|
|
129
129
|
end
|
|
130
|
+
|
|
130
131
|
# Output:
|
|
131
132
|
# {
|
|
132
133
|
# 1 => {
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
module ActiveRecord::Summarize::CalculationImplementation
|
|
2
|
+
def self.new(operation, relation, column_name)
|
|
3
|
+
case operation
|
|
4
|
+
when "sum" then Sum
|
|
5
|
+
when "count" then Count
|
|
6
|
+
when "minimum" then Minimum
|
|
7
|
+
when "maximum" then Maximum
|
|
8
|
+
else raise "Unknown calculation #{operation}"
|
|
9
|
+
end.new(relation, column_name)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
class Base
|
|
13
|
+
attr_reader :relation, :column
|
|
14
|
+
|
|
15
|
+
def initialize(relation, column)
|
|
16
|
+
@relation = relation
|
|
17
|
+
@column = column
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def select_column_arel_node(base_relation)
|
|
21
|
+
where = relation.where_clause - base_relation.where_clause
|
|
22
|
+
for_select = column
|
|
23
|
+
for_select = Arel::Nodes::Case.new(where.ast).when(true, for_select).else(unmatch_arel_node) unless where.empty?
|
|
24
|
+
function_arel_node_class.new([for_select]).tap { |f| f.distinct = relation.distinct_value }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def function_arel_node_class
|
|
28
|
+
# Arel::Node class representing the SQL function
|
|
29
|
+
raise "`#{self.class}` must implement `function_arel_node_class`"
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def unmatch_arel_node
|
|
33
|
+
# In case of `where` filters, this is the does-not-count value for when
|
|
34
|
+
# filters don't match, so far always 0 or nil (becomes NULL)
|
|
35
|
+
raise "`#{self.class}` must implement `unmatch_arel_node`"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def initial
|
|
39
|
+
# Initial value for reducing potentially many split-into-groups rows to
|
|
40
|
+
# a single value, so far always 0 or nil.
|
|
41
|
+
raise "`#{self.class}` must implement `initial`"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def reducer(memo, v)
|
|
45
|
+
# Reducer method for reducing potentially many split-into-groups rows to
|
|
46
|
+
# a single value. Method should return a value the same type as memo
|
|
47
|
+
# and/or v. A reducer is necessary at all because .group in columns
|
|
48
|
+
# _other than_ this one results in fragmenting this result into several
|
|
49
|
+
# rows.
|
|
50
|
+
raise "`#{self.class}` must implement `reducer`"
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
class Sum < Base
|
|
55
|
+
def unmatch_arel_node
|
|
56
|
+
0 # Adding zero to a sum does nothing
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def function_arel_node_class
|
|
60
|
+
Arel::Nodes::Sum
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def initial
|
|
64
|
+
0
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def reducer(memo, v)
|
|
68
|
+
memo + (v || 0)
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
class Count < Base
|
|
73
|
+
def unmatch_arel_node
|
|
74
|
+
nil # In SQL, null is no value and is not counted
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def function_arel_node_class
|
|
78
|
+
Arel::Nodes::Count
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def initial
|
|
82
|
+
0
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def reducer(memo, v)
|
|
86
|
+
memo + (v || 0)
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
class Minimum < Base
|
|
91
|
+
def unmatch_arel_node
|
|
92
|
+
nil # In SQL, null is no value and is not considered for min()
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def function_arel_node_class
|
|
96
|
+
Arel::Nodes::Min
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def initial
|
|
100
|
+
nil
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def reducer(memo, v)
|
|
104
|
+
return memo if v.nil?
|
|
105
|
+
return v if memo.nil?
|
|
106
|
+
(v < memo) ? v : memo
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
class Maximum < Base
|
|
111
|
+
def unmatch_arel_node
|
|
112
|
+
nil # In SQL, null is no value and is not considered for max()
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def function_arel_node_class
|
|
116
|
+
Arel::Nodes::Max
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def initial
|
|
120
|
+
nil
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def reducer(memo, v)
|
|
124
|
+
return memo if v.nil?
|
|
125
|
+
return v if memo.nil?
|
|
126
|
+
(v > memo) ? v : memo
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "summarize/version"
|
|
4
|
+
require_relative "summarize/calculation_implementation"
|
|
4
5
|
require_relative "../chainable_result"
|
|
5
6
|
|
|
6
7
|
module ActiveRecord::Summarize
|
|
@@ -130,58 +131,59 @@ module ActiveRecord::Summarize
|
|
|
130
131
|
end
|
|
131
132
|
end
|
|
132
133
|
|
|
133
|
-
def add_calculation(
|
|
134
|
+
def add_calculation(operation, relation, column_name)
|
|
134
135
|
merge_from_where!(relation)
|
|
135
|
-
calculation =
|
|
136
|
+
calculation = CalculationImplementation.new(operation, relation, column_name)
|
|
136
137
|
index = @calculations.size
|
|
137
138
|
@calculations << calculation
|
|
138
139
|
ChainableResult.wrap(calculation) { current_result_row[index] }
|
|
139
140
|
end
|
|
140
141
|
|
|
141
142
|
def resolve
|
|
142
|
-
|
|
143
|
+
#########################
|
|
144
|
+
# Build & execute query #
|
|
145
|
+
#########################
|
|
143
146
|
groups = all_groups
|
|
144
147
|
# MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
|
|
145
|
-
# where the numbers are 1-indexed references to SELECT values.
|
|
146
|
-
# generated queries much shorter and more readable, and it avoids the
|
|
147
|
-
# ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
|
|
148
|
-
# from underlying tables) even where those are supported. But in case we find
|
|
149
|
-
# a database that doesn't support numeric references, the fully-explicit
|
|
150
|
-
# grouping code is commented out below.
|
|
151
|
-
#
|
|
152
|
-
# grouped_query = groups.any? ? from_where.group(*groups) : from_where
|
|
148
|
+
# where the numbers are 1-indexed references to SELECT values.
|
|
153
149
|
grouped_query = groups.any? ? from_where.group(*1..groups.size) : from_where
|
|
154
150
|
data = grouped_query.pluck(*groups, *value_selects)
|
|
155
|
-
|
|
156
|
-
#
|
|
157
|
-
#
|
|
151
|
+
|
|
152
|
+
# .pluck(:just_one_column) returns an array of values instead of an array
|
|
153
|
+
# of arrays, which breaks the aggregation and assignment below.
|
|
158
154
|
data = data.map { |d| [d] } if (groups.size + value_selects.size) == 1
|
|
159
155
|
|
|
160
|
-
|
|
161
|
-
|
|
156
|
+
##############################
|
|
157
|
+
# Build aggregation reducers #
|
|
158
|
+
##############################
|
|
159
|
+
# groups includes all base groups and all sub-groups
|
|
160
|
+
group_idx = groups.each_with_index.to_h # Inverts the groups list: `[:foo, :bar]` becomes `{:foo => 0, :bar => 1}`
|
|
162
161
|
starting_values, reducers = @calculations.each_with_index.map do |f, i|
|
|
163
162
|
value_column = groups.size + i
|
|
163
|
+
# each calculation shares any base groups that exist and may have sub-groups, which won't be shared by others
|
|
164
164
|
group_columns = f.relation.group_values.map { |k| group_idx[k] }
|
|
165
|
-
# `row[value_column] || 0` pattern in reducers because SQL SUM(NULL)
|
|
166
|
-
# returns NULL, but like ActiveRecord we always want .sum to return a
|
|
167
|
-
# number, and our "starting_values and reducers" implementation means
|
|
168
|
-
# we sometimes will have to add NULL to our numbers.
|
|
169
165
|
case group_columns.size
|
|
170
166
|
when 0 then [
|
|
171
|
-
|
|
172
|
-
->(memo, row) { memo
|
|
167
|
+
f.initial,
|
|
168
|
+
->(memo, row) { f.reducer(memo, row[value_column]) }
|
|
173
169
|
]
|
|
174
170
|
when 1 then [
|
|
175
|
-
|
|
171
|
+
{},
|
|
176
172
|
->(memo, row) {
|
|
177
|
-
|
|
173
|
+
key = row[group_columns[0]]
|
|
174
|
+
prev_val = memo[key] || f.initial
|
|
175
|
+
next_val = f.reducer(prev_val, row[value_column])
|
|
176
|
+
memo[key] = next_val unless next_val == prev_val
|
|
178
177
|
memo
|
|
179
178
|
}
|
|
180
179
|
]
|
|
181
180
|
else [
|
|
182
|
-
|
|
181
|
+
{},
|
|
183
182
|
->(memo, row) {
|
|
184
|
-
|
|
183
|
+
key = group_columns.map { |i| row[i] }
|
|
184
|
+
prev_val = memo[key] || f.initial
|
|
185
|
+
next_val = f.reducer(prev_val, row[value_column])
|
|
186
|
+
memo[key] = next_val unless next_val == prev_val
|
|
185
187
|
memo
|
|
186
188
|
}
|
|
187
189
|
]
|
|
@@ -199,8 +201,7 @@ module ActiveRecord::Summarize
|
|
|
199
201
|
values[i] = reducers[i].call(values[i], row)
|
|
200
202
|
end
|
|
201
203
|
end
|
|
202
|
-
|
|
203
|
-
values.each { |v| v.default = nil if v.is_a? Hash }
|
|
204
|
+
values
|
|
204
205
|
end
|
|
205
206
|
end
|
|
206
207
|
|
|
@@ -241,7 +242,7 @@ module ActiveRecord::Summarize
|
|
|
241
242
|
|
|
242
243
|
def value_selects
|
|
243
244
|
@calculations.each_with_index.map do |f, i|
|
|
244
|
-
f.
|
|
245
|
+
f.select_column_arel_node(@relation)
|
|
245
246
|
.as("_v#{i}") # In Postgres with certain Rails versions, alias is needed to disambiguate result column names for type information
|
|
246
247
|
end
|
|
247
248
|
end
|
|
@@ -253,39 +254,6 @@ module ActiveRecord::Summarize
|
|
|
253
254
|
end
|
|
254
255
|
end
|
|
255
256
|
|
|
256
|
-
class CalculationResult
|
|
257
|
-
attr_reader :relation, :method, :column
|
|
258
|
-
|
|
259
|
-
def initialize(relation, method, column)
|
|
260
|
-
@relation = relation
|
|
261
|
-
@method = method
|
|
262
|
-
@column = column
|
|
263
|
-
end
|
|
264
|
-
|
|
265
|
-
def select_value(base_relation)
|
|
266
|
-
where = relation.where_clause - base_relation.where_clause
|
|
267
|
-
for_select = column
|
|
268
|
-
for_select = Arel::Nodes::Case.new(where.ast).when(true, for_select).else(unmatch_arel_node) unless where.empty?
|
|
269
|
-
function.new([for_select]).tap { |f| f.distinct = relation.distinct_value }
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
def unmatch_arel_node
|
|
273
|
-
case method
|
|
274
|
-
when "sum" then 0 # Adding zero to a sum does nothing
|
|
275
|
-
when "count" then nil # In SQL, null is no value and is not counted
|
|
276
|
-
else raise "Unknown calculation method"
|
|
277
|
-
end
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
def function
|
|
281
|
-
case method
|
|
282
|
-
when "sum" then Arel::Nodes::Sum
|
|
283
|
-
when "count" then Arel::Nodes::Count
|
|
284
|
-
else raise "Unknown calculation method"
|
|
285
|
-
end
|
|
286
|
-
end
|
|
287
|
-
end
|
|
288
|
-
|
|
289
257
|
module RelationMethods
|
|
290
258
|
def summarize(**opts, &block)
|
|
291
259
|
raise Unsummarizable, "Cannot summarize within a summarize block" if @summarize
|
|
@@ -299,9 +267,23 @@ module ActiveRecord::Summarize
|
|
|
299
267
|
def perform_calculation(operation, column_name)
|
|
300
268
|
case operation = operation.to_s.downcase
|
|
301
269
|
when "count", "sum"
|
|
302
|
-
column_name = :id if [nil, "*", :all].include? column_name
|
|
270
|
+
column_name = :id if [nil, "*", :all].include? column_name # only applies to count
|
|
303
271
|
raise Unsummarizable, "DISTINCT in SQL is not reliably correct with summarize" if column_name.is_a?(String) && /\bdistinct\b/i === column_name
|
|
304
|
-
@summarize.add_calculation(
|
|
272
|
+
@summarize.add_calculation(operation, self, aggregate_column(column_name))
|
|
273
|
+
when "average"
|
|
274
|
+
ChainableResult::WITH_RESOLVED[
|
|
275
|
+
perform_calculation("sum", column_name),
|
|
276
|
+
perform_calculation("count", column_name)
|
|
277
|
+
] do |sum, count|
|
|
278
|
+
if sum.is_a? Hash
|
|
279
|
+
sum.to_h { |key, s| [key, s.to_d / count[key]] }
|
|
280
|
+
else
|
|
281
|
+
next nil if count == 0
|
|
282
|
+
sum.to_d / count
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
when "minimum", "maximum"
|
|
286
|
+
@summarize.add_calculation(operation, self, aggregate_column(column_name))
|
|
305
287
|
else super
|
|
306
288
|
end
|
|
307
289
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: activerecord-summarize
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.1
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Joshua Paine
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-05-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activerecord
|
|
@@ -58,6 +58,7 @@ files:
|
|
|
58
58
|
- docs/summarize_compared_with_load_async.md
|
|
59
59
|
- docs/use_case_moderator_dashboard.md
|
|
60
60
|
- lib/activerecord/summarize.rb
|
|
61
|
+
- lib/activerecord/summarize/calculation_implementation.rb
|
|
61
62
|
- lib/activerecord/summarize/version.rb
|
|
62
63
|
- lib/chainable_result.rb
|
|
63
64
|
- sig/activerecord/summarize.rbs
|