activerecord-summarize 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f469f566e328d4cc697d23295d95c477f6eb66c5d9246874b36bc59b5fbdcac0
4
- data.tar.gz: 7a7315482f217384462c869796531e1ea27511ba1787f435aec1eed754601506
3
+ metadata.gz: 9a4373f45ac68b039a480eabff7429cf8c0459c3ca43ab1d2e44ac7fce5987cd
4
+ data.tar.gz: da0aee342ea440116ffb2e557f628d5f18781de3c143e23a2b6b3350a607aea7
5
5
  SHA512:
6
- metadata.gz: 66a61050cec2736eed06d01b5889f85259e0625daa530376fb840f02cb93a041483f335bda4614d2925b9b4d1ae22f811d8fa5d005dad82d559bd1a5d64f0bb5
7
- data.tar.gz: b3f3e98e768a019f4c2a32c06d61fb84a50fcbc9f6866df35c1c254ee7d8feb2fd300a286a33cefd6407d12f8e67353348e7e600290ccfb2ae192e9dffd9fd9b
6
+ metadata.gz: b8812147378b9d2c1b4069b9074f126cd20e37f763c6b4df374ada72bec3b2d354505cf67470cc6708198f4d997c14d8d0b402b71efe7274ef2ce096c4ac73d1
7
+ data.tar.gz: d7a893afa09e883a5a9db77f802d0f3a9ab85ff9ad744cb30f30247c0b2d85fea30147ea15ca866f9e4498538dc7d5b68c191a7219050cc4ef35cebe2474ed47
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## [0.6.0] - 2026-05-20
2
+
3
+ - **FEATURE:** Support for `.average`, `.minimum`, and `.maximum` inside `summarize` blocks. All three work with the same flexibility as `.count` and `.sum`: they can be combined freely with each other and with other calculation methods, support `.where` and `.group` sub-scopes, and their results are chainable just like any other `ChainableResult`.
4
+
1
5
  ## [0.5.1] - 2023-08-16
2
6
 
3
7
  - **BUGFIX:** Starting with version 7.0.5, the behavior of ActiveRecord's `pluck` changed: when you pluck multiple values with the same aggregate function (e.g., `sum`), in PostgreSQL, the data type of the last such value is now applied to all such values, though they used to be inferred correctly. Our solution is to add an explicit alias to each result column.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- activerecord-summarize (0.5.1)
4
+ activerecord-summarize (0.6.0)
5
5
  activerecord (>= 5.0)
6
6
 
7
7
  GEM
@@ -24,6 +24,7 @@ GEM
24
24
  json (2.6.3)
25
25
  language_server-protocol (3.17.0.3)
26
26
  lint_roller (1.1.0)
27
+ mini_portile2 (2.8.7)
27
28
  minitest (5.19.0)
28
29
  parallel (1.23.0)
29
30
  parser (3.2.2.3)
@@ -51,8 +52,9 @@ GEM
51
52
  rubocop (>= 1.7.0, < 2.0)
52
53
  rubocop-ast (>= 0.4.0)
53
54
  ruby-progressbar (1.13.0)
55
+ sqlite3 (1.6.3)
56
+ mini_portile2 (~> 2.8.0)
54
57
  sqlite3 (1.6.3-arm64-darwin)
55
- sqlite3 (1.6.3-x86_64-linux)
56
58
  standard (1.30.1)
57
59
  language_server-protocol (~> 3.17.0.2)
58
60
  lint_roller (~> 1.0)
@@ -71,6 +73,7 @@ GEM
71
73
 
72
74
  PLATFORMS
73
75
  arm64-darwin-21
76
+ arm64-darwin-23
74
77
  x86_64-linux
75
78
 
76
79
  DEPENDENCIES
data/README.md CHANGED
@@ -127,6 +127,7 @@ puts Purchase.last_year.complete.group(:region_id).summarize do |purchases,with_
127
127
  target = with_resolved[total / 4, by_quarter.values.max] {|avg_q, best_q| [avg_q * 1.25, best_q].max.round }
128
128
  {last_year: total, quarters: by_quarter, unit_target: target}
129
129
  end
130
+
130
131
  # Output:
131
132
  # {
132
133
  # 1 => {
data/lib/activerecord/summarize/calculation_implementation.rb ADDED
@@ -0,0 +1,129 @@
1
+ module ActiveRecord::Summarize::CalculationImplementation
2
+ def self.new(operation, relation, column_name)
3
+ case operation
4
+ when "sum" then Sum
5
+ when "count" then Count
6
+ when "minimum" then Minimum
7
+ when "maximum" then Maximum
8
+ else raise "Unknown calculation #{operation}"
9
+ end.new(relation, column_name)
10
+ end
11
+
12
+ class Base
13
+ attr_reader :relation, :column
14
+
15
+ def initialize(relation, column)
16
+ @relation = relation
17
+ @column = column
18
+ end
19
+
20
+ def select_column_arel_node(base_relation)
21
+ where = relation.where_clause - base_relation.where_clause
22
+ for_select = column
23
+ for_select = Arel::Nodes::Case.new(where.ast).when(true, for_select).else(unmatch_arel_node) unless where.empty?
24
+ function_arel_node_class.new([for_select]).tap { |f| f.distinct = relation.distinct_value }
25
+ end
26
+
27
+ def function_arel_node_class
28
+ # Arel::Node class representing the SQL function
29
+ raise "`#{self.class}` must implement `function_arel_node_class`"
30
+ end
31
+
32
+ def unmatch_arel_node
33
+ # In case of `where` filters, this is the does-not-count value for when
34
+ # filters don't match, so far always 0 or nil (becomes NULL)
35
+ raise "`#{self.class}` must implement `unmatch_arel_node`"
36
+ end
37
+
38
+ def initial
39
+ # Initial value for reducing potentially many split-into-groups rows to
40
+ # a single value, so far always 0 or nil.
41
+ raise "`#{self.class}` must implement `initial`"
42
+ end
43
+
44
+ def reducer(memo, v)
45
+ # Reducer method for reducing potentially many split-into-groups rows to
46
+ # a single value. Method should return a value the same type as memo
47
+ # and/or v. A reducer is necessary at all because .group in columns
48
+ # _other than_ this one results in fragmenting this result into several
49
+ # rows.
50
+ raise "`#{self.class}` must implement `reducer`"
51
+ end
52
+ end
53
+
54
+ class Sum < Base
55
+ def unmatch_arel_node
56
+ 0 # Adding zero to a sum does nothing
57
+ end
58
+
59
+ def function_arel_node_class
60
+ Arel::Nodes::Sum
61
+ end
62
+
63
+ def initial
64
+ 0
65
+ end
66
+
67
+ def reducer(memo, v)
68
+ memo + (v || 0)
69
+ end
70
+ end
71
+
72
+ class Count < Base
73
+ def unmatch_arel_node
74
+ nil # In SQL, null is no value and is not counted
75
+ end
76
+
77
+ def function_arel_node_class
78
+ Arel::Nodes::Count
79
+ end
80
+
81
+ def initial
82
+ 0
83
+ end
84
+
85
+ def reducer(memo, v)
86
+ memo + (v || 0)
87
+ end
88
+ end
89
+
90
+ class Minimum < Base
91
+ def unmatch_arel_node
92
+ nil # In SQL, null is no value and is not considered for min()
93
+ end
94
+
95
+ def function_arel_node_class
96
+ Arel::Nodes::Min
97
+ end
98
+
99
+ def initial
100
+ nil
101
+ end
102
+
103
+ def reducer(memo, v)
104
+ return memo if v.nil?
105
+ return v if memo.nil?
106
+ (v < memo) ? v : memo
107
+ end
108
+ end
109
+
110
+ class Maximum < Base
111
+ def unmatch_arel_node
112
+ nil # In SQL, null is no value and is not considered for max()
113
+ end
114
+
115
+ def function_arel_node_class
116
+ Arel::Nodes::Max
117
+ end
118
+
119
+ def initial
120
+ nil
121
+ end
122
+
123
+ def reducer(memo, v)
124
+ return memo if v.nil?
125
+ return v if memo.nil?
126
+ (v > memo) ? v : memo
127
+ end
128
+ end
129
+ end
data/lib/activerecord/summarize/version.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  module ActiveRecord
4
4
  module Summarize
5
- VERSION = "0.5.1"
5
+ VERSION = "0.6.0"
6
6
  end
7
7
  end
data/lib/activerecord/summarize.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "summarize/version"
4
+ require_relative "summarize/calculation_implementation"
4
5
  require_relative "../chainable_result"
5
6
 
6
7
  module ActiveRecord::Summarize
@@ -130,58 +131,59 @@ module ActiveRecord::Summarize
130
131
  end
131
132
  end
132
133
 
133
- def add_calculation(relation, operation, column_name)
134
+ def add_calculation(operation, relation, column_name)
134
135
  merge_from_where!(relation)
135
- calculation = CalculationResult.new(relation, operation, column_name)
136
+ calculation = CalculationImplementation.new(operation, relation, column_name)
136
137
  index = @calculations.size
137
138
  @calculations << calculation
138
139
  ChainableResult.wrap(calculation) { current_result_row[index] }
139
140
  end
140
141
 
141
142
  def resolve
142
- # Build & execute query
143
+ #########################
144
+ # Build & execute query #
145
+ #########################
143
146
  groups = all_groups
144
147
  # MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
145
- # where the numbers are 1-indexed references to SELECT values. It makes these
146
- # generated queries much shorter and more readable, and it avoids the
147
- # ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
148
- # from underlying tables) even where those are supported. But in case we find
149
- # a database that doesn't support numeric references, the fully-explicit
150
- # grouping code is commented out below.
151
- #
152
- # grouped_query = groups.any? ? from_where.group(*groups) : from_where
148
+ # where the numbers are 1-indexed references to SELECT values.
153
149
  grouped_query = groups.any? ? from_where.group(*1..groups.size) : from_where
154
150
  data = grouped_query.pluck(*groups, *value_selects)
155
- # .pluck(:one_column) returns an array of values instead of an array of arrays,
156
- # which breaks the aggregation and assignment below in case anyone ever asks
157
- # `summarize` for only one thing.
151
+
152
+ # .pluck(:just_one_column) returns an array of values instead of an array
153
+ # of arrays, which breaks the aggregation and assignment below.
158
154
  data = data.map { |d| [d] } if (groups.size + value_selects.size) == 1
159
155
 
160
- # Aggregate & assign results
161
- group_idx = groups.each_with_index.to_h
156
+ ##############################
157
+ # Build aggregation reducers #
158
+ ##############################
159
+ # groups includes all base groups and all sub-groups
160
+ group_idx = groups.each_with_index.to_h # Inverts the groups list: `[:foo, :bar]` becomes `{:foo => 0, :bar => 1}`
162
161
  starting_values, reducers = @calculations.each_with_index.map do |f, i|
163
162
  value_column = groups.size + i
163
+ # each calculation shares any base groups that exist and may have sub-groups, which won't be shared by others
164
164
  group_columns = f.relation.group_values.map { |k| group_idx[k] }
165
- # `row[value_column] || 0` pattern in reducers because SQL SUM(NULL)
166
- # returns NULL, but like ActiveRecord we always want .sum to return a
167
- # number, and our "starting_values and reducers" implementation means
168
- # we sometimes will have to add NULL to our numbers.
169
165
  case group_columns.size
170
166
  when 0 then [
171
- 0,
172
- ->(memo, row) { memo + (row[value_column] || 0) }
167
+ f.initial,
168
+ ->(memo, row) { f.reducer(memo, row[value_column]) }
173
169
  ]
174
170
  when 1 then [
175
- Hash.new(0), # Default 0 makes the reducer much cleaner, but we have to clean it up later
171
+ {},
176
172
  ->(memo, row) {
177
- memo[row[group_columns[0]]] += row[value_column] unless (row[value_column] || 0).zero?
173
+ key = row[group_columns[0]]
174
+ prev_val = memo[key] || f.initial
175
+ next_val = f.reducer(prev_val, row[value_column])
176
+ memo[key] = next_val unless next_val == prev_val
178
177
  memo
179
178
  }
180
179
  ]
181
180
  else [
182
- Hash.new(0),
181
+ {},
183
182
  ->(memo, row) {
184
- memo[group_columns.map { |i| row[i] }] += row[value_column] unless (row[value_column] || 0).zero?
183
+ key = group_columns.map { |i| row[i] }
184
+ prev_val = memo[key] || f.initial
185
+ next_val = f.reducer(prev_val, row[value_column])
186
+ memo[key] = next_val unless next_val == prev_val
185
187
  memo
186
188
  }
187
189
  ]
@@ -199,8 +201,7 @@ module ActiveRecord::Summarize
199
201
  values[i] = reducers[i].call(values[i], row)
200
202
  end
201
203
  end
202
- # Set any hash's default back to nil, since callers will expect a normal hash
203
- values.each { |v| v.default = nil if v.is_a? Hash }
204
+ values
204
205
  end
205
206
  end
206
207
 
@@ -241,7 +242,7 @@ module ActiveRecord::Summarize
241
242
 
242
243
  def value_selects
243
244
  @calculations.each_with_index.map do |f, i|
244
- f.select_value(@relation)
245
+ f.select_column_arel_node(@relation)
245
246
  .as("_v#{i}") # In Postgres with certain Rails versions, alias is needed to disambiguate result column names for type information
246
247
  end
247
248
  end
@@ -253,39 +254,6 @@ module ActiveRecord::Summarize
253
254
  end
254
255
  end
255
256
 
256
- class CalculationResult
257
- attr_reader :relation, :method, :column
258
-
259
- def initialize(relation, method, column)
260
- @relation = relation
261
- @method = method
262
- @column = column
263
- end
264
-
265
- def select_value(base_relation)
266
- where = relation.where_clause - base_relation.where_clause
267
- for_select = column
268
- for_select = Arel::Nodes::Case.new(where.ast).when(true, for_select).else(unmatch_arel_node) unless where.empty?
269
- function.new([for_select]).tap { |f| f.distinct = relation.distinct_value }
270
- end
271
-
272
- def unmatch_arel_node
273
- case method
274
- when "sum" then 0 # Adding zero to a sum does nothing
275
- when "count" then nil # In SQL, null is no value and is not counted
276
- else raise "Unknown calculation method"
277
- end
278
- end
279
-
280
- def function
281
- case method
282
- when "sum" then Arel::Nodes::Sum
283
- when "count" then Arel::Nodes::Count
284
- else raise "Unknown calculation method"
285
- end
286
- end
287
- end
288
-
289
257
  module RelationMethods
290
258
  def summarize(**opts, &block)
291
259
  raise Unsummarizable, "Cannot summarize within a summarize block" if @summarize
@@ -299,9 +267,23 @@ module ActiveRecord::Summarize
299
267
  def perform_calculation(operation, column_name)
300
268
  case operation = operation.to_s.downcase
301
269
  when "count", "sum"
302
- column_name = :id if [nil, "*", :all].include? column_name
270
+ column_name = :id if [nil, "*", :all].include? column_name # only applies to count
303
271
  raise Unsummarizable, "DISTINCT in SQL is not reliably correct with summarize" if column_name.is_a?(String) && /\bdistinct\b/i === column_name
304
- @summarize.add_calculation(self, operation, aggregate_column(column_name))
272
+ @summarize.add_calculation(operation, self, aggregate_column(column_name))
273
+ when "average"
274
+ ChainableResult::WITH_RESOLVED[
275
+ perform_calculation("sum", column_name),
276
+ perform_calculation("count", column_name)
277
+ ] do |sum, count|
278
+ if sum.is_a? Hash
279
+ sum.to_h { |key, s| [key, s.to_d / count[key]] }
280
+ else
281
+ next nil if count == 0
282
+ sum.to_d / count
283
+ end
284
+ end
285
+ when "minimum", "maximum"
286
+ @summarize.add_calculation(operation, self, aggregate_column(column_name))
305
287
  else super
306
288
  end
307
289
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: activerecord-summarize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joshua Paine
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-16 00:00:00.000000000 Z
11
+ date: 2026-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -58,6 +58,7 @@ files:
58
58
  - docs/summarize_compared_with_load_async.md
59
59
  - docs/use_case_moderator_dashboard.md
60
60
  - lib/activerecord/summarize.rb
61
+ - lib/activerecord/summarize/calculation_implementation.rb
61
62
  - lib/activerecord/summarize/version.rb
62
63
  - lib/chainable_result.rb
63
64
  - sig/activerecord/summarize.rbs