activerecord-summarize 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3349ef226e79ac7b8182798fbe3f0966a94bd29a5ec31202fe14c74e681bc68
4
- data.tar.gz: f257baecb4562c791d0d7648f45ec2549e1ae6faff2cfcd386d95a5e68fb233a
3
+ metadata.gz: 9a4373f45ac68b039a480eabff7429cf8c0459c3ca43ab1d2e44ac7fce5987cd
4
+ data.tar.gz: da0aee342ea440116ffb2e557f628d5f18781de3c143e23a2b6b3350a607aea7
5
5
  SHA512:
6
- metadata.gz: fdf0dece89a7d1db578414a1682adb956e8a973c7d575b7d589d0645ef906bdba2f9d849a1833fd1cc10a7b6f1e105c2c36b54532624005d67d8501e1e03aa13
7
- data.tar.gz: a21a8e232e86706283954d4690b99c824c80b3521337c2dba10dc3f415f295844670f6d0c4ac94d7a0a8fa58c5fe408a07a64b3d31b65f20086a1d6920409382
6
+ metadata.gz: b8812147378b9d2c1b4069b9074f126cd20e37f763c6b4df374ada72bec3b2d354505cf67470cc6708198f4d997c14d8d0b402b71efe7274ef2ce096c4ac73d1
7
+ data.tar.gz: d7a893afa09e883a5a9db77f802d0f3a9ab85ff9ad744cb30f30247c0b2d85fea30147ea15ca866f9e4498538dc7d5b68c191a7219050cc4ef35cebe2474ed47
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## [0.6.0] - 2026-05-20
2
+
3
+ - **FEATURE:** Support for `.average`, `.minimum`, and `.maximum` inside `summarize` blocks. All three work with the same flexibility as `.count` and `.sum`: they can be combined freely with each other and with other calculation methods, support `.where` and `.group` sub-scopes, and their results are chainable just like any other `ChainableResult`.
4
+
5
+ ## [0.5.1] - 2023-08-16
6
+
7
+ - **BUGFIX:** Starting with version 7.0.5, the behavior of ActiveRecord's `pluck` changed: in PostgreSQL, when you pluck multiple values that use the same aggregate function (e.g., `sum`), the data type of the last such value is now applied to all of them, whereas previously each type was inferred correctly. Our solution is to add an explicit alias to each result column.
8
+
1
9
  ## [0.5.0] - 2023-05-14
2
10
 
3
11
  - **FEATURE:** Your `summarize` blocks won't need to accept the proc second argument as often, because `ChainableResult` methods will also resolve their arguments. E.g., `query.summarize {|q| @mult = q.sum(:a) * q.sum(:b) }` now works, where previously you would have needed to write `query.summarize {|q,with| @mult = with[q.sum(:a),q.sum(:b)] {|a,b| a * b } }`.
data/Gemfile CHANGED
@@ -9,5 +9,6 @@ gem "rake", "~> 13.0"
9
9
  gem "minitest", "~> 5.0"
10
10
  gem "standard", "~> 1.3"
11
11
 
12
- gem "activerecord", "7.0.3"
13
- gem "sqlite3", "1.4.2"
12
+ gem "activerecord", "7.0.7"
13
+ gem "sqlite3", "1.6.3"
14
+ gem "pg", "~> 1.5"
data/Gemfile.lock CHANGED
@@ -1,67 +1,88 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- activerecord-summarize (0.5.0)
4
+ activerecord-summarize (0.6.0)
5
5
  activerecord (>= 5.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- activemodel (7.0.3)
11
- activesupport (= 7.0.3)
12
- activerecord (7.0.3)
13
- activemodel (= 7.0.3)
14
- activesupport (= 7.0.3)
15
- activesupport (7.0.3)
10
+ activemodel (7.0.7)
11
+ activesupport (= 7.0.7)
12
+ activerecord (7.0.7)
13
+ activemodel (= 7.0.7)
14
+ activesupport (= 7.0.7)
15
+ activesupport (7.0.7)
16
16
  concurrent-ruby (~> 1.0, >= 1.0.2)
17
17
  i18n (>= 1.6, < 2)
18
18
  minitest (>= 5.1)
19
19
  tzinfo (~> 2.0)
20
20
  ast (2.4.2)
21
- concurrent-ruby (1.1.10)
22
- i18n (1.10.0)
21
+ concurrent-ruby (1.2.2)
22
+ i18n (1.14.1)
23
23
  concurrent-ruby (~> 1.0)
24
- minitest (5.15.0)
25
- parallel (1.21.0)
26
- parser (3.1.1.0)
24
+ json (2.6.3)
25
+ language_server-protocol (3.17.0.3)
26
+ lint_roller (1.1.0)
27
+ mini_portile2 (2.8.7)
28
+ minitest (5.19.0)
29
+ parallel (1.23.0)
30
+ parser (3.2.2.3)
27
31
  ast (~> 2.4.1)
32
+ racc
33
+ pg (1.5.3)
34
+ racc (1.7.1)
28
35
  rainbow (3.1.1)
29
36
  rake (13.0.6)
30
- regexp_parser (2.2.1)
31
- rexml (3.2.5)
32
- rubocop (1.25.1)
37
+ regexp_parser (2.8.1)
38
+ rexml (3.2.6)
39
+ rubocop (1.52.1)
40
+ json (~> 2.3)
33
41
  parallel (~> 1.10)
34
- parser (>= 3.1.0.0)
42
+ parser (>= 3.2.2.3)
35
43
  rainbow (>= 2.2.2, < 4.0)
36
44
  regexp_parser (>= 1.8, < 3.0)
37
- rexml
38
- rubocop-ast (>= 1.15.1, < 2.0)
45
+ rexml (>= 3.2.5, < 4.0)
46
+ rubocop-ast (>= 1.28.0, < 2.0)
39
47
  ruby-progressbar (~> 1.7)
40
- unicode-display_width (>= 1.4.0, < 3.0)
41
- rubocop-ast (1.16.0)
42
- parser (>= 3.1.1.0)
43
- rubocop-performance (1.13.2)
48
+ unicode-display_width (>= 2.4.0, < 3.0)
49
+ rubocop-ast (1.29.0)
50
+ parser (>= 3.2.1.0)
51
+ rubocop-performance (1.18.0)
44
52
  rubocop (>= 1.7.0, < 2.0)
45
53
  rubocop-ast (>= 0.4.0)
46
- ruby-progressbar (1.11.0)
47
- sqlite3 (1.4.2)
48
- standard (1.7.2)
49
- rubocop (= 1.25.1)
50
- rubocop-performance (= 1.13.2)
51
- tzinfo (2.0.4)
54
+ ruby-progressbar (1.13.0)
55
+ sqlite3 (1.6.3)
56
+ mini_portile2 (~> 2.8.0)
57
+ sqlite3 (1.6.3-arm64-darwin)
58
+ standard (1.30.1)
59
+ language_server-protocol (~> 3.17.0.2)
60
+ lint_roller (~> 1.0)
61
+ rubocop (~> 1.52.0)
62
+ standard-custom (~> 1.0.0)
63
+ standard-performance (~> 1.1.0)
64
+ standard-custom (1.0.2)
65
+ lint_roller (~> 1.0)
66
+ rubocop (~> 1.50)
67
+ standard-performance (1.1.2)
68
+ lint_roller (~> 1.1)
69
+ rubocop-performance (~> 1.18.0)
70
+ tzinfo (2.0.6)
52
71
  concurrent-ruby (~> 1.0)
53
- unicode-display_width (2.1.0)
72
+ unicode-display_width (2.4.2)
54
73
 
55
74
  PLATFORMS
56
75
  arm64-darwin-21
76
+ arm64-darwin-23
57
77
  x86_64-linux
58
78
 
59
79
  DEPENDENCIES
60
- activerecord (= 7.0.3)
80
+ activerecord (= 7.0.7)
61
81
  activerecord-summarize!
62
82
  minitest (~> 5.0)
83
+ pg (~> 1.5)
63
84
  rake (~> 13.0)
64
- sqlite3 (= 1.4.2)
85
+ sqlite3 (= 1.6.3)
65
86
  standard (~> 1.3)
66
87
 
67
88
  BUNDLED WITH
data/README.md CHANGED
@@ -127,6 +127,7 @@ puts Purchase.last_year.complete.group(:region_id).summarize do |purchases,with_
127
127
  target = with_resolved[total / 4, by_quarter.values.max] {|avg_q, best_q| [avg_q * 1.25, best_q].max.round }
128
128
  {last_year: total, quarters: by_quarter, unit_target: target}
129
129
  end
130
+
130
131
  # Output:
131
132
  # {
132
133
  # 1 => {
@@ -174,7 +175,9 @@ Instead the block is evaluated once to determine what calculations need to be ru
174
175
 
175
176
  ## Development
176
177
 
177
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
178
+ Run `bin/setup` to install dependencies. If you don't have PostgreSQL installed, comment out the `pg` line in `Gemfile` first. Then, run `bundle exec rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
179
+
180
+ Tests and `bin/console` support SQLite and PostgreSQL: (un)comment the appropriate lines at the top of `test/test_data.rb` to choose. In the future, we'll have a nicer solution. If you want to use PostgreSQL, run `CREATE DATABASE summarize_test;` as your default user.
178
181
 
179
182
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
180
183
 
@@ -0,0 +1,129 @@
1
# Per-operation strategy objects for the aggregate calculations available
# inside a `summarize` block. Each implementation knows which Arel function
# node to build for the SELECT list and how to fold the (possibly several)
# result rows for one calculation back into a single value.
module ActiveRecord::Summarize::CalculationImplementation
  # Factory: builds the implementation matching `operation`.
  #
  # operation   - one of "sum", "count", "minimum", "maximum"
  # relation    - the relation the calculation was requested on
  # column_name - the column (or Arel node) handed to the aggregate function
  #
  # Raises RuntimeError for any other operation.
  def self.new(operation, relation, column_name)
    implementation =
      case operation
      when "sum" then Sum
      when "count" then Count
      when "minimum" then Minimum
      when "maximum" then Maximum
      else raise "Unknown calculation #{operation}"
      end
    implementation.new(relation, column_name)
  end

  # Shared behavior. Subclasses supply the four hooks below
  # (`function_arel_node_class`, `unmatch_arel_node`, `initial`, `reducer`).
  class Base
    attr_reader :relation, :column

    def initialize(relation, column)
      @relation = relation
      @column = column
    end

    # Builds the aggregate function node for this calculation.
    #
    # Any `where` conditions this calculation's relation has beyond those of
    # `base_relation` are folded into a CASE expression, so rows that do not
    # match contribute `unmatch_arel_node` instead of the column value.
    def select_column_arel_node(base_relation)
      extra_filters = relation.where_clause - base_relation.where_clause
      argument =
        if extra_filters.empty?
          column
        else
          Arel::Nodes::Case.new(extra_filters.ast).when(true, column).else(unmatch_arel_node)
        end
      node = function_arel_node_class.new([argument])
      node.distinct = relation.distinct_value
      node
    end

    # Hook: the Arel::Node class representing the SQL function.
    def function_arel_node_class
      raise "`#{self.class}` must implement `function_arel_node_class`"
    end

    # Hook: in case of `where` filters, the does-not-count value used when the
    # filters don't match, so far always 0 or nil (becomes NULL).
    def unmatch_arel_node
      raise "`#{self.class}` must implement `unmatch_arel_node`"
    end

    # Hook: the starting value when reducing potentially many
    # split-into-groups rows to a single value, so far always 0 or nil.
    def initial
      raise "`#{self.class}` must implement `initial`"
    end

    # Hook: folds one more row value `v` into `memo` and returns the new
    # accumulated value (same type as memo and/or v). Reduction is needed at
    # all because `.group` on columns _other than_ this calculation's own
    # fragments its result across several rows.
    def reducer(memo, v)
      raise "`#{self.class}` must implement `reducer`"
    end
  end

  # SUM(column)
  class Sum < Base
    def function_arel_node_class
      Arel::Nodes::Sum
    end

    # Adding zero to a sum does nothing.
    def unmatch_arel_node
      0
    end

    def initial
      0
    end

    # A missing row value is treated as 0.
    def reducer(memo, v)
      addend = v || 0
      memo + addend
    end
  end

  # COUNT(column)
  class Count < Base
    def function_arel_node_class
      Arel::Nodes::Count
    end

    # In SQL, null is no value and is not counted.
    def unmatch_arel_node
      nil
    end

    def initial
      0
    end

    # A missing row value is treated as 0.
    def reducer(memo, v)
      addend = v || 0
      memo + addend
    end
  end

  # MIN(column)
  class Minimum < Base
    def function_arel_node_class
      Arel::Nodes::Min
    end

    # In SQL, null is no value and is not considered for min().
    def unmatch_arel_node
      nil
    end

    def initial
      nil
    end

    # Keeps the smaller of the two values; a nil on either side yields the other.
    def reducer(memo, v)
      if v.nil?
        memo
      elsif memo.nil? || v < memo
        v
      else
        memo
      end
    end
  end

  # MAX(column)
  class Maximum < Base
    def function_arel_node_class
      Arel::Nodes::Max
    end

    # In SQL, null is no value and is not considered for max().
    def unmatch_arel_node
      nil
    end

    def initial
      nil
    end

    # Keeps the larger of the two values; a nil on either side yields the other.
    def reducer(memo, v)
      if v.nil?
        memo
      elsif memo.nil? || v > memo
        v
      else
        memo
      end
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module ActiveRecord
4
4
  module Summarize
5
- VERSION = "0.5.0"
5
+ VERSION = "0.6.0"
6
6
  end
7
7
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "summarize/version"
4
+ require_relative "summarize/calculation_implementation"
4
5
  require_relative "../chainable_result"
5
6
 
6
7
  module ActiveRecord::Summarize
@@ -130,58 +131,59 @@ module ActiveRecord::Summarize
130
131
  end
131
132
  end
132
133
 
133
- def add_calculation(relation, operation, column_name)
134
# Registers one aggregate calculation and returns a ChainableResult for it.
#
# The block given to `ChainableResult.wrap` reads the slot of
# `current_result_row` at this calculation's position in `@calculations`.
def add_calculation(operation, relation, column_name)
  merge_from_where!(relation)
  position = @calculations.size
  implementation = CalculationImplementation.new(operation, relation, column_name)
  @calculations.push(implementation)
  ChainableResult.wrap(implementation) { current_result_row[position] }
end
140
141
 
141
142
  def resolve
142
- # Build & execute query
143
+ #########################
144
+ # Build & execute query #
145
+ #########################
143
146
  groups = all_groups
144
147
  # MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
145
- # where the numbers are 1-indexed references to SELECT values. It makes these
146
- # generated queries much shorter and more readable, and it avoids the
147
- # ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
148
- # from underlying tables) even where those are supported. But in case we find
149
- # a database that doesn't support numeric references, the fully-explicit
150
- # grouping code is commented out below.
151
- #
152
- # grouped_query = groups.any? ? from_where.group(*groups) : from_where
148
+ # where the numbers are 1-indexed references to SELECT values.
153
149
  grouped_query = groups.any? ? from_where.group(*1..groups.size) : from_where
154
150
  data = grouped_query.pluck(*groups, *value_selects)
155
- # .pluck(:one_column) returns an array of values instead of an array of arrays,
156
- # which breaks the aggregation and assignment below in case anyone ever asks
157
- # `summarize` for only one thing.
151
+
152
+ # .pluck(:just_one_column) returns an array of values instead of an array
153
+ # of arrays, which breaks the aggregation and assignment below.
158
154
  data = data.map { |d| [d] } if (groups.size + value_selects.size) == 1
159
155
 
160
- # Aggregate & assign results
161
- group_idx = groups.each_with_index.to_h
156
+ ##############################
157
+ # Build aggregation reducers #
158
+ ##############################
159
+ # groups includes all base groups and all sub-groups
160
+ group_idx = groups.each_with_index.to_h # Inverts the groups list: `[:foo, :bar]` becomes `{:foo => 0, :bar => 1}`
162
161
  starting_values, reducers = @calculations.each_with_index.map do |f, i|
163
162
  value_column = groups.size + i
163
+ # each calculation shares any base groups that exist and may have sub-groups, which won't be shared by others
164
164
  group_columns = f.relation.group_values.map { |k| group_idx[k] }
165
- # `row[value_column] || 0` pattern in reducers because SQL SUM(NULL)
166
- # returns NULL, but like ActiveRecord we always want .sum to return a
167
- # number, and our "starting_values and reducers" implementation means
168
- # we sometimes will have to add NULL to our numbers.
169
165
  case group_columns.size
170
166
  when 0 then [
171
- 0,
172
- ->(memo, row) { memo + (row[value_column] || 0) }
167
+ f.initial,
168
+ ->(memo, row) { f.reducer(memo, row[value_column]) }
173
169
  ]
174
170
  when 1 then [
175
- Hash.new(0), # Default 0 makes the reducer much cleaner, but we have to clean it up later
171
+ {},
176
172
  ->(memo, row) {
177
- memo[row[group_columns[0]]] += row[value_column] unless (row[value_column] || 0).zero?
173
+ key = row[group_columns[0]]
174
+ prev_val = memo[key] || f.initial
175
+ next_val = f.reducer(prev_val, row[value_column])
176
+ memo[key] = next_val unless next_val == prev_val
178
177
  memo
179
178
  }
180
179
  ]
181
180
  else [
182
- Hash.new(0),
181
+ {},
183
182
  ->(memo, row) {
184
- memo[group_columns.map { |i| row[i] }] += row[value_column] unless (row[value_column] || 0).zero?
183
+ key = group_columns.map { |i| row[i] }
184
+ prev_val = memo[key] || f.initial
185
+ next_val = f.reducer(prev_val, row[value_column])
186
+ memo[key] = next_val unless next_val == prev_val
185
187
  memo
186
188
  }
187
189
  ]
@@ -199,8 +201,7 @@ module ActiveRecord::Summarize
199
201
  values[i] = reducers[i].call(values[i], row)
200
202
  end
201
203
  end
202
- # Set any hash's default back to nil, since callers will expect a normal hash
203
- values.each { |v| v.default = nil if v.is_a? Hash }
204
+ values
204
205
  end
205
206
  end
206
207
 
@@ -240,7 +241,10 @@ module ActiveRecord::Summarize
240
241
  end
241
242
 
242
243
  def value_selects
243
- @calculations.map { |f| f.select_value(@relation) }
244
+ @calculations.each_with_index.map do |f, i|
245
+ f.select_column_arel_node(@relation)
246
+ .as("_v#{i}") # In Postgres with certain Rails versions, alias is needed to disambiguate result column names for type information
247
+ end
244
248
  end
245
249
 
246
250
  def lightly_touch_impure_hash(h)
@@ -250,39 +254,6 @@ module ActiveRecord::Summarize
250
254
  end
251
255
  end
252
256
 
253
- class CalculationResult
254
- attr_reader :relation, :method, :column
255
-
256
- def initialize(relation, method, column)
257
- @relation = relation
258
- @method = method
259
- @column = column
260
- end
261
-
262
- def select_value(base_relation)
263
- where = relation.where_clause - base_relation.where_clause
264
- for_select = column
265
- for_select = Arel::Nodes::Case.new(where.ast).when(true, for_select).else(unmatch_arel_node) unless where.empty?
266
- function.new([for_select]).tap { |f| f.distinct = relation.distinct_value }
267
- end
268
-
269
- def unmatch_arel_node
270
- case method
271
- when "sum" then 0 # Adding zero to a sum does nothing
272
- when "count" then nil # In SQL, null is no value and is not counted
273
- else raise "Unknown calculation method"
274
- end
275
- end
276
-
277
- def function
278
- case method
279
- when "sum" then Arel::Nodes::Sum
280
- when "count" then Arel::Nodes::Count
281
- else raise "Unknown calculation method"
282
- end
283
- end
284
- end
285
-
286
257
  module RelationMethods
287
258
  def summarize(**opts, &block)
288
259
  raise Unsummarizable, "Cannot summarize within a summarize block" if @summarize
@@ -296,9 +267,23 @@ module ActiveRecord::Summarize
296
267
  def perform_calculation(operation, column_name)
297
268
  case operation = operation.to_s.downcase
298
269
  when "count", "sum"
299
- column_name = :id if [nil, "*", :all].include? column_name
270
+ column_name = :id if [nil, "*", :all].include? column_name # only applies to count
300
271
  raise Unsummarizable, "DISTINCT in SQL is not reliably correct with summarize" if column_name.is_a?(String) && /\bdistinct\b/i === column_name
301
- @summarize.add_calculation(self, operation, aggregate_column(column_name))
272
+ @summarize.add_calculation(operation, self, aggregate_column(column_name))
273
+ when "average"
274
+ ChainableResult::WITH_RESOLVED[
275
+ perform_calculation("sum", column_name),
276
+ perform_calculation("count", column_name)
277
+ ] do |sum, count|
278
+ if sum.is_a? Hash
279
+ sum.to_h { |key, s| [key, s.to_d / count[key]] }
280
+ else
281
+ next nil if count == 0
282
+ sum.to_d / count
283
+ end
284
+ end
285
+ when "minimum", "maximum"
286
+ @summarize.add_calculation(operation, self, aggregate_column(column_name))
302
287
  else super
303
288
  end
304
289
  end
@@ -88,12 +88,12 @@ class ChainableResult
88
88
  end
89
89
 
90
90
  def self.with(*results, &block)
91
- ChainableResult.wrap(results.size == 1 ? results.first : results, :then, &block)
91
+ ChainableResult.wrap((results.size == 1) ? results.first : results, :then, &block)
92
92
  end
93
93
 
94
94
  def self.sync_with(*results, &block)
95
95
  # Non-time-traveling, synchronous version of `with` for testing
96
- (results.size == 1 ? results.first : results).then(&block)
96
+ ((results.size == 1) ? results.first : results).then(&block)
97
97
  end
98
98
 
99
99
  # Shorter names are deprecated
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: activerecord-summarize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joshua Paine
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2026-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -58,6 +58,7 @@ files:
58
58
  - docs/summarize_compared_with_load_async.md
59
59
  - docs/use_case_moderator_dashboard.md
60
60
  - lib/activerecord/summarize.rb
61
+ - lib/activerecord/summarize/calculation_implementation.rb
61
62
  - lib/activerecord/summarize/version.rb
62
63
  - lib/chainable_result.rb
63
64
  - sig/activerecord/summarize.rbs