simple-sql 0.5.36 → 0.5.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/simple/sql/connection/scope/count_by_groups.rb +91 -33
- data/spec/simple/sql/count_by_groups_spec.rb +37 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb13412c57185b5bd356c4dc94f52232b22bdef58e032d67ceb79127811ec639
|
4
|
+
data.tar.gz: adc67bad73619d640834d8b821676bfbc5530ca003adfcabe3012535350cea39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c5597472257a4b487c2b9b1a87545cb37d70e1e625cdd31ced80d3aaf650f547ccd6931cac3e9e81e97df25caaa6fe3be519ae905ea206b5a573717fcbc3a18
|
7
|
+
data.tar.gz: a69336914733676aa4465489b71576b1274284337d9295cc786d5e050a72942fe8b54f90cb6eccad7165e8f24ad635e6376342e49dd63bf4d2cbe174fc173620
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.36
|
1
|
+
0.5.37
|
@@ -19,55 +19,113 @@ class Simple::SQL::Connection::Scope
|
|
19
19
|
#
|
20
20
|
def enumerate_groups(sql_fragment)
|
21
21
|
sql = order_by(nil).to_sql(pagination: false)
|
22
|
+
@connection.all "SELECT DISTINCT #{sql_fragment} FROM (#{sql}) sq", *args
|
23
|
+
end
|
22
24
|
|
23
|
-
|
24
|
-
|
25
|
-
# cost estimates are good, but are hard to check against a hard coded value.
|
26
|
-
# see https://issues.mediafellows.com/issues/75232
|
27
|
-
#
|
28
|
-
# if cost > 10_000
|
29
|
-
# raise "enumerate_groups(#{sql_fragment.inspect}) takes too much time. Make sure to create a suitable index"
|
30
|
-
# end
|
25
|
+
def count_by(sql_fragment)
|
26
|
+
expect! sql_fragment => String
|
31
27
|
|
32
|
-
|
33
|
-
var_name = "$#{@args.count + 1}"
|
34
|
-
cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
|
28
|
+
sql = order_by(nil).to_sql(pagination: false)
|
35
29
|
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
recs = @connection.all "SELECT COUNT(*) AS count, #{sql_fragment} AS group FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
|
31
|
+
|
32
|
+
# if we count by a single value (e.g. `count_by("role_id")`) each entry in recs consists of an array [count, group_value].
|
33
|
+
# The resulting Hash will have entries of group_value => count.
|
34
|
+
if recs.first&.length == 2
|
35
|
+
recs.each_with_object({}) do |count_and_group, hsh|
|
36
|
+
count, group = *count_and_group
|
37
|
+
hsh[group] = count
|
38
|
+
end
|
39
|
+
else
|
40
|
+
recs.each_with_object({}) do |count_and_group, hsh|
|
41
|
+
count, *group = *count_and_group
|
42
|
+
hsh[group] = count
|
43
|
+
end
|
39
44
|
end
|
40
|
-
|
41
|
-
groups
|
42
45
|
end
|
43
46
|
|
44
|
-
|
45
|
-
sql = order_by(nil).to_sql(pagination: false)
|
47
|
+
private
|
46
48
|
|
47
|
-
|
48
|
-
|
49
|
+
# cost estimate threshold for count_by method. Can be set to false, true, or
|
50
|
+
# a number.
|
51
|
+
#
|
52
|
+
# Note that cost estimates are problematic, since they are not reported in
|
53
|
+
# any "real" unit, meaning any comparison really is a bit pointless.
|
54
|
+
COUNT_BY_ESTIMATE_COST_THRESHOLD = 10_000
|
55
|
+
|
56
|
+
# estimates the cost to run a sql query. If COUNT_BY_ESTIMATE_COST_THRESHOLD
|
57
|
+
# is set and the cost estimate is at least COUNT_BY_ESTIMATE_COST_THRESHOLD
|
58
|
+
# \a count_by_estimate is using the estimating code path.
|
59
|
+
def use_count_by_estimate?(sql_group_by_fragment)
|
60
|
+
case COUNT_BY_ESTIMATE_COST_THRESHOLD
|
61
|
+
when true then true
|
62
|
+
when false then false
|
63
|
+
else
|
64
|
+
# estimate the effort to exact counting over all groups.
|
65
|
+
base_sql = order_by(nil).to_sql(pagination: false)
|
66
|
+
count_sql = "SELECT COUNT(*) FROM (#{base_sql}) sq GROUP BY #{sql_group_by_fragment}"
|
67
|
+
cost = @connection.estimate_cost count_sql, *args
|
68
|
+
|
69
|
+
cost >= COUNT_BY_ESTIMATE_COST_THRESHOLD
|
70
|
+
end
|
49
71
|
end
|
50
72
|
|
73
|
+
public
|
74
|
+
|
51
75
|
def count_by_estimate(sql_fragment)
|
52
|
-
|
76
|
+
expect! sql_fragment => String
|
53
77
|
|
54
|
-
|
55
|
-
# disabled (see https://issues.mediafellows.com/issues/75237).
|
78
|
+
return count_by(sql_fragment) unless use_count_by_estimate?(sql_fragment)
|
56
79
|
|
57
|
-
|
58
|
-
|
80
|
+
# iterate over all groups, estimating the count for each.
|
81
|
+
#
|
82
|
+
# For larger groups we'll use that estimate - preventing a full table scan.
|
83
|
+
# Groups smaller than EXACT_COUNT_THRESHOLD are counted exactly - in the
|
84
|
+
# hope that this query can be answered from an index.
|
85
|
+
|
86
|
+
#
|
87
|
+
# Usually Simple::SQL.all normalizes each result row into its first value,
|
88
|
+
# if the row only consists of a single value. Here, however, we don't
|
89
|
+
# know the width of a group; so to understand this we just add a dummy
|
90
|
+
# value to the sql_fragment and then remove it again.
|
91
|
+
#
|
92
|
+
groups = enumerate_groups("1 AS __dummy__, #{sql_fragment}")
|
93
|
+
groups = groups.each(&:shift)
|
59
94
|
|
60
|
-
|
95
|
+
# no groups? well, then...
|
96
|
+
return {} if groups.empty?
|
61
97
|
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
|
98
|
+
#
|
99
|
+
# The estimating code only works for groups of size 1. This is a limitation
|
100
|
+
# of simple-sql - for larger groups we would have to be able to encode arrays
|
101
|
+
# of arrays on their way to the postgres server. We are not able to do that
|
102
|
+
# currently.
|
103
|
+
#
|
104
|
+
group_size = groups.first&.length
|
105
|
+
if group_size > 1
|
106
|
+
return count_by(sql_fragment)
|
107
|
+
end
|
108
|
+
|
109
|
+
# The code below only works for groups of size 1
|
110
|
+
groups = groups.map(&:first)
|
111
|
+
|
112
|
+
#
|
113
|
+
# Now we estimate the count of entries in each group. For large groups we
|
114
|
+
# just use the estimate - because it is usually pretty close to being correct.
|
115
|
+
# Small groups are collected in the `sparse_groups` array, to be counted
|
116
|
+
# exactly later on.
|
117
|
+
#
|
66
118
|
|
67
119
|
counts = {}
|
120
|
+
|
68
121
|
sparse_groups = []
|
69
|
-
|
70
|
-
|
122
|
+
base_sql = order_by(nil).to_sql(pagination: false)
|
123
|
+
|
124
|
+
var_name = "$#{@args.count + 1}"
|
125
|
+
|
126
|
+
groups.each do |group|
|
127
|
+
scope = @connection.scope("SELECT * FROM (#{base_sql}) sq WHERE #{sql_fragment}=#{var_name}", args + [group])
|
128
|
+
|
71
129
|
estimated_count = scope.send(:estimated_count)
|
72
130
|
counts[group] = estimated_count
|
73
131
|
sparse_groups << group if estimated_count < EXACT_COUNT_THRESHOLD
|
@@ -77,7 +135,7 @@ class Simple::SQL::Connection::Scope
|
|
77
135
|
unless sparse_groups.empty?
|
78
136
|
sparse_counts = @connection.all <<~SQL, *args, sparse_groups
|
79
137
|
SELECT #{sql_fragment} AS group, COUNT(*) AS count
|
80
|
-
FROM (#{
|
138
|
+
FROM (#{base_sql}) sq
|
81
139
|
WHERE #{sql_fragment} = ANY(#{var_name})
|
82
140
|
GROUP BY #{sql_fragment}
|
83
141
|
SQL
|
@@ -1,44 +1,63 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
3
|
describe "Simple::SQL::Connection::Scope#count_by" do
|
4
|
-
let!(:users)
|
5
|
-
let(:
|
6
|
-
|
4
|
+
let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
|
5
|
+
let(:scope) { SQL.scope("SELECT * FROM users") }
|
6
|
+
|
7
|
+
let(:all_role_ids) { 1.upto(10).to_a }
|
8
|
+
let(:all_role_ids_w_squares) { all_role_ids.map { |role_id| [role_id, role_id*role_id] } }
|
9
|
+
|
10
|
+
before do
|
11
|
+
# initially we have 10 users, one per role_id in the range 1 .. 10
|
12
|
+
# This adds another 3 users with role_id of 1.
|
13
|
+
create(:user, role_id: 1)
|
14
|
+
create(:user, role_id: 1)
|
15
|
+
create(:user, role_id: 1)
|
16
|
+
end
|
7
17
|
|
8
18
|
describe "enumerate_groups" do
|
9
|
-
it "returns all groups" do
|
19
|
+
it "returns all groups by a single column" do
|
10
20
|
expect(scope.enumerate_groups("role_id")).to contain_exactly(*all_role_ids)
|
11
|
-
|
21
|
+
end
|
22
|
+
|
23
|
+
it "obeys where conditions" do
|
24
|
+
expect(scope.where("role_id < $1", 4).enumerate_groups("role_id")).to contain_exactly(1,2,3)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "counts all groups by multiple columns" do
|
28
|
+
expect(scope.where("role_id < $1", 4).enumerate_groups("role_id, role_id * role_id")).to contain_exactly([1, 1], [2, 4], [3, 9])
|
12
29
|
end
|
13
30
|
end
|
14
31
|
|
15
32
|
describe "count_by" do
|
16
|
-
it "counts all groups" do
|
17
|
-
create(:user, role_id: 1)
|
18
|
-
create(:user, role_id: 1)
|
19
|
-
create(:user, role_id: 1)
|
20
|
-
|
33
|
+
it "counts all groups by a single column" do
|
21
34
|
expect(scope.count_by("role_id")).to include(1 => 4)
|
22
35
|
expect(scope.count_by("role_id")).to include(2 => 1)
|
23
36
|
expect(scope.count_by("role_id").keys).to contain_exactly(*all_role_ids)
|
24
37
|
end
|
38
|
+
|
39
|
+
it "counts all groups by multiple columns" do
|
40
|
+
expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([1,1] => 4)
|
41
|
+
expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([2, 4] => 1)
|
42
|
+
expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
|
43
|
+
end
|
25
44
|
end
|
26
45
|
|
27
46
|
describe "count_by_estimate" do
|
28
47
|
before do
|
29
|
-
|
30
|
-
# but
|
31
|
-
allow(::Simple::SQL).to receive(:costs).and_return([0, 10_000])
|
48
|
+
expect_any_instance_of(Simple::SQL::Connection).to receive(:estimate_cost).at_least(:once).and_return(10_000)
|
32
49
|
end
|
33
|
-
|
34
|
-
it "counts all groups" do
|
35
|
-
create(:user, role_id: 1)
|
36
|
-
create(:user, role_id: 1)
|
37
|
-
create(:user, role_id: 1)
|
38
50
|
|
51
|
+
it "counts all groups by a single column" do
|
39
52
|
expect(scope.count_by_estimate("role_id")).to include(1 => 4)
|
40
53
|
expect(scope.count_by_estimate("role_id")).to include(2 => 1)
|
41
54
|
expect(scope.count_by_estimate("role_id").keys).to contain_exactly(*all_role_ids)
|
42
55
|
end
|
56
|
+
|
57
|
+
it "counts all groups by multiple columns and conditions" do
|
58
|
+
expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([1,1] => 4)
|
59
|
+
expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([2, 4] => 1)
|
60
|
+
expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
|
61
|
+
end
|
43
62
|
end
|
44
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-sql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.36
|
4
|
+
version: 0.5.37
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- radiospiel
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-04-
|
12
|
+
date: 2021-04-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pg_array_parser
|