simple-sql 0.5.36 → 0.5.37
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/simple/sql/connection/scope/count_by_groups.rb +91 -33
- data/spec/simple/sql/count_by_groups_spec.rb +37 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb13412c57185b5bd356c4dc94f52232b22bdef58e032d67ceb79127811ec639
|
4
|
+
data.tar.gz: adc67bad73619d640834d8b821676bfbc5530ca003adfcabe3012535350cea39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c5597472257a4b487c2b9b1a87545cb37d70e1e625cdd31ced80d3aaf650f547ccd6931cac3e9e81e97df25caaa6fe3be519ae905ea206b5a573717fcbc3a18
|
7
|
+
data.tar.gz: a69336914733676aa4465489b71576b1274284337d9295cc786d5e050a72942fe8b54f90cb6eccad7165e8f24ad635e6376342e49dd63bf4d2cbe174fc173620
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.36
|
1
|
+
0.5.37
|
@@ -19,55 +19,113 @@ class Simple::SQL::Connection::Scope
|
|
19
19
|
#
|
20
20
|
def enumerate_groups(sql_fragment)
|
21
21
|
sql = order_by(nil).to_sql(pagination: false)
|
22
|
+
@connection.all "SELECT DISTINCT #{sql_fragment} FROM (#{sql}) sq", *args
|
23
|
+
end
|
22
24
|
|
23
|
-
|
24
|
-
|
25
|
-
# cost estimates are good, but are hard to check against a hard coded value.
|
26
|
-
# see https://issues.mediafellows.com/issues/75232
|
27
|
-
#
|
28
|
-
# if cost > 10_000
|
29
|
-
# raise "enumerate_groups(#{sql_fragment.inspect}) takes too much time. Make sure to create a suitable index"
|
30
|
-
# end
|
25
|
+
def count_by(sql_fragment)
|
26
|
+
expect! sql_fragment => String
|
31
27
|
|
32
|
-
|
33
|
-
var_name = "$#{@args.count + 1}"
|
34
|
-
cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
|
28
|
+
sql = order_by(nil).to_sql(pagination: false)
|
35
29
|
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
recs = @connection.all "SELECT COUNT(*) AS count, #{sql_fragment} AS group FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
|
31
|
+
|
32
|
+
# if we count by a single value (e.g. `count_by("role_id")`), each entry in recs is an array [count, group_value].
|
33
|
+
# The resulting Hash will have entries of group_value => count.
|
34
|
+
if recs.first&.length == 2
|
35
|
+
recs.each_with_object({}) do |count_and_group, hsh|
|
36
|
+
count, group = *count_and_group
|
37
|
+
hsh[group] = count
|
38
|
+
end
|
39
|
+
else
|
40
|
+
recs.each_with_object({}) do |count_and_group, hsh|
|
41
|
+
count, *group = *count_and_group
|
42
|
+
hsh[group] = count
|
43
|
+
end
|
39
44
|
end
|
40
|
-
|
41
|
-
groups
|
42
45
|
end
|
43
46
|
|
44
|
-
|
45
|
-
sql = order_by(nil).to_sql(pagination: false)
|
47
|
+
private
|
46
48
|
|
47
|
-
|
48
|
-
|
49
|
+
# cost estimate threshold for count_by method. Can be set to false, true, or
|
50
|
+
# a number.
|
51
|
+
#
|
52
|
+
# Note that cost estimates are problematic, since they are not reported in
|
53
|
+
# any "real" unit, meaning any comparison really is a bit pointless.
|
54
|
+
COUNT_BY_ESTIMATE_COST_THRESHOLD = 10_000
|
55
|
+
|
56
|
+
# estimates the cost to run a sql query. If COUNT_BY_ESTIMATE_COST_THRESHOLD
|
57
|
+
# is a number and the cost estimate is at least COUNT_BY_ESTIMATE_COST_THRESHOLD
|
58
|
+
# \a count_by_estimate is using the estimating code path.
|
59
|
+
def use_count_by_estimate?(sql_group_by_fragment)
|
60
|
+
case COUNT_BY_ESTIMATE_COST_THRESHOLD
|
61
|
+
when true then true
|
62
|
+
when false then false
|
63
|
+
else
|
64
|
+
# estimate the effort to exact counting over all groups.
|
65
|
+
base_sql = order_by(nil).to_sql(pagination: false)
|
66
|
+
count_sql = "SELECT COUNT(*) FROM (#{base_sql}) sq GROUP BY #{sql_group_by_fragment}"
|
67
|
+
cost = @connection.estimate_cost count_sql, *args
|
68
|
+
|
69
|
+
cost >= COUNT_BY_ESTIMATE_COST_THRESHOLD
|
70
|
+
end
|
49
71
|
end
|
50
72
|
|
73
|
+
public
|
74
|
+
|
51
75
|
def count_by_estimate(sql_fragment)
|
52
|
-
|
76
|
+
expect! sql_fragment => String
|
53
77
|
|
54
|
-
|
55
|
-
# disabled (see https://issues.mediafellows.com/issues/75237).
|
78
|
+
return count_by(sql_fragment) unless use_count_by_estimate?(sql_fragment)
|
56
79
|
|
57
|
-
|
58
|
-
|
80
|
+
# iterate over all groups, estimating the count for each.
|
81
|
+
#
|
82
|
+
# For larger groups we'll use that estimate - preventing a full table scan.
|
83
|
+
# Groups smaller than EXACT_COUNT_THRESHOLD are counted exactly - in the
|
84
|
+
# hope that this query can be answered from an index.
|
85
|
+
|
86
|
+
#
|
87
|
+
# Usually Simple::SQL.all normalizes each result row into its first value,
|
88
|
+
# if the row only consists of a single value. Here, however, we don't
|
89
|
+
# know the width of a group; so to understand this we just add a dummy
|
90
|
+
# value to the sql_fragment and then remove it again.
|
91
|
+
#
|
92
|
+
groups = enumerate_groups("1 AS __dummy__, #{sql_fragment}")
|
93
|
+
groups = groups.each(&:shift)
|
59
94
|
|
60
|
-
|
95
|
+
# no groups? well, then...
|
96
|
+
return {} if groups.empty?
|
61
97
|
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
|
98
|
+
#
|
99
|
+
# The estimating code only works for groups of size 1. This is a limitation
|
100
|
+
# of simple-sql - for larger groups we would have to be able to encode arrays
|
101
|
+
# of arrays on their way to the postgres server. We are not able to do that
|
102
|
+
# currently.
|
103
|
+
#
|
104
|
+
group_size = groups.first&.length
|
105
|
+
if group_size > 1
|
106
|
+
return count_by(sql_fragment)
|
107
|
+
end
|
108
|
+
|
109
|
+
# The code below only works for groups of size 1
|
110
|
+
groups = groups.map(&:first)
|
111
|
+
|
112
|
+
#
|
113
|
+
# Now we estimate the count of entries in each group. For large groups we
|
114
|
+
# just use the estimate - because it is usually pretty close to being correct.
|
115
|
+
# Small groups are collected in the `sparse_groups` array, to be counted
|
116
|
+
# exactly later on.
|
117
|
+
#
|
66
118
|
|
67
119
|
counts = {}
|
120
|
+
|
68
121
|
sparse_groups = []
|
69
|
-
|
70
|
-
|
122
|
+
base_sql = order_by(nil).to_sql(pagination: false)
|
123
|
+
|
124
|
+
var_name = "$#{@args.count + 1}"
|
125
|
+
|
126
|
+
groups.each do |group|
|
127
|
+
scope = @connection.scope("SELECT * FROM (#{base_sql}) sq WHERE #{sql_fragment}=#{var_name}", args + [group])
|
128
|
+
|
71
129
|
estimated_count = scope.send(:estimated_count)
|
72
130
|
counts[group] = estimated_count
|
73
131
|
sparse_groups << group if estimated_count < EXACT_COUNT_THRESHOLD
|
@@ -77,7 +135,7 @@ class Simple::SQL::Connection::Scope
|
|
77
135
|
unless sparse_groups.empty?
|
78
136
|
sparse_counts = @connection.all <<~SQL, *args, sparse_groups
|
79
137
|
SELECT #{sql_fragment} AS group, COUNT(*) AS count
|
80
|
-
FROM (#{
|
138
|
+
FROM (#{base_sql}) sq
|
81
139
|
WHERE #{sql_fragment} = ANY(#{var_name})
|
82
140
|
GROUP BY #{sql_fragment}
|
83
141
|
SQL
|
@@ -1,44 +1,63 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
3
|
describe "Simple::SQL::Connection::Scope#count_by" do
|
4
|
-
let!(:users)
|
5
|
-
let(:
|
6
|
-
|
4
|
+
let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
|
5
|
+
let(:scope) { SQL.scope("SELECT * FROM users") }
|
6
|
+
|
7
|
+
let(:all_role_ids) { 1.upto(10).to_a }
|
8
|
+
let(:all_role_ids_w_squares) { all_role_ids.map { |role_id| [role_id, role_id*role_id] } }
|
9
|
+
|
10
|
+
before do
|
11
|
+
# initially we have 10 users, one per role_id in the range 1 .. 10
|
12
|
+
# This adds another 3 users with role_id of 1.
|
13
|
+
create(:user, role_id: 1)
|
14
|
+
create(:user, role_id: 1)
|
15
|
+
create(:user, role_id: 1)
|
16
|
+
end
|
7
17
|
|
8
18
|
describe "enumerate_groups" do
|
9
|
-
it "returns all groups" do
|
19
|
+
it "returns all groups by a single column" do
|
10
20
|
expect(scope.enumerate_groups("role_id")).to contain_exactly(*all_role_ids)
|
11
|
-
|
21
|
+
end
|
22
|
+
|
23
|
+
it "obeys where conditions" do
|
24
|
+
expect(scope.where("role_id < $1", 4).enumerate_groups("role_id")).to contain_exactly(1,2,3)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "counts all groups by multiple columns" do
|
28
|
+
expect(scope.where("role_id < $1", 4).enumerate_groups("role_id, role_id * role_id")).to contain_exactly([1, 1], [2, 4], [3, 9])
|
12
29
|
end
|
13
30
|
end
|
14
31
|
|
15
32
|
describe "count_by" do
|
16
|
-
it "counts all groups" do
|
17
|
-
create(:user, role_id: 1)
|
18
|
-
create(:user, role_id: 1)
|
19
|
-
create(:user, role_id: 1)
|
20
|
-
|
33
|
+
it "counts all groups by a single column" do
|
21
34
|
expect(scope.count_by("role_id")).to include(1 => 4)
|
22
35
|
expect(scope.count_by("role_id")).to include(2 => 1)
|
23
36
|
expect(scope.count_by("role_id").keys).to contain_exactly(*all_role_ids)
|
24
37
|
end
|
38
|
+
|
39
|
+
it "counts all groups by multiple columns" do
|
40
|
+
expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([1,1] => 4)
|
41
|
+
expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([2, 4] => 1)
|
42
|
+
expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
|
43
|
+
end
|
25
44
|
end
|
26
45
|
|
27
46
|
describe "count_by_estimate" do
|
28
47
|
before do
|
29
|
-
|
30
|
-
# but
|
31
|
-
allow(::Simple::SQL).to receive(:costs).and_return([0, 10_000])
|
48
|
+
expect_any_instance_of(Simple::SQL::Connection).to receive(:estimate_cost).at_least(:once).and_return(10_000)
|
32
49
|
end
|
33
|
-
|
34
|
-
it "counts all groups" do
|
35
|
-
create(:user, role_id: 1)
|
36
|
-
create(:user, role_id: 1)
|
37
|
-
create(:user, role_id: 1)
|
38
50
|
|
51
|
+
it "counts all groups by a single column" do
|
39
52
|
expect(scope.count_by_estimate("role_id")).to include(1 => 4)
|
40
53
|
expect(scope.count_by_estimate("role_id")).to include(2 => 1)
|
41
54
|
expect(scope.count_by_estimate("role_id").keys).to contain_exactly(*all_role_ids)
|
42
55
|
end
|
56
|
+
|
57
|
+
it "counts all groups by multiple columns and conditions" do
|
58
|
+
expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([1,1] => 4)
|
59
|
+
expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([2, 4] => 1)
|
60
|
+
expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
|
61
|
+
end
|
43
62
|
end
|
44
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-sql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.36
|
4
|
+
version: 0.5.37
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- radiospiel
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-04-
|
12
|
+
date: 2021-04-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pg_array_parser
|