simple-sql 0.5.36 → 0.5.37

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5185777df8efad3d0d17334a70d30b35551f4abd7adf0fded3549dd2d1b1325
4
- data.tar.gz: e34ee800440259bfbe8ef88f9f70fb1675dee4a91e441c162a8bdc6313e5d88f
3
+ metadata.gz: eb13412c57185b5bd356c4dc94f52232b22bdef58e032d67ceb79127811ec639
4
+ data.tar.gz: adc67bad73619d640834d8b821676bfbc5530ca003adfcabe3012535350cea39
5
5
  SHA512:
6
- metadata.gz: 9b03b9f204d9bd0e87e0c349f1f2ab962bc94101f14ab33cb2522c0287861e801ddca3cd98e97f4317716df7a07a054f6acf26837309f18bc14b8a50f95eac4a
7
- data.tar.gz: 5eb4fe4c83ca847aef2263251b56f95b3ea450c15abc1e8289ed92bbcaaaa5d5d8a3683814c009c2dd4928d679a4072d10717a616e3cbac2978d59f2d2f453e8
6
+ metadata.gz: 1c5597472257a4b487c2b9b1a87545cb37d70e1e625cdd31ced80d3aaf650f547ccd6931cac3e9e81e97df25caaa6fe3be519ae905ea206b5a573717fcbc3a18
7
+ data.tar.gz: a69336914733676aa4465489b71576b1274284337d9295cc786d5e050a72942fe8b54f90cb6eccad7165e8f24ad635e6376342e49dd63bf4d2cbe174fc173620
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.36
1
+ 0.5.37
@@ -19,55 +19,113 @@ class Simple::SQL::Connection::Scope
19
19
  #
20
20
  def enumerate_groups(sql_fragment)
21
21
  sql = order_by(nil).to_sql(pagination: false)
22
+ @connection.all "SELECT DISTINCT #{sql_fragment} FROM (#{sql}) sq", *args
23
+ end
22
24
 
23
- cost = @connection.estimate_cost "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
24
-
25
- # cost estimates are good, but are hard to check against a hard coded value.
26
- # see https://issues.mediafellows.com/issues/75232
27
- #
28
- # if cost > 10_000
29
- # raise "enumerate_groups(#{sql_fragment.inspect}) takes too much time. Make sure to create a suitable index"
30
- # end
25
+ def count_by(sql_fragment)
26
+ expect! sql_fragment => String
31
27
 
32
- groups = []
33
- var_name = "$#{@args.count + 1}"
34
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
28
+ sql = order_by(nil).to_sql(pagination: false)
35
29
 
36
- while cur
37
- groups << cur
38
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq"" WHERE #{sql_fragment} > #{var_name}", *args, cur
30
+ recs = @connection.all "SELECT COUNT(*) AS count, #{sql_fragment} AS group FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
31
+
32
+ # if we count by a single value (e.g. `count_by("role_id")`) each entry in recs consists of an array [count, group_value].
33
+ # The resulting Hash will have entries of group_value => count.
34
+ if recs.first&.length == 2
35
+ recs.each_with_object({}) do |count_and_group, hsh|
36
+ count, group = *count_and_group
37
+ hsh[group] = count
38
+ end
39
+ else
40
+ recs.each_with_object({}) do |count_and_group, hsh|
41
+ count, *group = *count_and_group
42
+ hsh[group] = count
43
+ end
39
44
  end
40
-
41
- groups
42
45
  end
43
46
 
44
- def count_by(sql_fragment)
45
- sql = order_by(nil).to_sql(pagination: false)
47
+ private
46
48
 
47
- recs = @connection.all "SELECT #{sql_fragment} AS group, COUNT(*) AS count FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
48
- Hash[recs]
49
+ # cost estimate threshold for the count_by_estimate method. Can be set to false, true, or
50
+ # a number.
51
+ #
52
+ # Note that cost estimates are problematic, since they are not reported in
53
+ # any "real" unit, meaning any comparison really is a bit pointless.
54
+ COUNT_BY_ESTIMATE_COST_THRESHOLD = 10_000
55
+
56
+ # estimates the cost to run a sql query. If COUNT_BY_ESTIMATE_COST_THRESHOLD
57
+ # is set to a number and the cost estimate is at least COUNT_BY_ESTIMATE_COST_THRESHOLD
58
+ # \a count_by_estimate is using the estimating code path.
59
+ def use_count_by_estimate?(sql_group_by_fragment)
60
+ case COUNT_BY_ESTIMATE_COST_THRESHOLD
61
+ when true then true
62
+ when false then false
63
+ else
64
+ # estimate the effort of counting exactly over all groups.
65
+ base_sql = order_by(nil).to_sql(pagination: false)
66
+ count_sql = "SELECT COUNT(*) FROM (#{base_sql}) sq GROUP BY #{sql_group_by_fragment}"
67
+ cost = @connection.estimate_cost count_sql, *args
68
+
69
+ cost >= COUNT_BY_ESTIMATE_COST_THRESHOLD
70
+ end
49
71
  end
50
72
 
73
+ public
74
+
51
75
  def count_by_estimate(sql_fragment)
52
- return count_by(sql_fragment)
76
+ expect! sql_fragment => String
53
77
 
54
- # The code below runs an estimate on the effort to count by a group. This is currently
55
- # disabled (see https://issues.mediafellows.com/issues/75237).
78
+ return count_by(sql_fragment) unless use_count_by_estimate?(sql_fragment)
56
79
 
57
- sql = order_by(nil).to_sql(pagination: false)
58
- cost = @connection.estimate_cost "SELECT COUNT(*) FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
80
+ # iterate over all groups, estimating the count for each.
81
+ #
82
+ # For larger groups we'll use that estimate - preventing a full table scan.
83
+ # Groups smaller than EXACT_COUNT_THRESHOLD are counted exactly - in the
84
+ # hope that this query can be answered from an index.
85
+
86
+ #
87
+ # Usually Simple::SQL.all normalizes each result row into its first value,
88
+ # if the row only consists of a single value. Here, however, we don't
89
+ # know the width of a group; so to understand this we just add a dummy
90
+ # value to the sql_fragment and then remove it again.
91
+ #
92
+ groups = enumerate_groups("1 AS __dummy__, #{sql_fragment}")
93
+ groups = groups.each(&:shift)
59
94
 
60
- return count_by(sql_fragment) if cost < 10_000
95
+ # no groups? well, then...
96
+ return {} if groups.empty?
61
97
 
62
- # iterate over all groups, estimating the count for each. If the count is
63
- # less than EXACT_COUNT_THRESHOLD we ask for the exact count in that and
64
- # similarily sparse groups.
65
- var_name = "$#{@args.count + 1}"
98
+ #
99
+ # The estimating code only works for groups of size 1. This is a limitation
100
+ # of simple-sql - for larger groups we would have to be able to encode arrays
101
+ # of arrays on their way to the postgres server. We are not able to do that
102
+ # currently.
103
+ #
104
+ group_size = groups.first&.length
105
+ if group_size > 1
106
+ return count_by(sql_fragment)
107
+ end
108
+
109
+ # The code below only works for groups of size 1
110
+ groups = groups.map(&:first)
111
+
112
+ #
113
+ # Now we estimate the count of entries in each group. For large groups we
114
+ # just use the estimate - because it is usually pretty close to being correct.
115
+ # Small groups are collected in the `sparse_groups` array, to be counted
116
+ # exactly later on.
117
+ #
66
118
 
67
119
  counts = {}
120
+
68
121
  sparse_groups = []
69
- enumerate_groups(sql_fragment).each do |group|
70
- scope = @connection.scope("SELECT * FROM (#{sql}) sq WHERE #{sql_fragment}=#{var_name}", *args, group)
122
+ base_sql = order_by(nil).to_sql(pagination: false)
123
+
124
+ var_name = "$#{@args.count + 1}"
125
+
126
+ groups.each do |group|
127
+ scope = @connection.scope("SELECT * FROM (#{base_sql}) sq WHERE #{sql_fragment}=#{var_name}", args + [group])
128
+
71
129
  estimated_count = scope.send(:estimated_count)
72
130
  counts[group] = estimated_count
73
131
  sparse_groups << group if estimated_count < EXACT_COUNT_THRESHOLD
@@ -77,7 +135,7 @@ class Simple::SQL::Connection::Scope
77
135
  unless sparse_groups.empty?
78
136
  sparse_counts = @connection.all <<~SQL, *args, sparse_groups
79
137
  SELECT #{sql_fragment} AS group, COUNT(*) AS count
80
- FROM (#{sql}) sq
138
+ FROM (#{base_sql}) sq
81
139
  WHERE #{sql_fragment} = ANY(#{var_name})
82
140
  GROUP BY #{sql_fragment}
83
141
  SQL
@@ -1,44 +1,63 @@
1
1
  require "spec_helper"
2
2
 
3
3
  describe "Simple::SQL::Connection::Scope#count_by" do
4
- let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
- let(:all_role_ids) { SQL.all("SELECT DISTINCT role_id FROM users") }
6
- let(:scope) { SQL.scope("SELECT * FROM users") }
4
+ let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
+ let(:scope) { SQL.scope("SELECT * FROM users") }
6
+
7
+ let(:all_role_ids) { 1.upto(10).to_a }
8
+ let(:all_role_ids_w_squares) { all_role_ids.map { |role_id| [role_id, role_id*role_id] } }
9
+
10
+ before do
11
+ # initially we have 10 users, one per role_id in the range 1 .. 10
12
+ # This adds another 3 users with role_id of 1.
13
+ create(:user, role_id: 1)
14
+ create(:user, role_id: 1)
15
+ create(:user, role_id: 1)
16
+ end
7
17
 
8
18
  describe "enumerate_groups" do
9
- it "returns all groups" do
19
+ it "returns all groups by a single column" do
10
20
  expect(scope.enumerate_groups("role_id")).to contain_exactly(*all_role_ids)
11
- expect(scope.where("role_id < 4").enumerate_groups("role_id")).to contain_exactly(*(1.upto(3).to_a))
21
+ end
22
+
23
+ it "obeys where conditions" do
24
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id")).to contain_exactly(1,2,3)
25
+ end
26
+
27
+ it "counts all groups by multiple columns" do
28
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id, role_id * role_id")).to contain_exactly([1, 1], [2, 4], [3, 9])
12
29
  end
13
30
  end
14
31
 
15
32
  describe "count_by" do
16
- it "counts all groups" do
17
- create(:user, role_id: 1)
18
- create(:user, role_id: 1)
19
- create(:user, role_id: 1)
20
-
33
+ it "counts all groups by a single column" do
21
34
  expect(scope.count_by("role_id")).to include(1 => 4)
22
35
  expect(scope.count_by("role_id")).to include(2 => 1)
23
36
  expect(scope.count_by("role_id").keys).to contain_exactly(*all_role_ids)
24
37
  end
38
+
39
+ it "counts all groups by multiple columns" do
40
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([1,1] => 4)
41
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([2, 4] => 1)
42
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
43
+ end
25
44
  end
26
45
 
27
46
  describe "count_by_estimate" do
28
47
  before do
29
- # 10_000 is chosen "magically". It is large enough to switch to the fast algorithm,
30
- # but
31
- allow(::Simple::SQL).to receive(:costs).and_return([0, 10_000])
48
+ expect_any_instance_of(Simple::SQL::Connection).to receive(:estimate_cost).at_least(:once).and_return(10_000)
32
49
  end
33
-
34
- it "counts all groups" do
35
- create(:user, role_id: 1)
36
- create(:user, role_id: 1)
37
- create(:user, role_id: 1)
38
50
 
51
+ it "counts all groups by a single column" do
39
52
  expect(scope.count_by_estimate("role_id")).to include(1 => 4)
40
53
  expect(scope.count_by_estimate("role_id")).to include(2 => 1)
41
54
  expect(scope.count_by_estimate("role_id").keys).to contain_exactly(*all_role_ids)
42
55
  end
56
+
57
+ it "counts all groups by multiple columns and conditions" do
58
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([1,1] => 4)
59
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([2, 4] => 1)
60
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
61
+ end
43
62
  end
44
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-sql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.36
4
+ version: 0.5.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - radiospiel
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-06 00:00:00.000000000 Z
12
+ date: 2021-04-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pg_array_parser