simple-sql 0.5.36 → 0.5.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5185777df8efad3d0d17334a70d30b35551f4abd7adf0fded3549dd2d1b1325
4
- data.tar.gz: e34ee800440259bfbe8ef88f9f70fb1675dee4a91e441c162a8bdc6313e5d88f
3
+ metadata.gz: eb13412c57185b5bd356c4dc94f52232b22bdef58e032d67ceb79127811ec639
4
+ data.tar.gz: adc67bad73619d640834d8b821676bfbc5530ca003adfcabe3012535350cea39
5
5
  SHA512:
6
- metadata.gz: 9b03b9f204d9bd0e87e0c349f1f2ab962bc94101f14ab33cb2522c0287861e801ddca3cd98e97f4317716df7a07a054f6acf26837309f18bc14b8a50f95eac4a
7
- data.tar.gz: 5eb4fe4c83ca847aef2263251b56f95b3ea450c15abc1e8289ed92bbcaaaa5d5d8a3683814c009c2dd4928d679a4072d10717a616e3cbac2978d59f2d2f453e8
6
+ metadata.gz: 1c5597472257a4b487c2b9b1a87545cb37d70e1e625cdd31ced80d3aaf650f547ccd6931cac3e9e81e97df25caaa6fe3be519ae905ea206b5a573717fcbc3a18
7
+ data.tar.gz: a69336914733676aa4465489b71576b1274284337d9295cc786d5e050a72942fe8b54f90cb6eccad7165e8f24ad635e6376342e49dd63bf4d2cbe174fc173620
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.36
1
+ 0.5.37
@@ -19,55 +19,113 @@ class Simple::SQL::Connection::Scope
19
19
  #
20
20
  def enumerate_groups(sql_fragment)
21
21
  sql = order_by(nil).to_sql(pagination: false)
22
+ @connection.all "SELECT DISTINCT #{sql_fragment} FROM (#{sql}) sq", *args
23
+ end
22
24
 
23
- cost = @connection.estimate_cost "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
24
-
25
- # cost estimates are good, but are hard to check against a hard coded value.
26
- # see https://issues.mediafellows.com/issues/75232
27
- #
28
- # if cost > 10_000
29
- # raise "enumerate_groups(#{sql_fragment.inspect}) takes too much time. Make sure to create a suitable index"
30
- # end
25
+ def count_by(sql_fragment)
26
+ expect! sql_fragment => String
31
27
 
32
- groups = []
33
- var_name = "$#{@args.count + 1}"
34
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
28
+ sql = order_by(nil).to_sql(pagination: false)
35
29
 
36
- while cur
37
- groups << cur
38
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq"" WHERE #{sql_fragment} > #{var_name}", *args, cur
30
+ recs = @connection.all "SELECT COUNT(*) AS count, #{sql_fragment} AS group FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
31
+
32
+ # if we count by a single value (e.g. `count_by("role_id")`) each entry in recs consists of an array [count, group_value].
33
+ # The resulting Hash will have entries of group_value => count.
34
+ if recs.first&.length == 2
35
+ recs.each_with_object({}) do |count_and_group, hsh|
36
+ count, group = *count_and_group
37
+ hsh[group] = count
38
+ end
39
+ else
40
+ recs.each_with_object({}) do |count_and_group, hsh|
41
+ count, *group = *count_and_group
42
+ hsh[group] = count
43
+ end
39
44
  end
40
-
41
- groups
42
45
  end
43
46
 
44
- def count_by(sql_fragment)
45
- sql = order_by(nil).to_sql(pagination: false)
47
+ private
46
48
 
47
- recs = @connection.all "SELECT #{sql_fragment} AS group, COUNT(*) AS count FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
48
- Hash[recs]
49
+ # cost estimate threshold for count_by method. Can be set to false, true, or
50
+ # a number.
51
+ #
52
+ # Note that cost estimates are problematic, since they are not reported in
53
+ # any "real" unit, meaning any comparison really is a bit pointless.
54
+ COUNT_BY_ESTIMATE_COST_THRESHOLD = 10_000
55
+
56
+ # estimates the cost to run a sql query. If COUNT_BY_ESTIMATE_COST_THRESHOLD
57
+ # is a number and the cost estimate is at least COUNT_BY_ESTIMATE_COST_THRESHOLD,
58
+ # count_by_estimate uses the estimating code path (true always does, false never does).
59
+ def use_count_by_estimate?(sql_group_by_fragment)
60
+ case COUNT_BY_ESTIMATE_COST_THRESHOLD
61
+ when true then true
62
+ when false then false
63
+ else
64
+ # estimate the effort of counting exactly over all groups.
65
+ base_sql = order_by(nil).to_sql(pagination: false)
66
+ count_sql = "SELECT COUNT(*) FROM (#{base_sql}) sq GROUP BY #{sql_group_by_fragment}"
67
+ cost = @connection.estimate_cost count_sql, *args
68
+
69
+ cost >= COUNT_BY_ESTIMATE_COST_THRESHOLD
70
+ end
49
71
  end
50
72
 
73
+ public
74
+
51
75
  def count_by_estimate(sql_fragment)
52
- return count_by(sql_fragment)
76
+ expect! sql_fragment => String
53
77
 
54
- # The code below runs an estimate on the effort to count by a group. This is currently
55
- # disabled (see https://issues.mediafellows.com/issues/75237).
78
+ return count_by(sql_fragment) unless use_count_by_estimate?(sql_fragment)
56
79
 
57
- sql = order_by(nil).to_sql(pagination: false)
58
- cost = @connection.estimate_cost "SELECT COUNT(*) FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
80
+ # iterate over all groups, estimating the count for each.
81
+ #
82
+ # For larger groups we'll use that estimate - preventing a full table scan.
83
+ # Groups smaller than EXACT_COUNT_THRESHOLD are counted exactly - in the
84
+ # hope that this query can be answered from an index.
85
+
86
+ #
87
+ # Usually Simple::SQL.all normalizes each result row into its first value,
88
+ # if the row only consists of a single value. Here, however, we don't
89
+ # know the width of a group; so to understand this we just add a dummy
90
+ # value to the sql_fragment and then remove it again.
91
+ #
92
+ groups = enumerate_groups("1 AS __dummy__, #{sql_fragment}")
93
+ groups = groups.each(&:shift)
59
94
 
60
- return count_by(sql_fragment) if cost < 10_000
95
+ # no groups? well, then...
96
+ return {} if groups.empty?
61
97
 
62
- # iterate over all groups, estimating the count for each. If the count is
63
- # less than EXACT_COUNT_THRESHOLD we ask for the exact count in that and
64
- # similarily sparse groups.
65
- var_name = "$#{@args.count + 1}"
98
+ #
99
+ # The estimating code only works for groups of size 1. This is a limitation
100
+ # of simple-sql - for larger groups we would have to be able to encode arrays
101
+ # of arrays on their way to the postgres server. We are not able to do that
102
+ # currently.
103
+ #
104
+ group_size = groups.first&.length
105
+ if group_size > 1
106
+ return count_by(sql_fragment)
107
+ end
108
+
109
+ # The code below only works for groups of size 1
110
+ groups = groups.map(&:first)
111
+
112
+ #
113
+ # Now we estimate the count of entries in each group. For large groups we
114
+ # just use the estimate - because it is usually pretty close to being correct.
115
+ # Small groups are collected in the `sparse_groups` array, to be counted
116
+ # exactly later on.
117
+ #
66
118
 
67
119
  counts = {}
120
+
68
121
  sparse_groups = []
69
- enumerate_groups(sql_fragment).each do |group|
70
- scope = @connection.scope("SELECT * FROM (#{sql}) sq WHERE #{sql_fragment}=#{var_name}", *args, group)
122
+ base_sql = order_by(nil).to_sql(pagination: false)
123
+
124
+ var_name = "$#{@args.count + 1}"
125
+
126
+ groups.each do |group|
127
+ scope = @connection.scope("SELECT * FROM (#{base_sql}) sq WHERE #{sql_fragment}=#{var_name}", args + [group])
128
+
71
129
  estimated_count = scope.send(:estimated_count)
72
130
  counts[group] = estimated_count
73
131
  sparse_groups << group if estimated_count < EXACT_COUNT_THRESHOLD
@@ -77,7 +135,7 @@ class Simple::SQL::Connection::Scope
77
135
  unless sparse_groups.empty?
78
136
  sparse_counts = @connection.all <<~SQL, *args, sparse_groups
79
137
  SELECT #{sql_fragment} AS group, COUNT(*) AS count
80
- FROM (#{sql}) sq
138
+ FROM (#{base_sql}) sq
81
139
  WHERE #{sql_fragment} = ANY(#{var_name})
82
140
  GROUP BY #{sql_fragment}
83
141
  SQL
@@ -1,44 +1,63 @@
1
1
  require "spec_helper"
2
2
 
3
3
  describe "Simple::SQL::Connection::Scope#count_by" do
4
- let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
- let(:all_role_ids) { SQL.all("SELECT DISTINCT role_id FROM users") }
6
- let(:scope) { SQL.scope("SELECT * FROM users") }
4
+ let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
+ let(:scope) { SQL.scope("SELECT * FROM users") }
6
+
7
+ let(:all_role_ids) { 1.upto(10).to_a }
8
+ let(:all_role_ids_w_squares) { all_role_ids.map { |role_id| [role_id, role_id*role_id] } }
9
+
10
+ before do
11
+ # initially we have 10 users, one per role_id in the range 1 .. 10
12
+ # This adds another 3 users with role_id of 1.
13
+ create(:user, role_id: 1)
14
+ create(:user, role_id: 1)
15
+ create(:user, role_id: 1)
16
+ end
7
17
 
8
18
  describe "enumerate_groups" do
9
- it "returns all groups" do
19
+ it "returns all groups by a single column" do
10
20
  expect(scope.enumerate_groups("role_id")).to contain_exactly(*all_role_ids)
11
- expect(scope.where("role_id < 4").enumerate_groups("role_id")).to contain_exactly(*(1.upto(3).to_a))
21
+ end
22
+
23
+ it "obeys where conditions" do
24
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id")).to contain_exactly(1,2,3)
25
+ end
26
+
27
+ it "counts all groups by multiple columns" do
28
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id, role_id * role_id")).to contain_exactly([1, 1], [2, 4], [3, 9])
12
29
  end
13
30
  end
14
31
 
15
32
  describe "count_by" do
16
- it "counts all groups" do
17
- create(:user, role_id: 1)
18
- create(:user, role_id: 1)
19
- create(:user, role_id: 1)
20
-
33
+ it "counts all groups by a single column" do
21
34
  expect(scope.count_by("role_id")).to include(1 => 4)
22
35
  expect(scope.count_by("role_id")).to include(2 => 1)
23
36
  expect(scope.count_by("role_id").keys).to contain_exactly(*all_role_ids)
24
37
  end
38
+
39
+ it "counts all groups by multiple columns" do
40
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([1,1] => 4)
41
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([2, 4] => 1)
42
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
43
+ end
25
44
  end
26
45
 
27
46
  describe "count_by_estimate" do
28
47
  before do
29
- # 10_000 is chosen "magically". It is large enough to switch to the fast algorithm,
30
- # but
31
- allow(::Simple::SQL).to receive(:costs).and_return([0, 10_000])
48
+ expect_any_instance_of(Simple::SQL::Connection).to receive(:estimate_cost).at_least(:once).and_return(10_000)
32
49
  end
33
-
34
- it "counts all groups" do
35
- create(:user, role_id: 1)
36
- create(:user, role_id: 1)
37
- create(:user, role_id: 1)
38
50
 
51
+ it "counts all groups by a single column" do
39
52
  expect(scope.count_by_estimate("role_id")).to include(1 => 4)
40
53
  expect(scope.count_by_estimate("role_id")).to include(2 => 1)
41
54
  expect(scope.count_by_estimate("role_id").keys).to contain_exactly(*all_role_ids)
42
55
  end
56
+
57
+ it "counts all groups by multiple columns and conditions" do
58
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([1,1] => 4)
59
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([2, 4] => 1)
60
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
61
+ end
43
62
  end
44
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-sql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.36
4
+ version: 0.5.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - radiospiel
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-06 00:00:00.000000000 Z
12
+ date: 2021-04-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pg_array_parser