simple-sql 0.5.32 → 0.5.37

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb4af8eb0802b84e83370417d134ff18f82c812dbd9a053853739d9570f92a95
4
- data.tar.gz: 0b023420320187d12f32cb6e1a4a093775a5bd2849015443768b0177392729fb
3
+ metadata.gz: eb13412c57185b5bd356c4dc94f52232b22bdef58e032d67ceb79127811ec639
4
+ data.tar.gz: adc67bad73619d640834d8b821676bfbc5530ca003adfcabe3012535350cea39
5
5
  SHA512:
6
- metadata.gz: 996461571d090b5cf7589dfc6f1d12925b11b54d047fc2ec2395bbb6a2d75e583a7f82d0cc92985982891d3254e3e26a83260f6a38be5bee0eac5872effb4609
7
- data.tar.gz: 57decbf1a834422bdfc2987056c3fa37f04988de7059ed8c6eb8aa64ffc16c445a19f1dc9e91f880eac8a2ea7e8772dcfb8b1aeaf43b57ca7f13d4662ceaf177
6
+ metadata.gz: 1c5597472257a4b487c2b9b1a87545cb37d70e1e625cdd31ced80d3aaf650f547ccd6931cac3e9e81e97df25caaa6fe3be519ae905ea206b5a573717fcbc3a18
7
+ data.tar.gz: a69336914733676aa4465489b71576b1274284337d9295cc786d5e050a72942fe8b54f90cb6eccad7165e8f24ad635e6376342e49dd63bf4d2cbe174fc173620
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.32
1
+ 0.5.37
@@ -19,45 +19,113 @@ class Simple::SQL::Connection::Scope
19
19
  #
20
20
  def enumerate_groups(sql_fragment)
21
21
  sql = order_by(nil).to_sql(pagination: false)
22
+ @connection.all "SELECT DISTINCT #{sql_fragment} FROM (#{sql}) sq", *args
23
+ end
22
24
 
23
- cost = @connection.estimate_cost "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
24
- raise "enumerate_groups(#{sql_fragment.inspect}) takes too much time. Make sure to create a suitable index" if cost > 10_000
25
+ def count_by(sql_fragment)
26
+ expect! sql_fragment => String
25
27
 
26
- groups = []
27
- var_name = "$#{@args.count + 1}"
28
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
28
+ sql = order_by(nil).to_sql(pagination: false)
29
29
 
30
- while cur
31
- groups << cur
32
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq"" WHERE #{sql_fragment} > #{var_name}", *args, cur
33
- end
30
+ recs = @connection.all "SELECT COUNT(*) AS count, #{sql_fragment} AS group FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
34
31
 
35
- groups
32
+ # If we count by a single value (e.g. `count_by("role_id")`), each entry in recs is an array [count, group_value].
33
+ # The resulting Hash will have entries of group_value => count.
34
+ if recs.first&.length == 2
35
+ recs.each_with_object({}) do |count_and_group, hsh|
36
+ count, group = *count_and_group
37
+ hsh[group] = count
38
+ end
39
+ else
40
+ recs.each_with_object({}) do |count_and_group, hsh|
41
+ count, *group = *count_and_group
42
+ hsh[group] = count
43
+ end
44
+ end
36
45
  end
37
46
 
38
- def count_by(sql_fragment)
39
- sql = order_by(nil).to_sql(pagination: false)
47
+ private
48
+
49
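+ # Cost estimate threshold used by count_by_estimate. Can be set to false (never
50
+ # estimate), true (always estimate), or a number (estimate if the estimated cost of exact counting is at least that number).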
51
+ #
52
+ # Note that cost estimates are problematic, since they are not reported in
53
+ # any "real" unit, meaning any comparison really is a bit pointless.
54
+ COUNT_BY_ESTIMATE_COST_THRESHOLD = 10_000
40
55
 
41
- recs = @connection.all "SELECT #{sql_fragment} AS group, COUNT(*) AS count FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
42
- Hash[recs]
56
+ # Decides whether count_by_estimate takes the estimating code path. If
57
+ # COUNT_BY_ESTIMATE_COST_THRESHOLD is true or false, that value is returned as is. Otherwise the estimating code
58
+ # path is used when the estimated cost of exact counting is greater than or equal to COUNT_BY_ESTIMATE_COST_THRESHOLD.
59
+ def use_count_by_estimate?(sql_group_by_fragment)
60
+ case COUNT_BY_ESTIMATE_COST_THRESHOLD
61
+ when true then true
62
+ when false then false
63
+ else
64
+ # estimate the effort to exact counting over all groups.
65
+ base_sql = order_by(nil).to_sql(pagination: false)
66
+ count_sql = "SELECT COUNT(*) FROM (#{base_sql}) sq GROUP BY #{sql_group_by_fragment}"
67
+ cost = @connection.estimate_cost count_sql, *args
68
+
69
+ cost >= COUNT_BY_ESTIMATE_COST_THRESHOLD
70
+ end
43
71
  end
44
72
 
73
+ public
74
+
45
75
  def count_by_estimate(sql_fragment)
46
- sql = order_by(nil).to_sql(pagination: false)
76
+ expect! sql_fragment => String
47
77
 
48
- cost = @connection.estimate_cost "SELECT COUNT(*) FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
78
+ return count_by(sql_fragment) unless use_count_by_estimate?(sql_fragment)
49
79
 
50
- return count_by(sql_fragment) if cost < 10_000
80
+ # iterate over all groups, estimating the count for each.
81
+ #
82
+ # For larger groups we'll use that estimate - preventing a full table scan.
83
+ # Groups smaller than EXACT_COUNT_THRESHOLD are counted exactly - in the
84
+ # hope that this query can be answered from an index.
51
85
 
52
- # iterate over all groups, estimating the count for each. If the count is
53
- # less than EXACT_COUNT_THRESHOLD we ask for the exact count in that and
54
- # similarily sparse groups.
55
- var_name = "$#{@args.count + 1}"
86
+ #
87
+ # Usually Simple::SQL.all normalizes each result row into its first value,
88
+ # if the row only consists of a single value. Here, however, we don't
89
+ # know the width of a group; so to understand this we just add a dummy
90
+ # value to the sql_fragment and then remove it again.
91
+ #
92
+ groups = enumerate_groups("1 AS __dummy__, #{sql_fragment}")
93
+ groups = groups.each(&:shift)
94
+
95
+ # no groups? well, then...
96
+ return {} if groups.empty?
97
+
98
+ #
99
+ # The estimating code only works for groups of size 1. This is a limitation
100
+ # of simple-sql - for larger groups we would have to be able to encode arrays
101
+ # of arrays on their way to the postgres server. We are not able to do that
102
+ # currently.
103
+ #
104
+ group_size = groups.first&.length
105
+ if group_size > 1
106
+ return count_by(sql_fragment)
107
+ end
108
+
109
+ # The code below only works for groups of size 1
110
+ groups = groups.map(&:first)
111
+
112
+ #
113
+ # Now we estimate the count of entries in each group. For large groups we
114
+ # just use the estimate - because it is usually pretty close to being correct.
115
+ # Small groups are collected in the `sparse_groups` array, to be counted
116
+ # exactly later on.
117
+ #
56
118
 
57
119
  counts = {}
120
+
58
121
  sparse_groups = []
59
- enumerate_groups(sql_fragment).each do |group|
60
- scope = @connection.scope("SELECT * FROM (#{sql}) sq WHERE #{sql_fragment}=#{var_name}", *args, group)
122
+ base_sql = order_by(nil).to_sql(pagination: false)
123
+
124
+ var_name = "$#{@args.count + 1}"
125
+
126
+ groups.each do |group|
127
+ scope = @connection.scope("SELECT * FROM (#{base_sql}) sq WHERE #{sql_fragment}=#{var_name}", args + [group])
128
+
61
129
  estimated_count = scope.send(:estimated_count)
62
130
  counts[group] = estimated_count
63
131
  sparse_groups << group if estimated_count < EXACT_COUNT_THRESHOLD
@@ -67,7 +135,7 @@ class Simple::SQL::Connection::Scope
67
135
  unless sparse_groups.empty?
68
136
  sparse_counts = @connection.all <<~SQL, *args, sparse_groups
69
137
  SELECT #{sql_fragment} AS group, COUNT(*) AS count
70
- FROM (#{sql}) sq
138
+ FROM (#{base_sql}) sq
71
139
  WHERE #{sql_fragment} = ANY(#{var_name})
72
140
  GROUP BY #{sql_fragment}
73
141
  SQL
@@ -2,12 +2,14 @@
2
2
 
3
3
  module Simple::SQL::MonkeyPatches
4
4
  def self.warn(msg)
5
+ return if ENV["SIMPLE_SQL_SILENCE"] == "1"
6
+
5
7
  @@warned ||= {}
6
8
  return if @@warned[msg]
7
9
 
8
10
  @@warned[msg] = true
9
11
 
10
- STDERR.puts "== monkeypatch warning: #{msg}"
12
+ STDERR.puts "== monkeypatch warning: #{msg} (set SIMPLE_SQL_SILENCE=1 to disable)"
11
13
  end
12
14
  end
13
15
 
@@ -0,0 +1,29 @@
1
+ require "benchmark"
2
+ require "simple-sql"
3
+
4
+ ENV["DATABASE_URL"] = "postgres://admin:admin@localhost/um_development"
5
+
6
+ Simple::SQL.connect
7
+ # require 'stackprof'# added
8
+
9
+ Benchmark.bmbm do |x|
10
+ x.report("1000x Simple::SQL HStore performance for 100 users") do
11
+ 1000.times { Simple::SQL.all("SELECT id, meta_data FROM users limit 100") }
12
+ end
13
+
14
+ x.report("1000x Simple::SQL HStore::varchar performance for 100 users") do
15
+ 1000.times { Simple::SQL.all("SELECT id, meta_data::varchar FROM users limit 100") }
16
+ end
17
+
18
+ x.report("1000x Simple::SQL HStore as jsonb performance for 100 users") do
19
+ 1000.times { Simple::SQL.all("SELECT id, to_jsonb(meta_data) FROM users limit 100") }
20
+ end
21
+
22
+ x.report("1000x Simple::SQL timestamp performance for 100 users") do
23
+ 1000.times { Simple::SQL.all("SELECT id, created_at FROM users limit 100") }
24
+ end
25
+
26
+ x.report("1000x Simple::SQL timestamp::varchar performance for 100 users") do
27
+ 1000.times { Simple::SQL.all("SELECT id, created_at::varchar FROM users limit 100") }
28
+ end
29
+ end
data/simple-sql.gemspec CHANGED
@@ -33,16 +33,16 @@ Gem::Specification.new do |gem|
33
33
  # during tests we check the SIMPLE_SQL_ACTIVERECORD_SPECS environment setting.
34
34
  # Run make tests to run all tests
35
35
  if ENV["SIMPLE_SQL_ACTIVERECORD_SPECS"]
36
- gem.add_dependency 'activerecord', '> 4.2', *(ENV["SIMPLE_SQL_ACTIVERECORD_SPECS"].split(","))
36
+ gem.add_dependency 'activerecord', '>= 5.2.4.5', *(ENV["SIMPLE_SQL_ACTIVERECORD_SPECS"].split(","))
37
37
  else
38
- gem.add_dependency 'activerecord', '> 4.2', '< 7'
38
+ gem.add_dependency 'activerecord', '>= 5.2.4.5', '< 6.1'
39
39
  end
40
40
 
41
41
  # optional gems (required by some of the parts)
42
42
 
43
43
  # development gems
44
44
  gem.add_development_dependency 'pg', '0.20'
45
- gem.add_development_dependency 'rake', '~> 11'
45
+ gem.add_development_dependency 'rake', '>= 12.3.3'
46
46
  gem.add_development_dependency 'rspec', '~> 3.7'
47
47
  gem.add_development_dependency 'rubocop', '~> 0.61.1'
48
48
  gem.add_development_dependency 'simplecov', '~> 0'
@@ -1,44 +1,63 @@
1
1
  require "spec_helper"
2
2
 
3
3
  describe "Simple::SQL::Connection::Scope#count_by" do
4
- let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
- let(:all_role_ids) { SQL.all("SELECT DISTINCT role_id FROM users") }
6
- let(:scope) { SQL.scope("SELECT * FROM users") }
4
+ let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
+ let(:scope) { SQL.scope("SELECT * FROM users") }
6
+
7
+ let(:all_role_ids) { 1.upto(10).to_a }
8
+ let(:all_role_ids_w_squares) { all_role_ids.map { |role_id| [role_id, role_id*role_id] } }
9
+
10
+ before do
11
+ # initially we have 10 users, one per role_id in the range 1 .. 10
12
+ # This adds another 3 users with role_id of 1.
13
+ create(:user, role_id: 1)
14
+ create(:user, role_id: 1)
15
+ create(:user, role_id: 1)
16
+ end
7
17
 
8
18
  describe "enumerate_groups" do
9
- it "returns all groups" do
19
+ it "returns all groups by a single column" do
10
20
  expect(scope.enumerate_groups("role_id")).to contain_exactly(*all_role_ids)
11
- expect(scope.where("role_id < 4").enumerate_groups("role_id")).to contain_exactly(*(1.upto(3).to_a))
21
+ end
22
+
23
+ it "obeys where conditions" do
24
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id")).to contain_exactly(1,2,3)
25
+ end
26
+
27
+ it "counts all groups by multiple columns" do
28
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id, role_id * role_id")).to contain_exactly([1, 1], [2, 4], [3, 9])
12
29
  end
13
30
  end
14
31
 
15
32
  describe "count_by" do
16
- it "counts all groups" do
17
- create(:user, role_id: 1)
18
- create(:user, role_id: 1)
19
- create(:user, role_id: 1)
20
-
33
+ it "counts all groups by a single column" do
21
34
  expect(scope.count_by("role_id")).to include(1 => 4)
22
35
  expect(scope.count_by("role_id")).to include(2 => 1)
23
36
  expect(scope.count_by("role_id").keys).to contain_exactly(*all_role_ids)
24
37
  end
38
+
39
+ it "counts all groups by multiple columns" do
40
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([1,1] => 4)
41
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([2, 4] => 1)
42
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
43
+ end
25
44
  end
26
45
 
27
46
  describe "count_by_estimate" do
28
47
  before do
29
- # 10_000 is chosen "magically". It is large enough to switch to the fast algorithm,
30
- # but
31
- allow(::Simple::SQL).to receive(:costs).and_return([0, 10_000])
48
+ expect_any_instance_of(Simple::SQL::Connection).to receive(:estimate_cost).at_least(:once).and_return(10_000)
32
49
  end
33
-
34
- it "counts all groups" do
35
- create(:user, role_id: 1)
36
- create(:user, role_id: 1)
37
- create(:user, role_id: 1)
38
50
 
51
+ it "counts all groups by a single column" do
39
52
  expect(scope.count_by_estimate("role_id")).to include(1 => 4)
40
53
  expect(scope.count_by_estimate("role_id")).to include(2 => 1)
41
54
  expect(scope.count_by_estimate("role_id").keys).to contain_exactly(*all_role_ids)
42
55
  end
56
+
57
+ it "counts all groups by multiple columns and conditions" do
58
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([1,1] => 4)
59
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([2, 4] => 1)
60
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
61
+ end
43
62
  end
44
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-sql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.32
4
+ version: 0.5.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - radiospiel
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-02 00:00:00.000000000 Z
12
+ date: 2021-04-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pg_array_parser
@@ -91,22 +91,22 @@ dependencies:
91
91
  name: activerecord
92
92
  requirement: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">"
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '4.2'
96
+ version: 5.2.4.5
97
97
  - - "<"
98
98
  - !ruby/object:Gem::Version
99
- version: '7'
99
+ version: '6.1'
100
100
  type: :runtime
101
101
  prerelease: false
102
102
  version_requirements: !ruby/object:Gem::Requirement
103
103
  requirements:
104
- - - ">"
104
+ - - ">="
105
105
  - !ruby/object:Gem::Version
106
- version: '4.2'
106
+ version: 5.2.4.5
107
107
  - - "<"
108
108
  - !ruby/object:Gem::Version
109
- version: '7'
109
+ version: '6.1'
110
110
  - !ruby/object:Gem::Dependency
111
111
  name: pg
112
112
  requirement: !ruby/object:Gem::Requirement
@@ -125,16 +125,16 @@ dependencies:
125
125
  name: rake
126
126
  requirement: !ruby/object:Gem::Requirement
127
127
  requirements:
128
- - - "~>"
128
+ - - ">="
129
129
  - !ruby/object:Gem::Version
130
- version: '11'
130
+ version: 12.3.3
131
131
  type: :development
132
132
  prerelease: false
133
133
  version_requirements: !ruby/object:Gem::Requirement
134
134
  requirements:
135
- - - "~>"
135
+ - - ">="
136
136
  - !ruby/object:Gem::Version
137
- version: '11'
137
+ version: 12.3.3
138
138
  - !ruby/object:Gem::Dependency
139
139
  name: rspec
140
140
  requirement: !ruby/object:Gem::Requirement
@@ -235,6 +235,7 @@ files:
235
235
  - lib/simple/sql/table_print.rb
236
236
  - lib/simple/sql/version.rb
237
237
  - log/.gitkeep
238
+ - scripts/benchmark1.rb
238
239
  - scripts/release
239
240
  - scripts/release.rb
240
241
  - scripts/stats