simple-sql 0.5.32 → 0.5.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb4af8eb0802b84e83370417d134ff18f82c812dbd9a053853739d9570f92a95
4
- data.tar.gz: 0b023420320187d12f32cb6e1a4a093775a5bd2849015443768b0177392729fb
3
+ metadata.gz: eb13412c57185b5bd356c4dc94f52232b22bdef58e032d67ceb79127811ec639
4
+ data.tar.gz: adc67bad73619d640834d8b821676bfbc5530ca003adfcabe3012535350cea39
5
5
  SHA512:
6
- metadata.gz: 996461571d090b5cf7589dfc6f1d12925b11b54d047fc2ec2395bbb6a2d75e583a7f82d0cc92985982891d3254e3e26a83260f6a38be5bee0eac5872effb4609
7
- data.tar.gz: 57decbf1a834422bdfc2987056c3fa37f04988de7059ed8c6eb8aa64ffc16c445a19f1dc9e91f880eac8a2ea7e8772dcfb8b1aeaf43b57ca7f13d4662ceaf177
6
+ metadata.gz: 1c5597472257a4b487c2b9b1a87545cb37d70e1e625cdd31ced80d3aaf650f547ccd6931cac3e9e81e97df25caaa6fe3be519ae905ea206b5a573717fcbc3a18
7
+ data.tar.gz: a69336914733676aa4465489b71576b1274284337d9295cc786d5e050a72942fe8b54f90cb6eccad7165e8f24ad635e6376342e49dd63bf4d2cbe174fc173620
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.32
1
+ 0.5.37
@@ -19,45 +19,113 @@ class Simple::SQL::Connection::Scope
19
19
  #
20
20
  def enumerate_groups(sql_fragment)
21
21
  sql = order_by(nil).to_sql(pagination: false)
22
+ @connection.all "SELECT DISTINCT #{sql_fragment} FROM (#{sql}) sq", *args
23
+ end
22
24
 
23
- cost = @connection.estimate_cost "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
24
- raise "enumerate_groups(#{sql_fragment.inspect}) takes too much time. Make sure to create a suitable index" if cost > 10_000
25
+ def count_by(sql_fragment)
26
+ expect! sql_fragment => String
25
27
 
26
- groups = []
27
- var_name = "$#{@args.count + 1}"
28
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq", *args
28
+ sql = order_by(nil).to_sql(pagination: false)
29
29
 
30
- while cur
31
- groups << cur
32
- cur = @connection.ask "SELECT MIN(#{sql_fragment}) FROM (#{sql}) sq"" WHERE #{sql_fragment} > #{var_name}", *args, cur
33
- end
30
+ recs = @connection.all "SELECT COUNT(*) AS count, #{sql_fragment} AS group FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
34
31
 
35
- groups
32
+ # if we count by a single value (e.g. `count_by("role_id")`) each entry in recs consists of an array [count, group_value].
33
+ # The resulting Hash will have entries of group_value => count.
34
+ if recs.first&.length == 2
35
+ recs.each_with_object({}) do |count_and_group, hsh|
36
+ count, group = *count_and_group
37
+ hsh[group] = count
38
+ end
39
+ else
40
+ recs.each_with_object({}) do |count_and_group, hsh|
41
+ count, *group = *count_and_group
42
+ hsh[group] = count
43
+ end
44
+ end
36
45
  end
37
46
 
38
- def count_by(sql_fragment)
39
- sql = order_by(nil).to_sql(pagination: false)
47
+ private
48
+
49
+ # cost estimate threshold for the count_by_estimate method. Can be set to false, true, or
50
+ # a number.
51
+ #
52
+ # Note that cost estimates are problematic, since they are not reported in
53
+ # any "real" unit, meaning any comparison really is a bit pointless.
54
+ COUNT_BY_ESTIMATE_COST_THRESHOLD = 10_000
40
55
 
41
- recs = @connection.all "SELECT #{sql_fragment} AS group, COUNT(*) AS count FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
42
- Hash[recs]
56
+ # estimates the cost to run a sql query. If COUNT_BY_ESTIMATE_COST_THRESHOLD
57
+ # is a number and the cost estimate is greater than or equal to that number,
58
+ # count_by_estimate uses the estimating code path.
59
+ def use_count_by_estimate?(sql_group_by_fragment)
60
+ case COUNT_BY_ESTIMATE_COST_THRESHOLD
61
+ when true then true
62
+ when false then false
63
+ else
64
+ # estimate the effort of counting exactly over all groups.
65
+ base_sql = order_by(nil).to_sql(pagination: false)
66
+ count_sql = "SELECT COUNT(*) FROM (#{base_sql}) sq GROUP BY #{sql_group_by_fragment}"
67
+ cost = @connection.estimate_cost count_sql, *args
68
+
69
+ cost >= COUNT_BY_ESTIMATE_COST_THRESHOLD
70
+ end
43
71
  end
44
72
 
73
+ public
74
+
45
75
  def count_by_estimate(sql_fragment)
46
- sql = order_by(nil).to_sql(pagination: false)
76
+ expect! sql_fragment => String
47
77
 
48
- cost = @connection.estimate_cost "SELECT COUNT(*) FROM (#{sql}) sq GROUP BY #{sql_fragment}", *args
78
+ return count_by(sql_fragment) unless use_count_by_estimate?(sql_fragment)
49
79
 
50
- return count_by(sql_fragment) if cost < 10_000
80
+ # iterate over all groups, estimating the count for each.
81
+ #
82
+ # For larger groups we'll use that estimate - preventing a full table scan.
83
+ # Groups smaller than EXACT_COUNT_THRESHOLD are counted exactly - in the
84
+ # hope that this query can be answered from an index.
51
85
 
52
- # iterate over all groups, estimating the count for each. If the count is
53
- # less than EXACT_COUNT_THRESHOLD we ask for the exact count in that and
54
- # similarily sparse groups.
55
- var_name = "$#{@args.count + 1}"
86
+ #
87
+ # Usually Simple::SQL.all normalizes each result row into its first value,
88
+ # if the row only consists of a single value. Here, however, we don't
89
+ # know the width of a group; so to understand this we just add a dummy
90
+ # value to the sql_fragment and then remove it again.
91
+ #
92
+ groups = enumerate_groups("1 AS __dummy__, #{sql_fragment}")
93
+ groups = groups.each(&:shift)
94
+
95
+ # no groups? well, then...
96
+ return {} if groups.empty?
97
+
98
+ #
99
+ # The estimating code only works for groups of size 1. This is a limitation
100
+ # of simple-sql - for larger groups we would have to be able to encode arrays
101
+ # of arrays on their way to the postgres server. We are not able to do that
102
+ # currently.
103
+ #
104
+ group_size = groups.first&.length
105
+ if group_size > 1
106
+ return count_by(sql_fragment)
107
+ end
108
+
109
+ # The code below only works for groups of size 1
110
+ groups = groups.map(&:first)
111
+
112
+ #
113
+ # Now we estimate the count of entries in each group. For large groups we
114
+ # just use the estimate - because it is usually pretty close to being correct.
115
+ # Small groups are collected in the `sparse_groups` array, to be counted
116
+ # exactly later on.
117
+ #
56
118
 
57
119
  counts = {}
120
+
58
121
  sparse_groups = []
59
- enumerate_groups(sql_fragment).each do |group|
60
- scope = @connection.scope("SELECT * FROM (#{sql}) sq WHERE #{sql_fragment}=#{var_name}", *args, group)
122
+ base_sql = order_by(nil).to_sql(pagination: false)
123
+
124
+ var_name = "$#{@args.count + 1}"
125
+
126
+ groups.each do |group|
127
+ scope = @connection.scope("SELECT * FROM (#{base_sql}) sq WHERE #{sql_fragment}=#{var_name}", args + [group])
128
+
61
129
  estimated_count = scope.send(:estimated_count)
62
130
  counts[group] = estimated_count
63
131
  sparse_groups << group if estimated_count < EXACT_COUNT_THRESHOLD
@@ -67,7 +135,7 @@ class Simple::SQL::Connection::Scope
67
135
  unless sparse_groups.empty?
68
136
  sparse_counts = @connection.all <<~SQL, *args, sparse_groups
69
137
  SELECT #{sql_fragment} AS group, COUNT(*) AS count
70
- FROM (#{sql}) sq
138
+ FROM (#{base_sql}) sq
71
139
  WHERE #{sql_fragment} = ANY(#{var_name})
72
140
  GROUP BY #{sql_fragment}
73
141
  SQL
@@ -2,12 +2,14 @@
2
2
 
3
3
  module Simple::SQL::MonkeyPatches
4
4
  def self.warn(msg)
5
+ return if ENV["SIMPLE_SQL_SILENCE"] == "1"
6
+
5
7
  @@warned ||= {}
6
8
  return if @@warned[msg]
7
9
 
8
10
  @@warned[msg] = true
9
11
 
10
- STDERR.puts "== monkeypatch warning: #{msg}"
12
+ STDERR.puts "== monkeypatch warning: #{msg} (set SIMPLE_SQL_SILENCE=1 to disable)"
11
13
  end
12
14
  end
13
15
 
@@ -0,0 +1,29 @@
1
+ require "benchmark"
2
+ require "simple-sql"
3
+
4
+ ENV["DATABASE_URL"] = "postgres://admin:admin@localhost/um_development"
5
+
6
+ Simple::SQL.connect
7
+ # require 'stackprof'# added
8
+
9
+ Benchmark.bmbm do |x|
10
+ x.report("1000x Simple::SQL HStore performance for 100 users") do
11
+ 1000.times { Simple::SQL.all("SELECT id, meta_data FROM users limit 100") }
12
+ end
13
+
14
+ x.report("1000x Simple::SQL HStore::varchar performance for 100 users") do
15
+ 1000.times { Simple::SQL.all("SELECT id, meta_data::varchar FROM users limit 100") }
16
+ end
17
+
18
+ x.report("1000x Simple::SQL HStore as jsonb performance for 100 users") do
19
+ 1000.times { Simple::SQL.all("SELECT id, to_jsonb(meta_data) FROM users limit 100") }
20
+ end
21
+
22
+ x.report("1000x Simple::SQL timestamp performance for 100 users") do
23
+ 1000.times { Simple::SQL.all("SELECT id, created_at FROM users limit 100") }
24
+ end
25
+
26
+ x.report("1000x Simple::SQL timestamp::varchar performance for 100 users") do
27
+ 1000.times { Simple::SQL.all("SELECT id, created_at::varchar FROM users limit 100") }
28
+ end
29
+ end
data/simple-sql.gemspec CHANGED
@@ -33,16 +33,16 @@ Gem::Specification.new do |gem|
33
33
  # during tests we check the SIMPLE_SQL_ACTIVERECORD_SPECS environment setting.
34
34
  # Run make tests to run all tests
35
35
  if ENV["SIMPLE_SQL_ACTIVERECORD_SPECS"]
36
- gem.add_dependency 'activerecord', '> 4.2', *(ENV["SIMPLE_SQL_ACTIVERECORD_SPECS"].split(","))
36
+ gem.add_dependency 'activerecord', '>= 5.2.4.5', *(ENV["SIMPLE_SQL_ACTIVERECORD_SPECS"].split(","))
37
37
  else
38
- gem.add_dependency 'activerecord', '> 4.2', '< 7'
38
+ gem.add_dependency 'activerecord', '>= 5.2.4.5', '< 6.1'
39
39
  end
40
40
 
41
41
  # optional gems (required by some of the parts)
42
42
 
43
43
  # development gems
44
44
  gem.add_development_dependency 'pg', '0.20'
45
- gem.add_development_dependency 'rake', '~> 11'
45
+ gem.add_development_dependency 'rake', '>= 12.3.3'
46
46
  gem.add_development_dependency 'rspec', '~> 3.7'
47
47
  gem.add_development_dependency 'rubocop', '~> 0.61.1'
48
48
  gem.add_development_dependency 'simplecov', '~> 0'
@@ -1,44 +1,63 @@
1
1
  require "spec_helper"
2
2
 
3
3
  describe "Simple::SQL::Connection::Scope#count_by" do
4
- let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
- let(:all_role_ids) { SQL.all("SELECT DISTINCT role_id FROM users") }
6
- let(:scope) { SQL.scope("SELECT * FROM users") }
4
+ let!(:users) { 1.upto(10).map { |i| create(:user, role_id: i) } }
5
+ let(:scope) { SQL.scope("SELECT * FROM users") }
6
+
7
+ let(:all_role_ids) { 1.upto(10).to_a }
8
+ let(:all_role_ids_w_squares) { all_role_ids.map { |role_id| [role_id, role_id*role_id] } }
9
+
10
+ before do
11
+ # initially we have 10 users, one per role_id in the range 1 .. 10
12
+ # This adds another 3 users with role_id of 1.
13
+ create(:user, role_id: 1)
14
+ create(:user, role_id: 1)
15
+ create(:user, role_id: 1)
16
+ end
7
17
 
8
18
  describe "enumerate_groups" do
9
- it "returns all groups" do
19
+ it "returns all groups by a single column" do
10
20
  expect(scope.enumerate_groups("role_id")).to contain_exactly(*all_role_ids)
11
- expect(scope.where("role_id < 4").enumerate_groups("role_id")).to contain_exactly(*(1.upto(3).to_a))
21
+ end
22
+
23
+ it "obeys where conditions" do
24
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id")).to contain_exactly(1,2,3)
25
+ end
26
+
27
+ it "counts all groups by multiple columns" do
28
+ expect(scope.where("role_id < $1", 4).enumerate_groups("role_id, role_id * role_id")).to contain_exactly([1, 1], [2, 4], [3, 9])
12
29
  end
13
30
  end
14
31
 
15
32
  describe "count_by" do
16
- it "counts all groups" do
17
- create(:user, role_id: 1)
18
- create(:user, role_id: 1)
19
- create(:user, role_id: 1)
20
-
33
+ it "counts all groups by a single column" do
21
34
  expect(scope.count_by("role_id")).to include(1 => 4)
22
35
  expect(scope.count_by("role_id")).to include(2 => 1)
23
36
  expect(scope.count_by("role_id").keys).to contain_exactly(*all_role_ids)
24
37
  end
38
+
39
+ it "counts all groups by multiple columns" do
40
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([1,1] => 4)
41
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id")).to include([2, 4] => 1)
42
+ expect(scope.where("role_id < $1", 4).count_by("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
43
+ end
25
44
  end
26
45
 
27
46
  describe "count_by_estimate" do
28
47
  before do
29
- # 10_000 is chosen "magically". It is large enough to switch to the fast algorithm,
30
- # but
31
- allow(::Simple::SQL).to receive(:costs).and_return([0, 10_000])
48
+ expect_any_instance_of(Simple::SQL::Connection).to receive(:estimate_cost).at_least(:once).and_return(10_000)
32
49
  end
33
-
34
- it "counts all groups" do
35
- create(:user, role_id: 1)
36
- create(:user, role_id: 1)
37
- create(:user, role_id: 1)
38
50
 
51
+ it "counts all groups by a single column" do
39
52
  expect(scope.count_by_estimate("role_id")).to include(1 => 4)
40
53
  expect(scope.count_by_estimate("role_id")).to include(2 => 1)
41
54
  expect(scope.count_by_estimate("role_id").keys).to contain_exactly(*all_role_ids)
42
55
  end
56
+
57
+ it "counts all groups by multiple columns and conditions" do
58
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([1,1] => 4)
59
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id")).to include([2, 4] => 1)
60
+ expect(scope.where("role_id < $1", 4).count_by_estimate("role_id, role_id * role_id").keys).to contain_exactly([1, 1], [2, 4], [3, 9])
61
+ end
43
62
  end
44
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-sql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.32
4
+ version: 0.5.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - radiospiel
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-02 00:00:00.000000000 Z
12
+ date: 2021-04-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pg_array_parser
@@ -91,22 +91,22 @@ dependencies:
91
91
  name: activerecord
92
92
  requirement: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">"
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '4.2'
96
+ version: 5.2.4.5
97
97
  - - "<"
98
98
  - !ruby/object:Gem::Version
99
- version: '7'
99
+ version: '6.1'
100
100
  type: :runtime
101
101
  prerelease: false
102
102
  version_requirements: !ruby/object:Gem::Requirement
103
103
  requirements:
104
- - - ">"
104
+ - - ">="
105
105
  - !ruby/object:Gem::Version
106
- version: '4.2'
106
+ version: 5.2.4.5
107
107
  - - "<"
108
108
  - !ruby/object:Gem::Version
109
- version: '7'
109
+ version: '6.1'
110
110
  - !ruby/object:Gem::Dependency
111
111
  name: pg
112
112
  requirement: !ruby/object:Gem::Requirement
@@ -125,16 +125,16 @@ dependencies:
125
125
  name: rake
126
126
  requirement: !ruby/object:Gem::Requirement
127
127
  requirements:
128
- - - "~>"
128
+ - - ">="
129
129
  - !ruby/object:Gem::Version
130
- version: '11'
130
+ version: 12.3.3
131
131
  type: :development
132
132
  prerelease: false
133
133
  version_requirements: !ruby/object:Gem::Requirement
134
134
  requirements:
135
- - - "~>"
135
+ - - ">="
136
136
  - !ruby/object:Gem::Version
137
- version: '11'
137
+ version: 12.3.3
138
138
  - !ruby/object:Gem::Dependency
139
139
  name: rspec
140
140
  requirement: !ruby/object:Gem::Requirement
@@ -235,6 +235,7 @@ files:
235
235
  - lib/simple/sql/table_print.rb
236
236
  - lib/simple/sql/version.rb
237
237
  - log/.gitkeep
238
+ - scripts/benchmark1.rb
238
239
  - scripts/release
239
240
  - scripts/release.rb
240
241
  - scripts/stats