pgslice 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +21 -47
- data/lib/pgslice/cli/add_partitions.rb +3 -0
- data/lib/pgslice/cli/prep.rb +13 -12
- data/lib/pgslice/cli/swap.rb +1 -1
- data/lib/pgslice/helpers.rb +19 -3
- data/lib/pgslice/table.rb +7 -3
- data/lib/pgslice/version.rb +1 -1
- metadata +10 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f56f37e43ecc30fd145316bde2bd20478b360c077f928323fdee982379433b8
|
4
|
+
data.tar.gz: 594dc56e38e2d5902df435ddc514cdbb15004432f4b2b0d5d95ae37c7f8b00a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cda3774db98a20ba3d13e1655baa7bcd3f294f810457d1c1a2a6ff7b3da1e861814d26638a95fcfa8f9fcad3280b0a6e419fead798cf83fb76e30108bd8b567
|
7
|
+
data.tar.gz: 62b2bc217e0fc0e822e4852159790b5d0e7d5e20a8d27216890362975de7100253183834c383b618c2d7ed1623891fbd41f76d44145e3274d6f7b0e1a1a7a225
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -32,9 +32,9 @@ This will give you the `pgslice` command. You can also install it with [Homebrew
|
|
32
32
|
pgslice prep <table> <column> <period>
|
33
33
|
```
|
34
34
|
|
35
|
-
|
35
|
+
The column should be a `timestamp`, `timestamptz`, or `date` column, and the period can be `day`, `month`, or `year`.
|
36
36
|
|
37
|
-
This creates a partitioned table named `<table>_intermediate
|
37
|
+
This creates a partitioned table named `<table>_intermediate` using range partitioning.
|
38
38
|
|
39
39
|
4. Add partitions to the intermediate table
|
40
40
|
|
@@ -50,7 +50,7 @@ This will give you the `pgslice` command. You can also install it with [Homebrew
|
|
50
50
|
pgslice fill <table>
|
51
51
|
```
|
52
52
|
|
53
|
-
Use the `--batch-size` and `--sleep` options to control the speed
|
53
|
+
Use the `--batch-size` and `--sleep` options to control the speed (defaults to `10000` and `0` respectively).
|
54
54
|
|
55
55
|
To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
|
56
56
|
|
@@ -108,17 +108,17 @@ pgslice add_partitions visits --intermediate --past 1 --future 1
|
|
108
108
|
```sql
|
109
109
|
BEGIN;
|
110
110
|
|
111
|
-
CREATE TABLE "public"."
|
111
|
+
CREATE TABLE "public"."visits_202208" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2022-08-01') TO ('2022-09-01');
|
112
112
|
|
113
|
-
ALTER TABLE "public"."
|
113
|
+
ALTER TABLE "public"."visits_202208" ADD PRIMARY KEY ("id");
|
114
114
|
|
115
|
-
CREATE TABLE "public"."
|
115
|
+
CREATE TABLE "public"."visits_202209" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2022-09-01') TO ('2022-10-01');
|
116
116
|
|
117
|
-
ALTER TABLE "public"."
|
117
|
+
ALTER TABLE "public"."visits_202209" ADD PRIMARY KEY ("id");
|
118
118
|
|
119
|
-
CREATE TABLE "public"."
|
119
|
+
CREATE TABLE "public"."visits_202210" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2022-10-01') TO ('2022-11-01');
|
120
120
|
|
121
|
-
ALTER TABLE "public"."
|
121
|
+
ALTER TABLE "public"."visits_202210" ADD PRIMARY KEY ("id");
|
122
122
|
|
123
123
|
COMMIT;
|
124
124
|
```
|
@@ -131,17 +131,17 @@ pgslice fill visits
|
|
131
131
|
/* 1 of 3 */
|
132
132
|
INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
|
133
133
|
SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
|
134
|
-
WHERE "id" > 0 AND "id" <= 10000 AND "created_at" >= '
|
134
|
+
WHERE "id" > 0 AND "id" <= 10000 AND "created_at" >= '2022-08-01'::date AND "created_at" < '2022-11-01'::date
|
135
135
|
|
136
136
|
/* 2 of 3 */
|
137
137
|
INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
|
138
138
|
SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
|
139
|
-
WHERE "id" > 10000 AND "id" <= 20000 AND "created_at" >= '
|
139
|
+
WHERE "id" > 10000 AND "id" <= 20000 AND "created_at" >= '2022-08-01'::date AND "created_at" < '2022-11-01'::date
|
140
140
|
|
141
141
|
/* 3 of 3 */
|
142
142
|
INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
|
143
143
|
SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
|
144
|
-
WHERE "id" > 20000 AND "id" <= 30000 AND "created_at" >= '
|
144
|
+
WHERE "id" > 20000 AND "id" <= 30000 AND "created_at" >= '2022-08-01'::date AND "created_at" < '2022-11-01'::date
|
145
145
|
```
|
146
146
|
|
147
147
|
```sh
|
@@ -149,11 +149,11 @@ pgslice analyze visits
|
|
149
149
|
```
|
150
150
|
|
151
151
|
```sql
|
152
|
-
ANALYZE VERBOSE "public"."
|
152
|
+
ANALYZE VERBOSE "public"."visits_202208";
|
153
153
|
|
154
|
-
ANALYZE VERBOSE "public"."
|
154
|
+
ANALYZE VERBOSE "public"."visits_202209";
|
155
155
|
|
156
|
-
ANALYZE VERBOSE "public"."
|
156
|
+
ANALYZE VERBOSE "public"."visits_202210";
|
157
157
|
|
158
158
|
ANALYZE VERBOSE "public"."visits_intermediate";
|
159
159
|
```
|
@@ -217,25 +217,20 @@ WHERE
|
|
217
217
|
Back up and drop older partitions each day, month, or year.
|
218
218
|
|
219
219
|
```sh
|
220
|
-
pg_dump -c -Fc -t <table>
|
221
|
-
psql -c "DROP TABLE <table>
|
220
|
+
pg_dump -c -Fc -t <table>_202209 $PGSLICE_URL > <table>_202209.dump
|
221
|
+
psql -c "DROP TABLE <table>_202209" $PGSLICE_URL
|
222
222
|
```
|
223
223
|
|
224
224
|
If you use [Amazon S3](https://aws.amazon.com/s3/) for backups, [s3cmd](https://github.com/s3tools/s3cmd) is a nice tool.
|
225
225
|
|
226
226
|
```sh
|
227
|
-
s3cmd put <table>
|
227
|
+
s3cmd put <table>_202209.dump s3://<s3-bucket>/<table>_202209.dump
|
228
228
|
```
|
229
229
|
|
230
230
|
## Schema Updates
|
231
231
|
|
232
232
|
Once a table is partitioned, make schema updates on the master table only (not partitions). This includes adding, removing, and modifying columns, as well as adding and removing indexes and foreign keys.
|
233
233
|
|
234
|
-
A few exceptions are:
|
235
|
-
|
236
|
-
- For Postgres 10, make index and foreign key updates on partitions only
|
237
|
-
- For Postgres < 10, make index and foreign key updates on the master table and all partitions
|
238
|
-
|
239
234
|
## Additional Commands
|
240
235
|
|
241
236
|
To undo prep (which will delete partitions), use:
|
@@ -272,27 +267,20 @@ SELECT * FROM
|
|
272
267
|
WHERE
|
273
268
|
user_id = 123 AND
|
274
269
|
-- for performance
|
275
|
-
created_at >= '
|
270
|
+
created_at >= '2022-09-01' AND created_at < '2022-09-02'
|
276
271
|
```
|
277
272
|
|
278
|
-
For this to be effective, ensure `constraint_exclusion` is set to `partition` (default value) or `on`.
|
273
|
+
For this to be effective, ensure `constraint_exclusion` is set to `partition` (the default value) or `on`.
|
279
274
|
|
280
275
|
```sql
|
281
276
|
SHOW constraint_exclusion;
|
282
277
|
```
|
283
278
|
|
284
|
-
### Writes
|
285
|
-
|
286
|
-
Before Postgres 10, if you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
|
287
|
-
|
288
|
-
1. Insert directly into the partition
|
289
|
-
2. Get value before the insert with `SELECT nextval('sequence_name')` (for multiple rows, append `FROM generate_series(1, n)`)
|
290
|
-
|
291
279
|
## Frameworks
|
292
280
|
|
293
281
|
### Rails
|
294
282
|
|
295
|
-
|
283
|
+
Specify the primary key for partitioned models to ensure it’s returned.
|
296
284
|
|
297
285
|
```ruby
|
298
286
|
class Visit < ApplicationRecord
|
@@ -300,16 +288,6 @@ class Visit < ApplicationRecord
|
|
300
288
|
end
|
301
289
|
```
|
302
290
|
|
303
|
-
Before Postgres 10, preload the value.
|
304
|
-
|
305
|
-
```ruby
|
306
|
-
class Visit < ApplicationRecord
|
307
|
-
before_create do
|
308
|
-
self.id ||= self.class.connection.select_all("SELECT nextval('#{self.class.sequence_name}')").first["nextval"]
|
309
|
-
end
|
310
|
-
end
|
311
|
-
```
|
312
|
-
|
313
291
|
### Other Frameworks
|
314
292
|
|
315
293
|
Please submit a PR if additional configuration is needed.
|
@@ -328,10 +306,6 @@ pgslice swap <table>
|
|
328
306
|
|
329
307
|
Triggers aren’t copied from the original table. You can set up triggers on the intermediate table if needed. Note that Postgres doesn’t support `BEFORE / FOR EACH ROW` triggers on partitioned tables.
|
330
308
|
|
331
|
-
## Declarative Partitioning
|
332
|
-
|
333
|
-
Postgres 10 introduces [declarative partitioning](https://www.postgresql.org/docs/10/static/ddl-partitioning.html#ddl-partitioning-declarative). A major benefit is `INSERT` statements with a `RETURNING` clause work as expected. If you prefer to use trigger-based partitioning instead (not recommended), pass the `--trigger-based` option to the `prep` command.
|
334
|
-
|
335
309
|
## Data Protection
|
336
310
|
|
337
311
|
Always make sure your [connection is secure](https://ankane.org/postgres-sslmode-explained) when connecting to a database over a network you don’t fully trust. Your best option is to connect over SSH or a VPN. Another option is to use `sslmode=verify-full`. If you don’t do this, your database credentials can be compromised.
|
@@ -96,6 +96,9 @@ CREATE TABLE #{quote_table(partition)}
|
|
96
96
|
partitions.each do |partition|
|
97
97
|
day = partition_date(partition, name_format)
|
98
98
|
|
99
|
+
# note: does not support generated columns
|
100
|
+
# could support by listing columns
|
101
|
+
# but this would cause issues with schema changes
|
99
102
|
sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
|
100
103
|
INSERT INTO #{quote_table(partition)} VALUES (NEW.*);"
|
101
104
|
|
data/lib/pgslice/cli/prep.rb
CHANGED
@@ -3,6 +3,7 @@ module PgSlice
|
|
3
3
|
desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
|
4
4
|
option :partition, type: :boolean, default: true, desc: "Partition the table"
|
5
5
|
option :trigger_based, type: :boolean, default: false, desc: "Use trigger-based partitioning"
|
6
|
+
option :test_version, type: :numeric, hide: true
|
6
7
|
def prep(table, column=nil, period=nil)
|
7
8
|
table = create_table(table)
|
8
9
|
intermediate_table = table.intermediate_table
|
@@ -24,23 +25,23 @@ module PgSlice
|
|
24
25
|
queries = []
|
25
26
|
|
26
27
|
# version summary
|
27
|
-
# 1. trigger-based
|
28
|
-
# 2. declarative, with indexes and foreign keys on child tables
|
29
|
-
# 3. declarative, with indexes and foreign keys on parent table
|
30
|
-
version =
|
31
|
-
if options[:trigger_based] || server_version_num < 100000
|
32
|
-
1
|
33
|
-
elsif server_version_num < 110000
|
34
|
-
2
|
35
|
-
else
|
36
|
-
3
|
37
|
-
end
|
28
|
+
# 1. trigger-based (pg9)
|
29
|
+
# 2. declarative, with indexes and foreign keys on child tables (pg10)
|
30
|
+
# 3. declarative, with indexes and foreign keys on parent table (pg11+)
|
31
|
+
version = options[:test_version] || (options[:trigger_based] ? 1 : 3)
|
38
32
|
|
39
33
|
declarative = version > 1
|
40
34
|
|
41
35
|
if declarative && options[:partition]
|
36
|
+
including = ["DEFAULTS", "CONSTRAINTS", "STORAGE", "COMMENTS", "STATISTICS"]
|
37
|
+
if server_version_num >= 120000
|
38
|
+
including << "GENERATED"
|
39
|
+
end
|
40
|
+
if server_version_num >= 140000
|
41
|
+
including << "COMPRESSION"
|
42
|
+
end
|
42
43
|
queries << <<-SQL
|
43
|
-
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)}
|
44
|
+
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} #{including.map { |v| "INCLUDING #{v}" }.join(" ")}) PARTITION BY RANGE (#{quote_ident(column)});
|
44
45
|
SQL
|
45
46
|
|
46
47
|
if version == 3
|
data/lib/pgslice/cli/swap.rb
CHANGED
@@ -20,7 +20,7 @@ module PgSlice
|
|
20
20
|
queries << "ALTER SEQUENCE #{quote_ident(sequence["sequence_schema"])}.#{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};"
|
21
21
|
end
|
22
22
|
|
23
|
-
queries.unshift("SET LOCAL lock_timeout =
|
23
|
+
queries.unshift("SET LOCAL lock_timeout = #{escape_literal(options[:lock_timeout])};")
|
24
24
|
|
25
25
|
run_queries(queries)
|
26
26
|
end
|
data/lib/pgslice/helpers.rb
CHANGED
@@ -35,11 +35,15 @@ module PgSlice
|
|
35
35
|
@schema = Array(params.delete("schema") || "public")[0]
|
36
36
|
uri.query = params.any? ? URI.encode_www_form(params) : nil
|
37
37
|
|
38
|
-
ENV["PGCONNECT_TIMEOUT"] ||= "
|
38
|
+
ENV["PGCONNECT_TIMEOUT"] ||= "3"
|
39
39
|
conn = PG::Connection.new(uri.to_s)
|
40
40
|
conn.set_notice_processor do |message|
|
41
41
|
say message
|
42
42
|
end
|
43
|
+
@server_version_num = conn.exec("SHOW server_version_num")[0]["server_version_num"].to_i
|
44
|
+
if @server_version_num < 110000
|
45
|
+
abort "This version of pgslice requires Postgres 11+"
|
46
|
+
end
|
43
47
|
conn
|
44
48
|
end
|
45
49
|
rescue PG::ConnectionBad => e
|
@@ -86,7 +90,8 @@ module PgSlice
|
|
86
90
|
end
|
87
91
|
|
88
92
|
def server_version_num
|
89
|
-
|
93
|
+
connection # ensure called first
|
94
|
+
@server_version_num
|
90
95
|
end
|
91
96
|
|
92
97
|
# helpers
|
@@ -97,7 +102,7 @@ module PgSlice
|
|
97
102
|
else
|
98
103
|
fmt = "%Y-%m-%d"
|
99
104
|
end
|
100
|
-
str =
|
105
|
+
str = escape_literal(time.strftime(fmt))
|
101
106
|
add_cast ? "#{str}::#{cast}" : str
|
102
107
|
end
|
103
108
|
|
@@ -152,6 +157,10 @@ module PgSlice
|
|
152
157
|
PG::Connection.quote_ident(value)
|
153
158
|
end
|
154
159
|
|
160
|
+
def escape_literal(value)
|
161
|
+
connection.escape_literal(value)
|
162
|
+
end
|
163
|
+
|
155
164
|
def quote_table(table)
|
156
165
|
table.quote_table
|
157
166
|
end
|
@@ -176,5 +185,12 @@ module PgSlice
|
|
176
185
|
def make_fk_def(fk_def, table)
|
177
186
|
"ALTER TABLE #{quote_table(table)} ADD #{fk_def};"
|
178
187
|
end
|
188
|
+
|
189
|
+
def make_stat_def(stat_def, table)
|
190
|
+
m = /ON (.+) FROM/.match(stat_def)
|
191
|
+
# errors on duplicate names, but should be rare
|
192
|
+
stat_name = "#{table}_#{m[1].split(", ").map { |v| v.gsub(/\W/i, "") }.join("_")}_stat"
|
193
|
+
stat_def.sub(/ FROM \S+/, " FROM #{quote_table(table)}").sub(/ STATISTICS .+ ON /, " STATISTICS #{quote_ident(stat_name)} ON ") + ";"
|
194
|
+
end
|
179
195
|
end
|
180
196
|
end
|
data/lib/pgslice/table.rb
CHANGED
@@ -16,7 +16,7 @@ module PgSlice
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def columns
|
19
|
-
execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2", [schema, name]).map{ |r| r["column_name"] }
|
19
|
+
execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND is_generated = 'NEVER'", [schema, name]).map{ |r| r["column_name"] }
|
20
20
|
end
|
21
21
|
|
22
22
|
# http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL
|
@@ -178,12 +178,16 @@ module PgSlice
|
|
178
178
|
PgSlice::CLI.instance.send(:execute, *args)
|
179
179
|
end
|
180
180
|
|
181
|
+
def escape_literal(value)
|
182
|
+
PgSlice::CLI.instance.send(:escape_literal, value)
|
183
|
+
end
|
184
|
+
|
181
185
|
def quote_ident(value)
|
182
186
|
PG::Connection.quote_ident(value)
|
183
187
|
end
|
184
188
|
|
185
189
|
def regclass
|
186
|
-
"
|
190
|
+
"#{escape_literal(quote_table)}::regclass"
|
187
191
|
end
|
188
192
|
|
189
193
|
def sql_date(time, cast, add_cast = true)
|
@@ -192,7 +196,7 @@ module PgSlice
|
|
192
196
|
else
|
193
197
|
fmt = "%Y-%m-%d"
|
194
198
|
end
|
195
|
-
str =
|
199
|
+
str = escape_literal(time.strftime(fmt))
|
196
200
|
add_cast ? "#{str}::#{cast}" : str
|
197
201
|
end
|
198
202
|
end
|
data/lib/pgslice/version.rb
CHANGED
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgslice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: pg
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: thor
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0
|
33
|
+
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0
|
40
|
+
version: '0'
|
41
41
|
description:
|
42
42
|
email: andrew@ankane.org
|
43
43
|
executables:
|
@@ -73,14 +73,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
73
73
|
requirements:
|
74
74
|
- - ">="
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '2.
|
76
|
+
version: '2.7'
|
77
77
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
78
|
requirements:
|
79
79
|
- - ">="
|
80
80
|
- !ruby/object:Gem::Version
|
81
81
|
version: '0'
|
82
82
|
requirements: []
|
83
|
-
rubygems_version: 3.4.
|
83
|
+
rubygems_version: 3.4.10
|
84
84
|
signing_key:
|
85
85
|
specification_version: 4
|
86
86
|
summary: Postgres partitioning as easy as pie
|