pgslice 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b29363ba9973cca9be997f3bc62c748b27bee61d25da67f3d3af60f9166cec9
4
- data.tar.gz: bb6be94d05a1ff1e059f3386ce4ba34a1a4698a25f89e676d8d3117d4d203ac4
3
+ metadata.gz: 21d2ab39e5d79cc2477aedc2f458f5e42341c76387fcdd97dd7865cfaec777f8
4
+ data.tar.gz: 9df6923ae6aba4054c6631a3d0c5c34c8de82214db0d0d6644e73002b9224702
5
5
  SHA512:
6
- metadata.gz: abceaedcfa90cfa78954c3184baa02e270244d89b4be19016eb0e1c570902008745845018120182f4dbe543a42cf8d6d8926a52238245d95976946fcf1a49d6e
7
- data.tar.gz: b5adb37599b3651e737f81193f50495a9ae416544cc96acb77f371c26083cae000c27dc740a9992d303815eb3a250881c7ce5c0dd7bf571e7c1b58f4df4dff66
6
+ metadata.gz: eb51a60758f6179337c74052bac2ba433efc65ec211ccbcbf9175f9986df9e45f3f16560669a29fd6e9b3684377164dea622d5e39e26ac0f442b441e92953f72
7
+ data.tar.gz: fc16788a99b3f307d99dd1d5230cb53e5746241e68cf3028454301f48cd711332b71d4716ab1210c07ca9a86daa07df342d009b306f24cc6245a9efbcb6fb32b
@@ -1,3 +1,8 @@
1
+ ## 0.4.5
2
+
3
+ - Added support for Postgres 11 foreign key improvements
4
+ - Improved versioning
5
+
1
6
  ## 0.4.4
2
7
 
3
8
  - Added partitioning by `year`
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # pgslice
2
2
 
3
- Postgres partitioning as easy as pie. Works great for both new and existing tables, with zero downtime and minimal app changes. Archive older data on a rolling basis to keep your database size under control.
3
+ Postgres partitioning as easy as pie. Works great for both new and existing tables, with zero downtime and minimal app changes. No need to install anything on your database server. Archive older data on a rolling basis to keep your database size under control.
4
4
 
5
5
  :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
6
6
 
@@ -92,34 +92,13 @@ pgslice prep visits created_at month
92
92
  ```
93
93
 
94
94
  ```sql
95
- -- Postgres 10
96
-
97
- BEGIN;
98
-
99
- CREATE TABLE visits_intermediate (LIKE visits INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (created_at);
100
-
101
- COMMENT ON TABLE visits_intermediate is 'column:created_at,period:month';
102
-
103
- COMMIT;
104
-
105
- -- Postgres 9.6 and below
106
-
107
95
  BEGIN;
108
96
 
109
- CREATE TABLE visits_intermediate (LIKE visits INCLUDING ALL);
110
-
111
- CREATE FUNCTION visits_insert_trigger()
112
- RETURNS trigger AS $$
113
- BEGIN
114
- RAISE EXCEPTION 'Create partitions first.';
115
- END;
116
- $$ LANGUAGE plpgsql;
97
+ CREATE TABLE "public"."visits_intermediate" (LIKE "public"."visits" INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE ("created_at");
117
98
 
118
- CREATE TRIGGER visits_insert_trigger
119
- BEFORE INSERT ON visits_intermediate
120
- FOR EACH ROW EXECUTE PROCEDURE visits_insert_trigger();
99
+ CREATE INDEX ON "public"."visits_intermediate" USING btree ("created_at");
121
100
 
122
- COMMENT ON TRIGGER visits_insert_trigger ON visits_intermediate is 'column:created_at,period:month';
101
+ COMMENT ON TABLE "public"."visits_intermediate" is 'column:created_at,period:month,cast:date,version:3';
123
102
 
124
103
  COMMIT;
125
104
  ```
@@ -129,73 +108,19 @@ pgslice add_partitions visits --intermediate --past 1 --future 1
129
108
  ```
130
109
 
131
110
  ```sql
132
- -- Postgres 10
133
-
134
- BEGIN;
135
-
136
- CREATE TABLE visits_201608 PARTITION OF visits_intermediate FOR VALUES FROM ('2016-08-01') TO ('2016-09-01');
137
-
138
- ALTER TABLE visits_201608 ADD PRIMARY KEY (id);
139
-
140
- CREATE INDEX ON visits_201608 USING btree (user_id);
141
-
142
- CREATE TABLE visits_201609 PARTITION OF visits_intermediate FOR VALUES FROM ('2016-09-01') TO ('2016-10-01');
143
-
144
- ALTER TABLE visits_201609 ADD PRIMARY KEY (id);
145
-
146
- CREATE INDEX ON visits_201609 USING btree (user_id);
147
-
148
- CREATE TABLE visits_201610 PARTITION OF visits_intermediate FOR VALUES FROM ('2016-10-01') TO ('2016-11-01');
149
-
150
- ALTER TABLE visits_201610 ADD PRIMARY KEY (id);
151
-
152
- CREATE INDEX ON visits_201610 USING btree (user_id);
153
-
154
- COMMIT;
155
-
156
- -- Postgres 9.6 and below
157
-
158
111
  BEGIN;
159
112
 
160
- CREATE TABLE visits_201608
161
- (CHECK (created_at >= '2016-08-01'::date AND created_at < '2016-09-01'::date))
162
- INHERITS (visits_intermediate);
113
+ CREATE TABLE "public"."visits_201808" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2018-08-01') TO ('2018-09-01');
163
114
 
164
- ALTER TABLE visits_201608 ADD PRIMARY KEY (id);
115
+ ALTER TABLE "public"."visits_201808" ADD PRIMARY KEY ("id");
165
116
 
166
- CREATE INDEX ON visits_201608 USING btree (user_id);
117
+ CREATE TABLE "public"."visits_201809" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2018-09-01') TO ('2018-10-01');
167
118
 
168
- CREATE TABLE visits_201609
169
- (CHECK (created_at >= '2016-09-01'::date AND created_at < '2016-10-01'::date))
170
- INHERITS (visits_intermediate);
119
+ ALTER TABLE "public"."visits_201809" ADD PRIMARY KEY ("id");
171
120
 
172
- ALTER TABLE visits_201609 ADD PRIMARY KEY (id);
121
+ CREATE TABLE "public"."visits_201810" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2018-10-01') TO ('2018-11-01');
173
122
 
174
- CREATE INDEX ON visits_201609 USING btree (user_id);
175
-
176
- CREATE TABLE visits_201610
177
- (CHECK (created_at >= '2016-10-01'::date AND created_at < '2016-11-01'::date))
178
- INHERITS (visits_intermediate);
179
-
180
- ALTER TABLE visits_201610 ADD PRIMARY KEY (id);
181
-
182
- CREATE INDEX ON visits_201610 USING btree (user_id);
183
-
184
- CREATE OR REPLACE FUNCTION visits_insert_trigger()
185
- RETURNS trigger AS $$
186
- BEGIN
187
- IF (NEW.created_at >= '2016-09-01'::date AND NEW.created_at < '2016-10-01'::date) THEN
188
- INSERT INTO visits_201609 VALUES (NEW.*);
189
- ELSIF (NEW.created_at >= '2016-10-01'::date AND NEW.created_at < '2016-11-01'::date) THEN
190
- INSERT INTO visits_201610 VALUES (NEW.*);
191
- ELSIF (NEW.created_at >= '2016-08-01'::date AND NEW.created_at < '2016-09-01'::date) THEN
192
- INSERT INTO visits_201608 VALUES (NEW.*);
193
- ELSE
194
- RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
195
- END IF;
196
- RETURN NULL;
197
- END;
198
- $$ LANGUAGE plpgsql;
123
+ ALTER TABLE "public"."visits_201810" ADD PRIMARY KEY ("id");
199
124
 
200
125
  COMMIT;
201
126
  ```
@@ -206,19 +131,19 @@ pgslice fill visits
206
131
 
207
132
  ```sql
208
133
  /* 1 of 3 */
209
- INSERT INTO visits_intermediate ("id", "user_id", "ip", "created_at")
210
- SELECT "id", "user_id", "ip", "created_at" FROM visits
211
- WHERE id > 0 AND id <= 10000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
134
+ INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
135
+ SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
136
+ WHERE "id" > 0 AND "id" <= 10000 AND "created_at" >= '2018-08-01'::date AND "created_at" < '2018-11-01'::date
212
137
 
213
138
  /* 2 of 3 */
214
- INSERT INTO visits_intermediate ("id", "user_id", "ip", "created_at")
215
- SELECT "id", "user_id", "ip", "created_at" FROM visits
216
- WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
139
+ INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
140
+ SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
141
+ WHERE "id" > 10000 AND "id" <= 20000 AND "created_at" >= '2018-08-01'::date AND "created_at" < '2018-11-01'::date
217
142
 
218
143
  /* 3 of 3 */
219
- INSERT INTO visits_intermediate ("id", "user_id", "ip", "created_at")
220
- SELECT "id", "user_id", "ip", "created_at" FROM visits
221
- WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
144
+ INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
145
+ SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
146
+ WHERE "id" > 20000 AND "id" <= 30000 AND "created_at" >= '2018-08-01'::date AND "created_at" < '2018-11-01'::date
222
147
  ```
223
148
 
224
149
  ```sh
@@ -226,13 +151,13 @@ pgslice analyze visits
226
151
  ```
227
152
 
228
153
  ```sql
229
- ANALYZE VERBOSE visits_201608;
154
+ ANALYZE VERBOSE "public"."visits_201808";
230
155
 
231
- ANALYZE VERBOSE visits_201609;
156
+ ANALYZE VERBOSE "public"."visits_201809";
232
157
 
233
- ANALYZE VERBOSE visits_201610;
158
+ ANALYZE VERBOSE "public"."visits_201810";
234
159
 
235
- ANALYZE VERBOSE visits_intermediate;
160
+ ANALYZE VERBOSE "public"."visits_intermediate";
236
161
  ```
237
162
 
238
163
  ```sh
@@ -244,11 +169,11 @@ BEGIN;
244
169
 
245
170
  SET LOCAL lock_timeout = '5s';
246
171
 
247
- ALTER TABLE visits RENAME TO visits_retired;
172
+ ALTER TABLE "public"."visits" RENAME TO "visits_retired";
248
173
 
249
- ALTER TABLE visits_intermediate RENAME TO visits;
174
+ ALTER TABLE "public"."visits_intermediate" RENAME TO "visits";
250
175
 
251
- ALTER SEQUENCE visits_id_seq OWNED BY visits.id;
176
+ ALTER SEQUENCE "visits_id_seq" OWNED BY "public"."visits"."id";
252
177
 
253
178
  COMMIT;
254
179
  ```
@@ -294,14 +219,14 @@ WHERE
294
219
  Back up and drop older partitions each day, month, or year.
295
220
 
296
221
  ```sh
297
- pg_dump -c -Fc -t <table>_201609 $PGSLICE_URL > <table>_201609.dump
298
- psql -c "DROP <table>_201609" $PGSLICE_URL
222
+ pg_dump -c -Fc -t <table>_201809 $PGSLICE_URL > <table>_201809.dump
223
+ psql -c "DROP TABLE <table>_201809" $PGSLICE_URL
299
224
  ```
300
225
 
301
226
  If you use [Amazon S3](https://aws.amazon.com/s3/) for backups, [s3cmd](https://github.com/s3tools/s3cmd) is a nice tool.
302
227
 
303
228
  ```sh
304
- s3cmd put <table>_201609.dump s3://<s3-bucket>/<table>_201609.dump
229
+ s3cmd put <table>_201809.dump s3://<s3-bucket>/<table>_201809.dump
305
230
  ```
306
231
 
307
232
  ## Additional Commands
@@ -322,16 +247,9 @@ pgslice unswap <table>
322
247
 
323
248
  This setup allows you to read and write with the original table name with no knowledge that it’s partitioned. However, there are a few things to be aware of.
324
249
 
325
- ### Writes
326
-
327
- Before Postgres 10, if you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
328
-
329
- 1. Insert directly into the partition
330
- 2. Get value before the insert with `SELECT nextval('sequence_name')` (for multiple rows, append `FROM generate_series(1, n)`)
331
-
332
250
  ### Reads
333
251
 
334
- When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
252
+ When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
335
253
 
336
254
  ```sql
337
255
  SELECT * FROM
@@ -339,7 +257,7 @@ SELECT * FROM
339
257
  WHERE
340
258
  user_id = 123 AND
341
259
  -- for performance
342
- created_at >= '2016-09-01' AND created_at < '2016-09-02'
260
+ created_at >= '2018-09-01' AND created_at < '2018-09-02'
343
261
  ```
344
262
 
345
263
  For this to be effective, ensure `constraint_exclusion` is set to `partition` (default value) or `on`.
@@ -348,6 +266,13 @@ For this to be effective, ensure `constraint_exclusion` is set to `partition` (d
348
266
  SHOW constraint_exclusion;
349
267
  ```
350
268
 
269
+ ### Writes
270
+
271
+ Before Postgres 10, if you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
272
+
273
+ 1. Insert directly into the partition
274
+ 2. Get value before the insert with `SELECT nextval('sequence_name')` (for multiple rows, append `FROM generate_series(1, n)`)
275
+
351
276
  ## One Off Tasks
352
277
 
353
278
  You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in.
@@ -362,8 +287,8 @@ pgslice swap <table>
362
287
 
363
288
  Once a table is partitioned, here’s how to change the schema:
364
289
 
365
- - To add, remove, or modify a column, make the update on the master table only
366
- - To add or remove an index, make the update on the master table and all partitions (for Postgres 11, make the update on the master table only)
290
+ - To add, remove, or modify a column, make the update on the master table only.
291
+ - To add or remove an index or foreign key, make the update on the master table. For Postgres < 11, make the update on all partitions as well.
367
292
 
368
293
  ## Declarative Partitioning
369
294
 
@@ -371,17 +296,7 @@ Postgres 10 introduces [declarative partitioning](https://www.postgresql.org/doc
371
296
 
372
297
  ## Data Protection
373
298
 
374
- When connecting to a remote database, make sure your connection is secure.
375
-
376
- If you do not use a VPN, you must use `sslmode=verify-full` with a root certificate to [protect against MITM attacks](https://www.postgresql.org/docs/current/static/libpq-ssl.html). If you don’t do this, your database credentials can be compromised. This cannot be understated!
377
-
378
- Surprisingly and unfortunately, there’s [not a secure way](https://thusoy.com/2016/mitming-postgres) to connect to Heroku Postgres with any client.
379
-
380
- For Amazon RDS, download the [root certificate](https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem) and append to your database url:
381
-
382
- ```
383
- ?sslmode=verify-full&sslrootcert=rds-combined-ca-bundle.pem
384
- ```
299
+ Always make sure your connection is secure when connecting to your database over a network you don’t fully trust. Your best option is to connect over SSH or a VPN. Another option is to use `sslmode=verify-full`. If you don’t do this, your database credentials can be compromised.
385
300
 
386
301
  ## Upgrading
387
302
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "pgslice"
4
4
  begin
5
- PgSlice::Client.start
5
+ PgSlice::CLI.start
6
6
  rescue Interrupt
7
7
  abort
8
8
  end
@@ -1,10 +1,20 @@
1
1
  # dependencies
2
2
  require "cgi"
3
- require "thor"
4
3
  require "pg"
4
+ require "thor"
5
+ require "time"
5
6
 
6
7
  # modules
7
- require "pgslice/client"
8
- require "pgslice/generic_table"
8
+ require "pgslice/helpers"
9
9
  require "pgslice/table"
10
10
  require "pgslice/version"
11
+
12
+ # commands
13
+ require "pgslice/cli"
14
+ require "pgslice/cli/add_partitions"
15
+ require "pgslice/cli/analyze"
16
+ require "pgslice/cli/fill"
17
+ require "pgslice/cli/prep"
18
+ require "pgslice/cli/swap"
19
+ require "pgslice/cli/unprep"
20
+ require "pgslice/cli/unswap"
@@ -0,0 +1,32 @@
1
+ module PgSlice
2
+ class CLI < Thor
3
+ class << self
4
+ attr_accessor :instance
5
+ end
6
+
7
+ include Helpers
8
+
9
+ check_unknown_options!
10
+
11
+ class_option :url, desc: "Database URL"
12
+ class_option :dry_run, type: :boolean, default: false, desc: "Print statements without executing"
13
+
14
+ map %w[--version -v] => :version
15
+
16
+ def self.exit_on_failure?
17
+ true
18
+ end
19
+
20
+ def initialize(*args)
21
+ PgSlice::CLI.instance = self
22
+ $stdout.sync = true
23
+ $stderr.sync = true
24
+ super
25
+ end
26
+
27
+ desc "version", "Show version"
28
+ def version
29
+ log("pgslice #{PgSlice::VERSION}")
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,130 @@
1
+ module PgSlice
2
+ class CLI
3
+ desc "add_partitions TABLE", "Add partitions"
4
+ option :intermediate, type: :boolean, default: false, desc: "Add to intermediate table"
5
+ option :past, type: :numeric, default: 0, desc: "Number of past partitions to add"
6
+ option :future, type: :numeric, default: 0, desc: "Number of future partitions to add"
7
+ def add_partitions(table)
8
+ original_table = create_table(table)
9
+ table = options[:intermediate] ? original_table.intermediate_table : original_table
10
+ trigger_name = original_table.trigger_name
11
+
12
+ assert_table(table)
13
+
14
+ future = options[:future]
15
+ past = options[:past]
16
+ range = (-1 * past)..future
17
+
18
+ period, field, cast, needs_comment, declarative, version = table.fetch_settings(original_table.trigger_name)
19
+ unless period
20
+ message = "No settings found: #{table}"
21
+ message = "#{message}\nDid you mean to use --intermediate?" unless options[:intermediate]
22
+ abort message
23
+ end
24
+
25
+ queries = []
26
+
27
+ if needs_comment
28
+ queries << "COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(table)} is 'column:#{field},period:#{period},cast:#{cast}';"
29
+ end
30
+
31
+ # today = utc date
32
+ today = round_date(Time.now.utc.to_date, period)
33
+
34
+ schema_table =
35
+ if !declarative
36
+ table
37
+ elsif options[:intermediate]
38
+ original_table
39
+ else
40
+ table.partitions.last
41
+ end
42
+
43
+ # indexes automatically propagate in Postgres 11+
44
+ if version < 3
45
+ index_defs = schema_table.index_defs
46
+ fk_defs = schema_table.foreign_keys
47
+ else
48
+ index_defs = []
49
+ fk_defs = []
50
+ end
51
+
52
+ primary_key = schema_table.primary_key
53
+
54
+ added_partitions = []
55
+ range.each do |n|
56
+ day = advance_date(today, period, n)
57
+
58
+ partition = Table.new(original_table.schema, "#{original_table.name}_#{day.strftime(name_format(period))}")
59
+ next if partition.exists?
60
+ added_partitions << partition
61
+
62
+ if declarative
63
+ queries << <<-SQL
64
+ CREATE TABLE #{quote_table(partition)} PARTITION OF #{quote_table(table)} FOR VALUES FROM (#{sql_date(day, cast, false)}) TO (#{sql_date(advance_date(day, period, 1), cast, false)});
65
+ SQL
66
+ else
67
+ queries << <<-SQL
68
+ CREATE TABLE #{quote_table(partition)}
69
+ (CHECK (#{quote_ident(field)} >= #{sql_date(day, cast)} AND #{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}))
70
+ INHERITS (#{quote_table(table)});
71
+ SQL
72
+ end
73
+
74
+ queries << "ALTER TABLE #{quote_table(partition)} ADD PRIMARY KEY (#{primary_key.map { |k| quote_ident(k) }.join(", ")});" if primary_key.any?
75
+
76
+ index_defs.each do |index_def|
77
+ queries << make_index_def(index_def, partition)
78
+ end
79
+
80
+ fk_defs.each do |fk_def|
81
+ queries << make_fk_def(fk_def, partition)
82
+ end
83
+ end
84
+
85
+ unless declarative
86
+ # update trigger based on existing partitions
87
+ current_defs = []
88
+ future_defs = []
89
+ past_defs = []
90
+ name_format = self.name_format(period)
91
+ partitions = (table.partitions + added_partitions).uniq(&:name).sort_by(&:name)
92
+
93
+ partitions.each do |partition|
94
+ day = partition_date(partition, name_format)
95
+
96
+ sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
97
+ INSERT INTO #{quote_table(partition)} VALUES (NEW.*);"
98
+
99
+ if day.to_date < today
100
+ past_defs << sql
101
+ elsif advance_date(day, period, 1) < today
102
+ current_defs << sql
103
+ else
104
+ future_defs << sql
105
+ end
106
+ end
107
+
108
+ # order by current period, future periods asc, past periods desc
109
+ trigger_defs = current_defs + future_defs + past_defs.reverse
110
+
111
+ if trigger_defs.any?
112
+ queries << <<-SQL
113
+ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
114
+ RETURNS trigger AS $$
115
+ BEGIN
116
+ IF #{trigger_defs.join("\n ELSIF ")}
117
+ ELSE
118
+ RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
119
+ END IF;
120
+ RETURN NULL;
121
+ END;
122
+ $$ LANGUAGE plpgsql;
123
+ SQL
124
+ end
125
+ end
126
+
127
+ run_queries(queries) if queries.any?
128
+ end
129
+ end
130
+ end