pgslice 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9a07db7bfb2579474dfc22c97f654eeb0753c67f
4
- data.tar.gz: 31147f793f011cee930c989d40429087f9ceab4a
3
+ metadata.gz: 10c9e21963650fa68dd402f3c690ab0b89300bfb
4
+ data.tar.gz: 5fe1cea0ef611008c95dbf06eae19e766dbf679e
5
5
  SHA512:
6
- metadata.gz: a474711ac629c19dca856e11b028e89a5d2b38732aa451b2fe531628b2c54c74b6bef628c0c279bbf0b88ffc5d0ce53e25705e3f43bc959999c50c85f5cf26a4
7
- data.tar.gz: a2e83881771c76b3ac4676eea176f64e0384534f2a0d71f4c00c412da2b21945ca274c78caa2265e5bf1df84df05fb95a890b6a76a054e653aecc47eafa758a3
6
+ metadata.gz: 6f879dec8f413fd90eed3f911aacaedf5dda87036d490cab18d14fb1447439114a2359db4f15a95e3f713bcd0bf2bcad10f65c96c69537b83ed4172820b2d582
7
+ data.tar.gz: 74189c73cdd2e6cf2549b31524c9e64103719d78135708bc2d992a118a0413a1b6d63cc4dc22659427717938fde3be5b8caab53d17af6d3088628697cc94256e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.0
2
+
3
+ - Switched to new trigger, which is about 20% faster
4
+
1
5
  ## 0.1.7
2
6
 
3
7
  - Added `--source-table` option to `fill`
data/README.md CHANGED
@@ -1,24 +1,30 @@
1
1
  # pgslice
2
2
 
3
- Postgres partitioning as easy as pie
3
+ Postgres partitioning as easy as pie. Works great for both new and existing tables, with zero downtime and minimal app changes.
4
+
5
+ :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
4
6
 
5
7
  ## Install
6
8
 
7
- Run:
9
+ pgslice is a command line tool. To install, run:
8
10
 
9
11
  ```sh
10
12
  gem install pgslice
11
13
  ```
12
14
 
15
+ This will give you the `pgslice` command.
16
+
13
17
  ## Steps
14
18
 
15
- 1. Specify your database credentials
19
+ 1. Ensure the table you want to partition has been created. We’ll refer to this as `<table>`.
20
+
21
+ 2. Specify your database credentials
16
22
 
17
23
  ```sh
18
24
  export PGSLICE_URL=postgres://localhost/myapp_development
19
25
  ```
20
26
 
21
- 2. Create an intermediate table
27
+ 3. Create an intermediate table
22
28
 
23
29
  ```sh
24
30
  pgslice prep <table> <column> <period>
@@ -28,7 +34,7 @@ gem install pgslice
28
34
 
29
35
  This creates a table named `<table>_intermediate` with the appropriate trigger for partitioning.
30
36
 
31
- 3. Add partitions
37
+ 4. Add partitions
32
38
 
33
39
  ```sh
34
40
  pgslice add_partitions <table> --intermediate --past 3 --future 3
@@ -38,7 +44,7 @@ gem install pgslice
38
44
 
39
45
  Use the `--past` and `--future` options to control the number of partitions.
40
46
 
41
- 4. *Optional, for tables with data* - Fill the partitions in batches with data from the original table
47
+ 5. *Optional, for tables with data* - Fill the partitions in batches with data from the original table
42
48
 
43
49
  ```sh
44
50
  pgslice fill <table>
@@ -48,7 +54,7 @@ gem install pgslice
48
54
 
49
55
  To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
50
56
 
51
- 5. Swap the intermediate table with the original table
57
+ 6. Swap the intermediate table with the original table
52
58
 
53
59
  ```sh
54
60
  pgslice swap <table>
@@ -56,165 +62,209 @@ gem install pgslice
56
62
 
57
63
  The original table is renamed `<table>_retired` and the intermediate table is renamed `<table>`.
58
64
 
59
- 6. Fill the rest
65
+ 7. Fill the rest (rows inserted between the first fill and the swap)
60
66
 
61
67
  ```sh
62
68
  pgslice fill <table> --swapped
63
69
  ```
64
70
 
65
- 7. Archive and drop the original table
71
+ 8. Archive and drop the original table
66
72
 
67
- ## Adding Partitions
73
+ ## Sample Output
68
74
 
69
- To add partitions, use:
75
+ pgslice prints the SQL commands that were executed on the server. To print without executing, use the `--dry-run` option.
70
76
 
71
77
  ```sh
72
- pgslice add_partitions <table> --future 3
78
+ pgslice prep visits created_at month
73
79
  ```
74
80
 
75
- Add this as a cron job to create a new partition each day or month.
81
+ ```sql
82
+ BEGIN;
76
83
 
77
- ```
78
- # day
79
- 0 0 * * * pgslice add_partitions <table> --future 3 --url ...
84
+ CREATE TABLE visits_intermediate (LIKE visits INCLUDING ALL);
80
85
 
81
- # month
82
- 0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
86
+ CREATE FUNCTION visits_insert_trigger()
87
+ RETURNS trigger AS $$
88
+ BEGIN
89
+ RAISE EXCEPTION 'Date out of range. Create partitions first.';
90
+ END;
91
+ $$ LANGUAGE plpgsql;
92
+
93
+ CREATE TRIGGER visits_insert_trigger
94
+ BEFORE INSERT ON visits_intermediate
95
+ FOR EACH ROW EXECUTE PROCEDURE visits_insert_trigger();
96
+
97
+ COMMENT ON TRIGGER visits_insert_trigger ON visits_intermediate is 'column:created_at,period:month';
98
+
99
+ COMMIT;
83
100
  ```
84
101
 
85
- Add a monitor to ensure partitions are being created.
102
+ ```sh
103
+ pgslice add_partitions visits --intermediate --past 1 --future 1
104
+ ```
86
105
 
87
106
  ```sql
88
- SELECT 1 FROM
89
- pg_catalog.pg_class c
90
- INNER JOIN
91
- pg_catalog.pg_namespace n ON n.oid = c.relnamespace
92
- WHERE
93
- c.relkind = 'r' AND
94
- n.nspname = 'public' AND
95
- c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
96
- -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
97
- ```
107
+ BEGIN;
98
108
 
99
- ## Additional Commands
109
+ CREATE TABLE visits_201608
110
+ (CHECK (created_at >= '2016-08-01'::date AND created_at < '2016-09-01'::date))
111
+ INHERITS (visits_intermediate);
100
112
 
101
- To undo prep (which will delete partitions), use:
113
+ ALTER TABLE visits_201608 ADD PRIMARY KEY (id);
102
114
 
103
- ```sh
104
- pgslice unprep <table>
105
- ```
115
+ CREATE INDEX ON visits_201608 USING btree (user_id);
106
116
 
107
- To undo swap, use:
117
+ CREATE TABLE visits_201609
118
+ (CHECK (created_at >= '2016-09-01'::date AND created_at < '2016-10-01'::date))
119
+ INHERITS (visits_intermediate);
108
120
 
109
- ```sh
110
- pgslice unswap <table>
111
- ```
121
+ ALTER TABLE visits_201609 ADD PRIMARY KEY (id);
112
122
 
113
- ## Sample Output
123
+ CREATE INDEX ON visits_201609 USING btree (user_id);
114
124
 
115
- `pgslice` prints the SQL commands that were executed on the server. To print without executing, use the `--dry-run` option.
125
+ CREATE TABLE visits_201610
126
+ (CHECK (created_at >= '2016-10-01'::date AND created_at < '2016-11-01'::date))
127
+ INHERITS (visits_intermediate);
116
128
 
117
- ```console
118
- $ pgslice prep locations created_at day
119
- BEGIN;
129
+ ALTER TABLE visits_201610 ADD PRIMARY KEY (id);
120
130
 
121
- CREATE TABLE locations_intermediate (LIKE locations INCLUDING ALL);
131
+ CREATE INDEX ON visits_201610 USING btree (user_id);
122
132
 
123
- CREATE FUNCTION locations_insert_trigger()
133
+ CREATE OR REPLACE FUNCTION visits_insert_trigger()
124
134
  RETURNS trigger AS $$
125
135
  BEGIN
126
- EXECUTE 'INSERT INTO locations_' || to_char(NEW.created_at, 'YYYYMMDD') || ' VALUES ($1.*)' USING NEW;
136
+ IF (NEW.created_at >= '2016-09-01'::date AND NEW.created_at < '2016-10-01'::date) THEN
137
+ INSERT INTO visits_201609 VALUES (NEW.*);
138
+ ELSIF (NEW.created_at >= '2016-10-01'::date AND NEW.created_at < '2016-11-01'::date) THEN
139
+ INSERT INTO visits_201610 VALUES (NEW.*);
140
+ ELSIF (NEW.created_at >= '2016-08-01'::date AND NEW.created_at < '2016-09-01'::date) THEN
141
+ INSERT INTO visits_201608 VALUES (NEW.*);
142
+ ELSE
143
+ RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
144
+ END IF;
127
145
  RETURN NULL;
128
146
  END;
129
147
  $$ LANGUAGE plpgsql;
130
148
 
131
- CREATE TRIGGER locations_insert_trigger
132
- BEFORE INSERT ON locations_intermediate
133
- FOR EACH ROW EXECUTE PROCEDURE locations_insert_trigger();
134
-
135
149
  COMMIT;
136
150
  ```
137
151
 
138
- ```console
139
- $ pgslice add_partitions locations --intermediate --past 1 --future 1
140
- BEGIN;
141
-
142
- CREATE TABLE locations_20160423
143
- (CHECK (created_at >= '2016-04-23'::date AND created_at < '2016-04-24'::date))
144
- INHERITS (locations_intermediate);
152
+ ```sh
153
+ pgslice fill visits
154
+ ```
145
155
 
146
- ALTER TABLE locations_20160423 ADD PRIMARY KEY (id);
156
+ ```sql
157
+ /* 1 of 3 */
158
+ INSERT INTO visits_intermediate (id, user_id, ip, created_at)
159
+ SELECT id, user_id, ip, created_at FROM visits
160
+ WHERE id > 0 AND id <= 10000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
147
161
 
148
- CREATE INDEX ON locations_20160423 USING btree (updated_at, shopper_id);
162
+ /* 2 of 3 */
163
+ INSERT INTO visits_intermediate (id, user_id, ip, created_at)
164
+ SELECT id, user_id, ip, created_at FROM visits
165
+ WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
149
166
 
150
- CREATE TABLE locations_20160424
151
- (CHECK (created_at >= '2016-04-24'::date AND created_at < '2016-04-25'::date))
152
- INHERITS (locations_intermediate);
167
+ /* 3 of 3 */
168
+ INSERT INTO visits_intermediate (id, user_id, ip, created_at)
169
+ SELECT id, user_id, ip, created_at FROM visits
170
+ WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
171
+ ```
153
172
 
154
- ALTER TABLE locations_20160424 ADD PRIMARY KEY (id);
173
+ ```sh
174
+ pgslice swap visits
175
+ ```
155
176
 
156
- CREATE INDEX ON locations_20160424 USING btree (updated_at, shopper_id);
177
+ ```sql
178
+ BEGIN;
157
179
 
158
- CREATE TABLE locations_20160425
159
- (CHECK (created_at >= '2016-04-25'::date AND created_at < '2016-04-26'::date))
160
- INHERITS (locations_intermediate);
180
+ ALTER TABLE visits RENAME TO visits_retired;
161
181
 
162
- ALTER TABLE locations_20160425 ADD PRIMARY KEY (id);
182
+ ALTER TABLE visits_intermediate RENAME TO visits;
163
183
 
164
- CREATE INDEX ON locations_20160425 USING btree (updated_at, shopper_id);
184
+ ALTER SEQUENCE visits_id_seq OWNED BY visits.id;
165
185
 
166
186
  COMMIT;
167
187
  ```
168
188
 
169
- ```console
170
- $ pgslice fill locations
171
- /* 1 of 3 */
172
- INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
173
- SELECT id, latitude, longitude, created_at FROM locations
174
- WHERE id > 0 AND id <= 10000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
189
+ ## Adding Partitions
175
190
 
176
- /* 2 of 3 */
177
- INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
178
- SELECT id, latitude, longitude, created_at FROM locations
179
- WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
191
+ To add partitions, use:
180
192
 
181
- /* 3 of 3 */
182
- INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
183
- SELECT id, latitude, longitude, created_at FROM locations
184
- WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
193
+ ```sh
194
+ pgslice add_partitions <table> --future 3
185
195
  ```
186
196
 
187
- ```console
188
- $ pgslice swap locations
189
- BEGIN;
197
+ Add this as a cron job to create a new partition each day or month.
190
198
 
191
- ALTER TABLE locations RENAME TO locations_retired;
199
+ ```sh
200
+ # day
201
+ 0 0 * * * pgslice add_partitions <table> --future 3 --url ...
192
202
 
193
- ALTER TABLE locations_intermediate RENAME TO locations;
203
+ # month
204
+ 0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
205
+ ```
194
206
 
195
- ALTER SEQUENCE locations_id_seq OWNED BY locations.id;
207
+ Add a monitor to ensure partitions are being created.
196
208
 
197
- COMMIT;
209
+ ```sql
210
+ SELECT 1 FROM
211
+ pg_catalog.pg_class c
212
+ INNER JOIN
213
+ pg_catalog.pg_namespace n ON n.oid = c.relnamespace
214
+ WHERE
215
+ c.relkind = 'r' AND
216
+ n.nspname = 'public' AND
217
+ c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
218
+ -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
198
219
  ```
199
220
 
200
- ```console
201
- $ pgslice add_partitions locations --future 2
202
- BEGIN;
221
+ ## Additional Commands
203
222
 
204
- CREATE TABLE locations_20160426
205
- (CHECK (created_at >= '2016-04-26'::date AND created_at < '2016-04-27'::date))
206
- INHERITS (locations);
223
+ To undo prep (which will delete partitions), use:
207
224
 
208
- ALTER TABLE locations_20160426 ADD PRIMARY KEY (id);
225
+ ```sh
226
+ pgslice unprep <table>
227
+ ```
209
228
 
210
- CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
229
+ To undo swap, use:
211
230
 
212
- COMMIT;
231
+ ```sh
232
+ pgslice unswap <table>
233
+ ```
234
+
235
+ ## App Changes
236
+
237
+ This setup allows you to read and write with the original table name with no knowledge it’s partitioned. However, there are a few things to be aware of.
238
+
239
+ ### Reads
240
+
241
+ When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
242
+
243
+ ```sql
244
+ SELECT * FROM
245
+ visits
246
+ WHERE
247
+ user_id = 123 AND
248
+ -- for performance
249
+ created_at >= '2016-09-01' AND created_at < '2016-09-02'
250
+ ```
251
+
252
+ For this to be effective, ensure `constraint_exclusion` is set to `partition` (default value) or `on`.
253
+
254
+ ```sql
255
+ SHOW constraint_exclusion;
213
256
  ```
214
257
 
258
+ ### Writes
259
+
260
+ If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record back. If you need this, you can either:
261
+
262
+ 1. Insert directly into the partition
263
+ 2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
264
+
215
265
  ## One Off Tasks
216
266
 
217
- You can also reduce the size of a table without partitioning.
267
+ You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in.
218
268
 
219
269
  ```sh
220
270
  pgslice prep <table> --no-partition
@@ -237,12 +287,24 @@ gem install specific_install
237
287
  gem specific_install ankane/pgslice
238
288
  ```
239
289
 
290
+ ## Reference
291
+
292
+ - [PostgreSQL Manual](https://www.postgresql.org/docs/current/static/ddl-partitioning.html)
293
+ - [PostgreSQL Wiki](https://wiki.postgresql.org/wiki/Table_partitioning)
294
+
240
295
  ## TODO
241
296
 
242
297
  - Command to sync index changes with partitions
243
298
  - Disable indexing for faster `fill`
244
299
  - ETA for `fill`
245
300
 
301
+ ## Related Projects
302
+
303
+ Also check out:
304
+
305
+ - [PgHero](https://github.com/ankane/pghero) - A performance dashboard for Postgres
306
+ - [pgsync](https://github.com/ankane/pgsync) - Sync Postgres data to your local machine
307
+
246
308
  ## Contributing
247
309
 
248
310
  Everyone is encouraged to help improve this project. Here are a few ways you can help:
data/lib/pgslice.rb CHANGED
@@ -79,8 +79,7 @@ CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
79
79
  CREATE FUNCTION #{trigger_name}()
80
80
  RETURNS trigger AS $$
81
81
  BEGIN
82
- EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
83
- RETURN NULL;
82
+ RAISE EXCEPTION 'Date out of range. Create partitions first.';
84
83
  END;
85
84
  $$ LANGUAGE plpgsql;
86
85
  SQL
@@ -90,6 +89,10 @@ CREATE TRIGGER #{trigger_name}
90
89
  BEFORE INSERT ON #{intermediate_table}
91
90
  FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
92
91
  SQL
92
+
93
+ queries << <<-SQL
94
+ COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period}';
95
+ SQL
93
96
  end
94
97
 
95
98
  run_queries(queries)
@@ -130,14 +133,25 @@ CREATE TRIGGER #{trigger_name}
130
133
 
131
134
  queries = []
132
135
 
133
- period, field = settings_from_table(original_table, table)
136
+ comment = execute("SELECT obj_description(oid, 'pg_trigger') AS comment FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table]).first
137
+ if comment
138
+ field, period = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil]
139
+ end
140
+
141
+ unless period
142
+ period, field = settings_from_table(original_table, table)
143
+ queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period}';"
144
+ end
134
145
  abort "Could not read settings" unless period
135
- today = round_date(Date.today, period)
146
+ # today = utc date
147
+ today = round_date(DateTime.now.new_offset(0).to_date, period)
148
+ added_partitions = []
136
149
  range.each do |n|
137
150
  day = advance_date(today, period, n)
138
151
 
139
152
  partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
140
153
  next if table_exists?(partition_name)
154
+ added_partitions << partition_name
141
155
 
142
156
  queries << <<-SQL
143
157
  CREATE TABLE #{partition_name}
@@ -152,6 +166,48 @@ CREATE TABLE #{partition_name}
152
166
  end
153
167
  end
154
168
 
169
+ # update trigger based on existing partitions
170
+ current_defs = []
171
+ future_defs = []
172
+ past_defs = []
173
+ name_format = self.name_format(period)
174
+ existing_tables = self.existing_tables(like: "#{original_table}_%").select { |t| /#{Regexp.escape("#{original_table}_")}(\d{4,6})/.match(t) }
175
+ existing_tables = (existing_tables + added_partitions).uniq.sort
176
+
177
+ existing_tables.each do |table|
178
+ day = DateTime.strptime(table.split("_").last, name_format)
179
+ partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
180
+
181
+ sql = "(NEW.#{field} >= #{sql_date(day)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1))}) THEN
182
+ INSERT INTO #{partition_name} VALUES (NEW.*);"
183
+
184
+ if day.to_date < today
185
+ past_defs << sql
186
+ elsif advance_date(day, period, 1) < today
187
+ current_defs << sql
188
+ else
189
+ future_defs << sql
190
+ end
191
+ end
192
+
193
+ # order by current period, future periods asc, past periods desc
194
+ trigger_defs = current_defs + future_defs + past_defs.reverse
195
+
196
+ if trigger_defs.any?
197
+ queries << <<-SQL
198
+ CREATE OR REPLACE FUNCTION #{trigger_name}()
199
+ RETURNS trigger AS $$
200
+ BEGIN
201
+ IF #{trigger_defs.join("\n ELSIF ")}
202
+ ELSE
203
+ RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
204
+ END IF;
205
+ RETURN NULL;
206
+ END;
207
+ $$ LANGUAGE plpgsql;
208
+ SQL
209
+ end
210
+
155
211
  run_queries(queries) if queries.any?
156
212
  end
157
213
 
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.1.7"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-09-15 00:00:00.000000000 Z
11
+ date: 2016-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop