pgslice 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9a07db7bfb2579474dfc22c97f654eeb0753c67f
4
- data.tar.gz: 31147f793f011cee930c989d40429087f9ceab4a
3
+ metadata.gz: 10c9e21963650fa68dd402f3c690ab0b89300bfb
4
+ data.tar.gz: 5fe1cea0ef611008c95dbf06eae19e766dbf679e
5
5
  SHA512:
6
- metadata.gz: a474711ac629c19dca856e11b028e89a5d2b38732aa451b2fe531628b2c54c74b6bef628c0c279bbf0b88ffc5d0ce53e25705e3f43bc959999c50c85f5cf26a4
7
- data.tar.gz: a2e83881771c76b3ac4676eea176f64e0384534f2a0d71f4c00c412da2b21945ca274c78caa2265e5bf1df84df05fb95a890b6a76a054e653aecc47eafa758a3
6
+ metadata.gz: 6f879dec8f413fd90eed3f911aacaedf5dda87036d490cab18d14fb1447439114a2359db4f15a95e3f713bcd0bf2bcad10f65c96c69537b83ed4172820b2d582
7
+ data.tar.gz: 74189c73cdd2e6cf2549b31524c9e64103719d78135708bc2d992a118a0413a1b6d63cc4dc22659427717938fde3be5b8caab53d17af6d3088628697cc94256e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.0
2
+
3
+ - Switched to new trigger, which is about 20% faster
4
+
1
5
  ## 0.1.7
2
6
 
3
7
  - Added `--source-table` option to `fill`
data/README.md CHANGED
@@ -1,24 +1,30 @@
1
1
  # pgslice
2
2
 
3
- Postgres partitioning as easy as pie
3
+ Postgres partitioning as easy as pie. Works great for both new and existing tables, with zero downtime and minimal app changes.
4
+
5
+ :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
4
6
 
5
7
  ## Install
6
8
 
7
- Run:
9
+ pgslice is a command line tool. To install, run:
8
10
 
9
11
  ```sh
10
12
  gem install pgslice
11
13
  ```
12
14
 
15
+ This will give you the `pgslice` command.
16
+
13
17
  ## Steps
14
18
 
15
- 1. Specify your database credentials
19
+ 1. Ensure the table you want to partition has been created. We’ll refer to this as `<table>`.
20
+
21
+ 2. Specify your database credentials
16
22
 
17
23
  ```sh
18
24
  export PGSLICE_URL=postgres://localhost/myapp_development
19
25
  ```
20
26
 
21
- 2. Create an intermediate table
27
+ 3. Create an intermediate table
22
28
 
23
29
  ```sh
24
30
  pgslice prep <table> <column> <period>
@@ -28,7 +34,7 @@ gem install pgslice
28
34
 
29
35
  This creates a table named `<table>_intermediate` with the appropriate trigger for partitioning.
30
36
 
31
- 3. Add partitions
37
+ 4. Add partitions
32
38
 
33
39
  ```sh
34
40
  pgslice add_partitions <table> --intermediate --past 3 --future 3
@@ -38,7 +44,7 @@ gem install pgslice
38
44
 
39
45
  Use the `--past` and `--future` options to control the number of partitions.
40
46
 
41
- 4. *Optional, for tables with data* - Fill the partitions in batches with data from the original table
47
+ 5. *Optional, for tables with data* - Fill the partitions in batches with data from the original table
42
48
 
43
49
  ```sh
44
50
  pgslice fill <table>
@@ -48,7 +54,7 @@ gem install pgslice
48
54
 
49
55
  To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
50
56
 
51
- 5. Swap the intermediate table with the original table
57
+ 6. Swap the intermediate table with the original table
52
58
 
53
59
  ```sh
54
60
  pgslice swap <table>
@@ -56,165 +62,209 @@ gem install pgslice
56
62
 
57
63
  The original table is renamed `<table>_retired` and the intermediate table is renamed `<table>`.
58
64
 
59
- 6. Fill the rest
65
+ 7. Fill the rest (rows inserted between the first fill and the swap)
60
66
 
61
67
  ```sh
62
68
  pgslice fill <table> --swapped
63
69
  ```
64
70
 
65
- 7. Archive and drop the original table
71
+ 8. Archive and drop the original table
66
72
 
67
- ## Adding Partitions
73
+ ## Sample Output
68
74
 
69
- To add partitions, use:
75
+ pgslice prints the SQL commands that were executed on the server. To print without executing, use the `--dry-run` option.
70
76
 
71
77
  ```sh
72
- pgslice add_partitions <table> --future 3
78
+ pgslice prep visits created_at month
73
79
  ```
74
80
 
75
- Add this as a cron job to create a new partition each day or month.
81
+ ```sql
82
+ BEGIN;
76
83
 
77
- ```
78
- # day
79
- 0 0 * * * pgslice add_partitions <table> --future 3 --url ...
84
+ CREATE TABLE visits_intermediate (LIKE visits INCLUDING ALL);
80
85
 
81
- # month
82
- 0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
86
+ CREATE FUNCTION visits_insert_trigger()
87
+ RETURNS trigger AS $$
88
+ BEGIN
89
+ RAISE EXCEPTION 'Date out of range. Create partitions first.';
90
+ END;
91
+ $$ LANGUAGE plpgsql;
92
+
93
+ CREATE TRIGGER visits_insert_trigger
94
+ BEFORE INSERT ON visits_intermediate
95
+ FOR EACH ROW EXECUTE PROCEDURE visits_insert_trigger();
96
+
97
+ COMMENT ON TRIGGER visits_insert_trigger ON visits_intermediate is 'column:created_at,period:month';
98
+
99
+ COMMIT;
83
100
  ```
84
101
 
85
- Add a monitor to ensure partitions are being created.
102
+ ```sh
103
+ pgslice add_partitions visits --intermediate --past 1 --future 1
104
+ ```
86
105
 
87
106
  ```sql
88
- SELECT 1 FROM
89
- pg_catalog.pg_class c
90
- INNER JOIN
91
- pg_catalog.pg_namespace n ON n.oid = c.relnamespace
92
- WHERE
93
- c.relkind = 'r' AND
94
- n.nspname = 'public' AND
95
- c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
96
- -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
97
- ```
107
+ BEGIN;
98
108
 
99
- ## Additional Commands
109
+ CREATE TABLE visits_201608
110
+ (CHECK (created_at >= '2016-08-01'::date AND created_at < '2016-09-01'::date))
111
+ INHERITS (visits_intermediate);
100
112
 
101
- To undo prep (which will delete partitions), use:
113
+ ALTER TABLE visits_201608 ADD PRIMARY KEY (id);
102
114
 
103
- ```sh
104
- pgslice unprep <table>
105
- ```
115
+ CREATE INDEX ON visits_201608 USING btree (user_id);
106
116
 
107
- To undo swap, use:
117
+ CREATE TABLE visits_201609
118
+ (CHECK (created_at >= '2016-09-01'::date AND created_at < '2016-10-01'::date))
119
+ INHERITS (visits_intermediate);
108
120
 
109
- ```sh
110
- pgslice unswap <table>
111
- ```
121
+ ALTER TABLE visits_201609 ADD PRIMARY KEY (id);
112
122
 
113
- ## Sample Output
123
+ CREATE INDEX ON visits_201609 USING btree (user_id);
114
124
 
115
- `pgslice` prints the SQL commands that were executed on the server. To print without executing, use the `--dry-run` option.
125
+ CREATE TABLE visits_201610
126
+ (CHECK (created_at >= '2016-10-01'::date AND created_at < '2016-11-01'::date))
127
+ INHERITS (visits_intermediate);
116
128
 
117
- ```console
118
- $ pgslice prep locations created_at day
119
- BEGIN;
129
+ ALTER TABLE visits_201610 ADD PRIMARY KEY (id);
120
130
 
121
- CREATE TABLE locations_intermediate (LIKE locations INCLUDING ALL);
131
+ CREATE INDEX ON visits_201610 USING btree (user_id);
122
132
 
123
- CREATE FUNCTION locations_insert_trigger()
133
+ CREATE OR REPLACE FUNCTION visits_insert_trigger()
124
134
  RETURNS trigger AS $$
125
135
  BEGIN
126
- EXECUTE 'INSERT INTO locations_' || to_char(NEW.created_at, 'YYYYMMDD') || ' VALUES ($1.*)' USING NEW;
136
+ IF (NEW.created_at >= '2016-09-01'::date AND NEW.created_at < '2016-10-01'::date) THEN
137
+ INSERT INTO visits_201609 VALUES (NEW.*);
138
+ ELSIF (NEW.created_at >= '2016-10-01'::date AND NEW.created_at < '2016-11-01'::date) THEN
139
+ INSERT INTO visits_201610 VALUES (NEW.*);
140
+ ELSIF (NEW.created_at >= '2016-08-01'::date AND NEW.created_at < '2016-09-01'::date) THEN
141
+ INSERT INTO visits_201608 VALUES (NEW.*);
142
+ ELSE
143
+ RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
144
+ END IF;
127
145
  RETURN NULL;
128
146
  END;
129
147
  $$ LANGUAGE plpgsql;
130
148
 
131
- CREATE TRIGGER locations_insert_trigger
132
- BEFORE INSERT ON locations_intermediate
133
- FOR EACH ROW EXECUTE PROCEDURE locations_insert_trigger();
134
-
135
149
  COMMIT;
136
150
  ```
137
151
 
138
- ```console
139
- $ pgslice add_partitions locations --intermediate --past 1 --future 1
140
- BEGIN;
141
-
142
- CREATE TABLE locations_20160423
143
- (CHECK (created_at >= '2016-04-23'::date AND created_at < '2016-04-24'::date))
144
- INHERITS (locations_intermediate);
152
+ ```sh
153
+ pgslice fill visits
154
+ ```
145
155
 
146
- ALTER TABLE locations_20160423 ADD PRIMARY KEY (id);
156
+ ```sql
157
+ /* 1 of 3 */
158
+ INSERT INTO visits_intermediate (id, user_id, ip, created_at)
159
+ SELECT id, user_id, ip, created_at FROM visits
160
+ WHERE id > 0 AND id <= 10000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
147
161
 
148
- CREATE INDEX ON locations_20160423 USING btree (updated_at, shopper_id);
162
+ /* 2 of 3 */
163
+ INSERT INTO visits_intermediate (id, user_id, ip, created_at)
164
+ SELECT id, user_id, ip, created_at FROM visits
165
+ WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
149
166
 
150
- CREATE TABLE locations_20160424
151
- (CHECK (created_at >= '2016-04-24'::date AND created_at < '2016-04-25'::date))
152
- INHERITS (locations_intermediate);
167
+ /* 3 of 3 */
168
+ INSERT INTO visits_intermediate (id, user_id, ip, created_at)
169
+ SELECT id, user_id, ip, created_at FROM visits
170
+ WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-08-01'::date AND created_at < '2016-11-01'::date
171
+ ```
153
172
 
154
- ALTER TABLE locations_20160424 ADD PRIMARY KEY (id);
173
+ ```sh
174
+ pgslice swap visits
175
+ ```
155
176
 
156
- CREATE INDEX ON locations_20160424 USING btree (updated_at, shopper_id);
177
+ ```sql
178
+ BEGIN;
157
179
 
158
- CREATE TABLE locations_20160425
159
- (CHECK (created_at >= '2016-04-25'::date AND created_at < '2016-04-26'::date))
160
- INHERITS (locations_intermediate);
180
+ ALTER TABLE visits RENAME TO visits_retired;
161
181
 
162
- ALTER TABLE locations_20160425 ADD PRIMARY KEY (id);
182
+ ALTER TABLE visits_intermediate RENAME TO visits;
163
183
 
164
- CREATE INDEX ON locations_20160425 USING btree (updated_at, shopper_id);
184
+ ALTER SEQUENCE visits_id_seq OWNED BY visits.id;
165
185
 
166
186
  COMMIT;
167
187
  ```
168
188
 
169
- ```console
170
- $ pgslice fill locations
171
- /* 1 of 3 */
172
- INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
173
- SELECT id, latitude, longitude, created_at FROM locations
174
- WHERE id > 0 AND id <= 10000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
189
+ ## Adding Partitions
175
190
 
176
- /* 2 of 3 */
177
- INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
178
- SELECT id, latitude, longitude, created_at FROM locations
179
- WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
191
+ To add partitions, use:
180
192
 
181
- /* 3 of 3 */
182
- INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
183
- SELECT id, latitude, longitude, created_at FROM locations
184
- WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
193
+ ```sh
194
+ pgslice add_partitions <table> --future 3
185
195
  ```
186
196
 
187
- ```console
188
- $ pgslice swap locations
189
- BEGIN;
197
+ Add this as a cron job to create a new partition each day or month.
190
198
 
191
- ALTER TABLE locations RENAME TO locations_retired;
199
+ ```sh
200
+ # day
201
+ 0 0 * * * pgslice add_partitions <table> --future 3 --url ...
192
202
 
193
- ALTER TABLE locations_intermediate RENAME TO locations;
203
+ # month
204
+ 0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
205
+ ```
194
206
 
195
- ALTER SEQUENCE locations_id_seq OWNED BY locations.id;
207
+ Add a monitor to ensure partitions are being created.
196
208
 
197
- COMMIT;
209
+ ```sql
210
+ SELECT 1 FROM
211
+ pg_catalog.pg_class c
212
+ INNER JOIN
213
+ pg_catalog.pg_namespace n ON n.oid = c.relnamespace
214
+ WHERE
215
+ c.relkind = 'r' AND
216
+ n.nspname = 'public' AND
217
+ c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
218
+ -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
198
219
  ```
199
220
 
200
- ```console
201
- $ pgslice add_partitions locations --future 2
202
- BEGIN;
221
+ ## Additional Commands
203
222
 
204
- CREATE TABLE locations_20160426
205
- (CHECK (created_at >= '2016-04-26'::date AND created_at < '2016-04-27'::date))
206
- INHERITS (locations);
223
+ To undo prep (which will delete partitions), use:
207
224
 
208
- ALTER TABLE locations_20160426 ADD PRIMARY KEY (id);
225
+ ```sh
226
+ pgslice unprep <table>
227
+ ```
209
228
 
210
- CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
229
+ To undo swap, use:
211
230
 
212
- COMMIT;
231
+ ```sh
232
+ pgslice unswap <table>
233
+ ```
234
+
235
+ ## App Changes
236
+
237
+ This setup allows you to read and write using the original table name, with no knowledge that it’s partitioned. However, there are a few things to be aware of.
238
+
239
+ ### Reads
240
+
241
+ When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
242
+
243
+ ```sql
244
+ SELECT * FROM
245
+ visits
246
+ WHERE
247
+ user_id = 123 AND
248
+ -- for performance
249
+ created_at >= '2016-09-01' AND created_at < '2016-09-02'
250
+ ```
251
+
252
+ For this to be effective, ensure `constraint_exclusion` is set to `partition` (default value) or `on`.
253
+
254
+ ```sql
255
+ SHOW constraint_exclusion;
213
256
  ```
214
257
 
258
+ ### Writes
259
+
260
+ If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record back. If you need this, you can either:
261
+
262
+ 1. Insert directly into the partition
263
+ 2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
264
+
215
265
  ## One Off Tasks
216
266
 
217
- You can also reduce the size of a table without partitioning.
267
+ You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in.
218
268
 
219
269
  ```sh
220
270
  pgslice prep <table> --no-partition
@@ -237,12 +287,24 @@ gem install specific_install
237
287
  gem specific_install ankane/pgslice
238
288
  ```
239
289
 
290
+ ## Reference
291
+
292
+ - [PostgreSQL Manual](https://www.postgresql.org/docs/current/static/ddl-partitioning.html)
293
+ - [PostgreSQL Wiki](https://wiki.postgresql.org/wiki/Table_partitioning)
294
+
240
295
  ## TODO
241
296
 
242
297
  - Command to sync index changes with partitions
243
298
  - Disable indexing for faster `fill`
244
299
  - ETA for `fill`
245
300
 
301
+ ## Related Projects
302
+
303
+ Also check out:
304
+
305
+ - [PgHero](https://github.com/ankane/pghero) - A performance dashboard for Postgres
306
+ - [pgsync](https://github.com/ankane/pgsync) - Sync Postgres data to your local machine
307
+
246
308
  ## Contributing
247
309
 
248
310
  Everyone is encouraged to help improve this project. Here are a few ways you can help:
data/lib/pgslice.rb CHANGED
@@ -79,8 +79,7 @@ CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
79
79
  CREATE FUNCTION #{trigger_name}()
80
80
  RETURNS trigger AS $$
81
81
  BEGIN
82
- EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
83
- RETURN NULL;
82
+ RAISE EXCEPTION 'Date out of range. Create partitions first.';
84
83
  END;
85
84
  $$ LANGUAGE plpgsql;
86
85
  SQL
@@ -90,6 +89,10 @@ CREATE TRIGGER #{trigger_name}
90
89
  BEFORE INSERT ON #{intermediate_table}
91
90
  FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
92
91
  SQL
92
+
93
+ queries << <<-SQL
94
+ COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period}';
95
+ SQL
93
96
  end
94
97
 
95
98
  run_queries(queries)
@@ -130,14 +133,25 @@ CREATE TRIGGER #{trigger_name}
130
133
 
131
134
  queries = []
132
135
 
133
- period, field = settings_from_table(original_table, table)
136
+ comment = execute("SELECT obj_description(oid, 'pg_trigger') AS comment FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table]).first
137
+ if comment
138
+ field, period = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil]
139
+ end
140
+
141
+ unless period
142
+ period, field = settings_from_table(original_table, table)
143
+ queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period}';"
144
+ end
134
145
  abort "Could not read settings" unless period
135
- today = round_date(Date.today, period)
146
+ # today = utc date
147
+ today = round_date(DateTime.now.new_offset(0).to_date, period)
148
+ added_partitions = []
136
149
  range.each do |n|
137
150
  day = advance_date(today, period, n)
138
151
 
139
152
  partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
140
153
  next if table_exists?(partition_name)
154
+ added_partitions << partition_name
141
155
 
142
156
  queries << <<-SQL
143
157
  CREATE TABLE #{partition_name}
@@ -152,6 +166,48 @@ CREATE TABLE #{partition_name}
152
166
  end
153
167
  end
154
168
 
169
+ # update trigger based on existing partitions
170
+ current_defs = []
171
+ future_defs = []
172
+ past_defs = []
173
+ name_format = self.name_format(period)
174
+ existing_tables = self.existing_tables(like: "#{original_table}_%").select { |t| /#{Regexp.escape("#{original_table}_")}(\d{4,6})/.match(t) }
175
+ existing_tables = (existing_tables + added_partitions).uniq.sort
176
+
177
+ existing_tables.each do |table|
178
+ day = DateTime.strptime(table.split("_").last, name_format)
179
+ partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
180
+
181
+ sql = "(NEW.#{field} >= #{sql_date(day)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1))}) THEN
182
+ INSERT INTO #{partition_name} VALUES (NEW.*);"
183
+
184
+ if day.to_date < today
185
+ past_defs << sql
186
+ elsif advance_date(day, period, 1) < today
187
+ current_defs << sql
188
+ else
189
+ future_defs << sql
190
+ end
191
+ end
192
+
193
+ # order by current period, future periods asc, past periods desc
194
+ trigger_defs = current_defs + future_defs + past_defs.reverse
195
+
196
+ if trigger_defs.any?
197
+ queries << <<-SQL
198
+ CREATE OR REPLACE FUNCTION #{trigger_name}()
199
+ RETURNS trigger AS $$
200
+ BEGIN
201
+ IF #{trigger_defs.join("\n ELSIF ")}
202
+ ELSE
203
+ RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
204
+ END IF;
205
+ RETURN NULL;
206
+ END;
207
+ $$ LANGUAGE plpgsql;
208
+ SQL
209
+ end
210
+
155
211
  run_queries(queries) if queries.any?
156
212
  end
157
213
 
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.1.7"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-09-15 00:00:00.000000000 Z
11
+ date: 2016-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop