pgslice 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Dockerfile +3 -0
- data/README.md +17 -8
- data/lib/pgslice.rb +49 -20
- data/lib/pgslice/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fbf523156daf638be0cf5de6a77c4c2ed4375ab
|
4
|
+
data.tar.gz: fa1839b58db3046ab8e4705a410c7c7e3d99a86a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 697401d7e40ad0db4f7be59d2a0e711b8f1e57c7c3efdf8874d4ed29f915bf5e98bc967e6f6199990a53853796e214f21cfbfe0961bd504defa8143124f0163a
|
7
|
+
data.tar.gz: bc09c5daebc448a40aace451e80733a15f5d1ef6443e7f3fb7a5d7d82f3fee444b4e918c9b08dd6f80ec82a956ab5882a11d5997438025e4d394a0fe0b5b89b3
|
data/CHANGELOG.md
CHANGED
data/Dockerfile
ADDED
data/README.md
CHANGED
@@ -254,10 +254,17 @@ To undo swap, use:
|
|
254
254
|
pgslice unswap <table>
|
255
255
|
```
|
256
256
|
|
257
|
-
## App
|
257
|
+
## App Considerations
|
258
258
|
|
259
259
|
This setup allows you to read and write with the original table name with no knowledge it’s partitioned. However, there are a few things to be aware of.
|
260
260
|
|
261
|
+
### Writes
|
262
|
+
|
263
|
+
If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
|
264
|
+
|
265
|
+
1. Insert directly into the partition
|
266
|
+
2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
|
267
|
+
|
261
268
|
### Reads
|
262
269
|
|
263
270
|
When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
|
@@ -277,13 +284,6 @@ For this to be effective, ensure `constraint_exclusion` is set to `partition` (d
|
|
277
284
|
SHOW constraint_exclusion;
|
278
285
|
```
|
279
286
|
|
280
|
-
### Writes
|
281
|
-
|
282
|
-
If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record back. If you need this, you can either:
|
283
|
-
|
284
|
-
1. Insert directly into the partition
|
285
|
-
2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
|
286
|
-
|
287
287
|
## One Off Tasks
|
288
288
|
|
289
289
|
You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in.
|
@@ -309,6 +309,15 @@ gem install specific_install
|
|
309
309
|
gem specific_install ankane/pgslice
|
310
310
|
```
|
311
311
|
|
312
|
+
## Docker
|
313
|
+
|
314
|
+
```sh
|
315
|
+
docker build -t pgslice .
|
316
|
+
alias pgslice="docker run --rm -e PGSLICE_URL pgslice"
|
317
|
+
```
|
318
|
+
|
319
|
+
This will give you the `pgslice` command.
|
320
|
+
|
312
321
|
## Reference
|
313
322
|
|
314
323
|
- [PostgreSQL Manual](https://www.postgresql.org/docs/current/static/ddl-partitioning.html)
|
data/lib/pgslice.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "pgslice/version"
|
2
2
|
require "slop"
|
3
3
|
require "pg"
|
4
|
+
require "cgi"
|
4
5
|
|
5
6
|
module PgSlice
|
6
7
|
class Error < StandardError; end
|
@@ -51,6 +52,7 @@ module PgSlice
|
|
51
52
|
|
52
53
|
def prep
|
53
54
|
table, column, period = arguments
|
55
|
+
cast = column_cast(table, column)
|
54
56
|
intermediate_table = "#{table}_intermediate"
|
55
57
|
trigger_name = self.trigger_name(table)
|
56
58
|
|
@@ -91,7 +93,7 @@ CREATE TRIGGER #{trigger_name}
|
|
91
93
|
SQL
|
92
94
|
|
93
95
|
queries << <<-SQL
|
94
|
-
COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period}';
|
96
|
+
COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period},cast:#{cast}';
|
95
97
|
SQL
|
96
98
|
end
|
97
99
|
|
@@ -133,11 +135,11 @@ SQL
|
|
133
135
|
|
134
136
|
queries = []
|
135
137
|
|
136
|
-
period, field, needs_comment = settings_from_trigger(original_table, table)
|
138
|
+
period, field, cast, needs_comment = settings_from_trigger(original_table, table)
|
137
139
|
abort "Could not read settings" unless period
|
138
140
|
|
139
141
|
if needs_comment
|
140
|
-
queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period}';"
|
142
|
+
queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period},cast:#{cast}';"
|
141
143
|
end
|
142
144
|
|
143
145
|
# today = utc date
|
@@ -152,7 +154,7 @@ SQL
|
|
152
154
|
|
153
155
|
queries << <<-SQL
|
154
156
|
CREATE TABLE #{partition_name}
|
155
|
-
(CHECK (#{field} >= #{sql_date(day)} AND #{field} < #{sql_date(advance_date(day, period, 1))}))
|
157
|
+
(CHECK (#{field} >= #{sql_date(day, cast)} AND #{field} < #{sql_date(advance_date(day, period, 1), cast)}))
|
156
158
|
INHERITS (#{table});
|
157
159
|
SQL
|
158
160
|
|
@@ -175,7 +177,7 @@ CREATE TABLE #{partition_name}
|
|
175
177
|
day = DateTime.strptime(table.split("_").last, name_format)
|
176
178
|
partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
|
177
179
|
|
178
|
-
sql = "(NEW.#{field} >= #{sql_date(day)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1))}) THEN
|
180
|
+
sql = "(NEW.#{field} >= #{sql_date(day, cast)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
|
179
181
|
INSERT INTO #{partition_name} VALUES (NEW.*);"
|
180
182
|
|
181
183
|
if day.to_date < today
|
@@ -227,7 +229,7 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
|
|
227
229
|
abort "Table not found: #{source_table}" unless table_exists?(source_table)
|
228
230
|
abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
|
229
231
|
|
230
|
-
period, field, needs_comment = settings_from_trigger(table, dest_table)
|
232
|
+
period, field, cast, needs_comment = settings_from_trigger(table, dest_table)
|
231
233
|
|
232
234
|
if period
|
233
235
|
name_format = self.name_format(period)
|
@@ -252,7 +254,7 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
|
|
252
254
|
if options[:start]
|
253
255
|
max_dest_id = options[:start]
|
254
256
|
else
|
255
|
-
min_source_id = min_id(source_table, primary_key, field, starting_time, options[:where])
|
257
|
+
min_source_id = min_id(source_table, primary_key, field, cast, starting_time, options[:where])
|
256
258
|
max_dest_id = min_source_id - 1 if min_source_id
|
257
259
|
end
|
258
260
|
end
|
@@ -260,13 +262,14 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
|
|
260
262
|
starting_id = max_dest_id
|
261
263
|
fields = columns(source_table).map { |c| PG::Connection.quote_ident(c) }.join(", ")
|
262
264
|
batch_size = options[:batch_size]
|
265
|
+
cast = column_cast(table, field)
|
263
266
|
|
264
267
|
i = 1
|
265
268
|
batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
|
266
269
|
while starting_id < max_source_id
|
267
270
|
where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
|
268
271
|
if starting_time
|
269
|
-
where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
|
272
|
+
where << " AND #{field} >= #{sql_date(starting_time, cast)} AND #{field} < #{sql_date(ending_time, cast)}"
|
270
273
|
end
|
271
274
|
if options[:where]
|
272
275
|
where << " AND #{options[:where]}"
|
@@ -398,8 +401,16 @@ INSERT INTO #{dest_table} (#{fields})
|
|
398
401
|
connect_timeout: 1
|
399
402
|
}.reject { |_, value| value.to_s.empty? }
|
400
403
|
config.map { |key, value| config[key] = uri_parser.unescape(value) if value.is_a?(String) }
|
404
|
+
@schema = CGI.parse(uri.query.to_s)["schema"][0] || "public"
|
401
405
|
PG::Connection.new(config)
|
402
406
|
end
|
407
|
+
rescue PG::ConnectionBad => e
|
408
|
+
abort e.message
|
409
|
+
end
|
410
|
+
|
411
|
+
def schema
|
412
|
+
connection # ensure called first
|
413
|
+
@schema
|
403
414
|
end
|
404
415
|
|
405
416
|
def execute(query, params = [])
|
@@ -432,7 +443,7 @@ INSERT INTO #{dest_table} (#{fields})
|
|
432
443
|
|
433
444
|
def existing_tables(like:)
|
434
445
|
query = "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = $1 AND tablename LIKE $2"
|
435
|
-
execute(query, [
|
446
|
+
execute(query, [schema, like]).map { |r| r["tablename"] }.sort
|
436
447
|
end
|
437
448
|
|
438
449
|
def table_exists?(table)
|
@@ -440,7 +451,7 @@ INSERT INTO #{dest_table} (#{fields})
|
|
440
451
|
end
|
441
452
|
|
442
453
|
def columns(table)
|
443
|
-
execute("SELECT column_name FROM information_schema.columns WHERE table_schema =
|
454
|
+
execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2", [schema, table]).map{ |r| r["column_name"] }
|
444
455
|
end
|
445
456
|
|
446
457
|
# http://stackoverflow.com/a/20537829
|
@@ -460,7 +471,7 @@ INSERT INTO #{dest_table} (#{fields})
|
|
460
471
|
pg_attribute.attnum = any(pg_index.indkey) AND
|
461
472
|
indisprimary
|
462
473
|
SQL
|
463
|
-
row = execute(query, [
|
474
|
+
row = execute(query, [schema, table])[0]
|
464
475
|
row && row["attname"]
|
465
476
|
end
|
466
477
|
|
@@ -473,10 +484,11 @@ INSERT INTO #{dest_table} (#{fields})
|
|
473
484
|
execute(query)[0]["max"].to_i
|
474
485
|
end
|
475
486
|
|
476
|
-
def min_id(table, primary_key, column, starting_time, where)
|
487
|
+
def min_id(table, primary_key, column, cast, starting_time, where)
|
488
|
+
cast = column_cast(table, column)
|
477
489
|
query = "SELECT MIN(#{primary_key}) FROM #{table}"
|
478
490
|
conditions = []
|
479
|
-
conditions << "#{column} >= #{sql_date(starting_time)}" if starting_time
|
491
|
+
conditions << "#{column} >= #{sql_date(starting_time, cast)}" if starting_time
|
480
492
|
conditions << where if where
|
481
493
|
query << " WHERE #{conditions.join(" AND ")}" if conditions.any?
|
482
494
|
(execute(query)[0]["min"] || 1).to_i
|
@@ -498,10 +510,10 @@ INSERT INTO #{dest_table} (#{fields})
|
|
498
510
|
JOIN pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
|
499
511
|
JOIN pg_namespace n ON n.oid = s.relnamespace
|
500
512
|
WHERE s.relkind = 'S'
|
501
|
-
AND n.nspname =
|
502
|
-
AND t.relname = $
|
513
|
+
AND n.nspname = $1
|
514
|
+
AND t.relname = $2
|
503
515
|
SQL
|
504
|
-
execute(query, [table])
|
516
|
+
execute(query, [schema, table])
|
505
517
|
end
|
506
518
|
|
507
519
|
# helpers
|
@@ -518,8 +530,18 @@ INSERT INTO #{dest_table} (#{fields})
|
|
518
530
|
"#{table}_retired"
|
519
531
|
end
|
520
532
|
|
521
|
-
def
|
522
|
-
"
|
533
|
+
def column_cast(table, column)
|
534
|
+
data_type = execute("SELECT data_type FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND column_name = $3", [schema, table, column])[0]["data_type"]
|
535
|
+
data_type == "timestamp with time zone" ? "timestamptz" : "date"
|
536
|
+
end
|
537
|
+
|
538
|
+
def sql_date(time, cast)
|
539
|
+
if cast == "timestamptz"
|
540
|
+
fmt = "%Y-%m-%d %H:%M:%S UTC"
|
541
|
+
else
|
542
|
+
fmt = "%Y-%m-%d"
|
543
|
+
end
|
544
|
+
"'#{time.strftime(fmt)}'::#{cast}"
|
523
545
|
end
|
524
546
|
|
525
547
|
def name_format(period)
|
@@ -557,7 +579,7 @@ INSERT INTO #{dest_table} (#{fields})
|
|
557
579
|
needs_comment = false
|
558
580
|
comment = execute("SELECT obj_description(oid, 'pg_trigger') AS comment FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table])[0]
|
559
581
|
if comment
|
560
|
-
field, period = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil]
|
582
|
+
field, period, cast = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil, nil]
|
561
583
|
end
|
562
584
|
|
563
585
|
unless period
|
@@ -571,7 +593,14 @@ INSERT INTO #{dest_table} (#{fields})
|
|
571
593
|
field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
|
572
594
|
end
|
573
595
|
|
574
|
-
|
596
|
+
# backwards compatibility with 0.2.3 and earlier (pre-timestamptz support)
|
597
|
+
unless cast
|
598
|
+
cast = "date"
|
599
|
+
# update comment to explicitly define cast
|
600
|
+
needs_comment = true
|
601
|
+
end
|
602
|
+
|
603
|
+
[period, field, cast, needs_comment]
|
575
604
|
end
|
576
605
|
end
|
577
606
|
end
|
data/lib/pgslice/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgslice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|
@@ -76,6 +76,7 @@ extra_rdoc_files: []
|
|
76
76
|
files:
|
77
77
|
- ".gitignore"
|
78
78
|
- CHANGELOG.md
|
79
|
+
- Dockerfile
|
79
80
|
- Gemfile
|
80
81
|
- README.md
|
81
82
|
- Rakefile
|