pgslice 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Dockerfile +3 -0
- data/README.md +17 -8
- data/lib/pgslice.rb +49 -20
- data/lib/pgslice/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1fbf523156daf638be0cf5de6a77c4c2ed4375ab
+  data.tar.gz: fa1839b58db3046ab8e4705a410c7c7e3d99a86a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 697401d7e40ad0db4f7be59d2a0e711b8f1e57c7c3efdf8874d4ed29f915bf5e98bc967e6f6199990a53853796e214f21cfbfe0961bd504defa8143124f0163a
+  data.tar.gz: bc09c5daebc448a40aace451e80733a15f5d1ef6443e7f3fb7a5d7d82f3fee444b4e918c9b08dd6f80ec82a956ab5882a11d5997438025e4d394a0fe0b5b89b3
data/CHANGELOG.md
CHANGED
data/Dockerfile
ADDED
data/README.md
CHANGED
@@ -254,10 +254,17 @@ To undo swap, use:
 pgslice unswap <table>
 ```
 
-## App
+## App Considerations
 
 This set up allows you to read and write with the original table name with no knowledge it’s partitioned. However, there are a few things to be aware of.
 
+### Writes
+
+If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
+
+1. Insert directly into the partition
+2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
+
 ### Reads
 
 When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
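The new Writes note boils down to the pattern below. This is only an illustrative sketch using the `pg` gem; the `users` table and `users_id_seq` sequence are made-up names, and it assumes `PGSLICE_URL` points at a reachable database.

```ruby
require "pg"

conn = PG.connect(ENV["PGSLICE_URL"])

# With trigger-based partitioning, INSERT ... RETURNING id against the parent
# table no longer returns the id, so insert first and then read the sequence
# value in the same session.
conn.exec_params("INSERT INTO users (name, created_at) VALUES ($1, now())", ["ann"])
id = conn.exec("SELECT CURRVAL('users_id_seq')")[0]["currval"].to_i
puts id
```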
@@ -277,13 +284,6 @@ For this to be effective, ensure `constraint_exclusion` is set to `partition` (d
 SHOW constraint_exclusion;
 ```
 
-### Writes
-
-If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record back. If you need this, you can either:
-
-1. Insert directly into the partition
-2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
-
 ## One Off Tasks
 
 You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in.
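And the Reads advice in practice: bound queries by the partition column so constraint exclusion can skip irrelevant partitions. A minimal sketch, again with the `pg` gem and a hypothetical `visits` table partitioned on `created_at`:

```ruby
require "pg"

conn = PG.connect(ENV["PGSLICE_URL"])

# The created_at bounds let the planner exclude partitions whose CHECK
# constraints fall outside the range (with constraint_exclusion = partition).
rows = conn.exec("SELECT COUNT(*) FROM visits WHERE created_at >= '2016-12-01' AND created_at < '2017-01-01'")
puts rows[0]["count"]
```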
@@ -309,6 +309,15 @@ gem install specific_install
 gem specific_install ankane/pgslice
 ```
 
+## Docker
+
+```sh
+docker build -t pgslice .
+alias pgslice="docker run --rm -e PGSLICE_URL pgslice"
+```
+
+This will give you the `pgslice` command.
+
 ## Reference
 
 - [PostgreSQL Manual](https://www.postgresql.org/docs/current/static/ddl-partitioning.html)
data/lib/pgslice.rb
CHANGED
@@ -1,6 +1,7 @@
 require "pgslice/version"
 require "slop"
 require "pg"
+require "cgi"
 
 module PgSlice
 class Error < StandardError; end
@@ -51,6 +52,7 @@ module PgSlice
 
 def prep
 table, column, period = arguments
+cast = column_cast(table, column)
 intermediate_table = "#{table}_intermediate"
 trigger_name = self.trigger_name(table)
 
@@ -91,7 +93,7 @@ CREATE TRIGGER #{trigger_name}
 SQL
 
 queries << <<-SQL
-COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period}';
+COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period},cast:#{cast}';
 SQL
 end
 
@@ -133,11 +135,11 @@ SQL
 
 queries = []
 
-period, field, needs_comment = settings_from_trigger(original_table, table)
+period, field, cast, needs_comment = settings_from_trigger(original_table, table)
 abort "Could not read settings" unless period
 
 if needs_comment
-queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period}';"
+queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period},cast:#{cast}';"
 end
 
 # today = utc date
@@ -152,7 +154,7 @@ SQL
 
 queries << <<-SQL
 CREATE TABLE #{partition_name}
-(CHECK (#{field} >= #{sql_date(day)} AND #{field} < #{sql_date(advance_date(day, period, 1))}))
+(CHECK (#{field} >= #{sql_date(day, cast)} AND #{field} < #{sql_date(advance_date(day, period, 1), cast)}))
 INHERITS (#{table});
 SQL
 
@@ -175,7 +177,7 @@ CREATE TABLE #{partition_name}
 day = DateTime.strptime(table.split("_").last, name_format)
 partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
 
-sql = "(NEW.#{field} >= #{sql_date(day)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1))}) THEN
+sql = "(NEW.#{field} >= #{sql_date(day, cast)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
 INSERT INTO #{partition_name} VALUES (NEW.*);"
 
 if day.to_date < today
@@ -227,7 +229,7 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
 abort "Table not found: #{source_table}" unless table_exists?(source_table)
 abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
 
-period, field, needs_comment = settings_from_trigger(table, dest_table)
+period, field, cast, needs_comment = settings_from_trigger(table, dest_table)
 
 if period
 name_format = self.name_format(period)
@@ -252,7 +254,7 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
 if options[:start]
 max_dest_id = options[:start]
 else
-min_source_id = min_id(source_table, primary_key, field, starting_time, options[:where])
+min_source_id = min_id(source_table, primary_key, field, cast, starting_time, options[:where])
 max_dest_id = min_source_id - 1 if min_source_id
 end
 end
@@ -260,13 +262,14 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
 starting_id = max_dest_id
 fields = columns(source_table).map { |c| PG::Connection.quote_ident(c) }.join(", ")
 batch_size = options[:batch_size]
+cast = column_cast(table, field)
 
 i = 1
 batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
 while starting_id < max_source_id
 where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
 if starting_time
-where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
+where << " AND #{field} >= #{sql_date(starting_time, cast)} AND #{field} < #{sql_date(ending_time, cast)}"
 end
 if options[:where]
 where << " AND #{options[:where]}"
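For orientation, here is roughly what the fill loop's WHERE clause looks like once the new `cast` is threaded through. The values below are made up (the real ones come from the table, `options[:batch_size]`, and `options[:where]`), and the two literals stand in for what `sql_date` returns for a `timestamptz` column:

```ruby
# Hypothetical inputs for one batch of the fill loop
primary_key = "id"
field       = "created_at"
batch_size  = 10_000
starting_id = 0

# Stand-ins for sql_date(starting_time, "timestamptz") / sql_date(ending_time, "timestamptz")
lower = "'2016-12-01 00:00:00 UTC'::timestamptz"
upper = "'2017-01-01 00:00:00 UTC'::timestamptz"

where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
where << " AND #{field} >= #{lower} AND #{field} < #{upper}"
puts where
# id > 0 AND id <= 10000 AND created_at >= '2016-12-01 00:00:00 UTC'::timestamptz AND created_at < '2017-01-01 00:00:00 UTC'::timestamptz
```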
@@ -398,8 +401,16 @@ INSERT INTO #{dest_table} (#{fields})
 connect_timeout: 1
 }.reject { |_, value| value.to_s.empty? }
 config.map { |key, value| config[key] = uri_parser.unescape(value) if value.is_a?(String) }
+@schema = CGI.parse(uri.query.to_s)["schema"][0] || "public"
 PG::Connection.new(config)
 end
+rescue PG::ConnectionBad => e
+abort e.message
+end
+
+def schema
+connection # ensure called first
+@schema
 end
 
 def execute(query, params = [])
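The new `schema` handling reads an optional `schema` query parameter from the connection URL and falls back to `public`. A standalone sketch of that parsing; the URL is a made-up example:

```ruby
require "cgi"
require "uri"

url = "postgres://user:pass@localhost:5432/mydb?schema=analytics" # hypothetical
uri = URI.parse(url)

# CGI.parse returns [] for missing keys, so a URL without ?schema= yields "public"
schema = CGI.parse(uri.query.to_s)["schema"][0] || "public"
puts schema # => analytics
```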
@@ -432,7 +443,7 @@ INSERT INTO #{dest_table} (#{fields})
 
 def existing_tables(like:)
 query = "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = $1 AND tablename LIKE $2"
-execute(query, [
+execute(query, [schema, like]).map { |r| r["tablename"] }.sort
 end
 
 def table_exists?(table)
@@ -440,7 +451,7 @@ INSERT INTO #{dest_table} (#{fields})
 end
 
 def columns(table)
-execute("SELECT column_name FROM information_schema.columns WHERE table_schema =
+execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2", [schema, table]).map{ |r| r["column_name"] }
 end
 
 # http://stackoverflow.com/a/20537829
@@ -460,7 +471,7 @@ INSERT INTO #{dest_table} (#{fields})
 pg_attribute.attnum = any(pg_index.indkey) AND
 indisprimary
 SQL
-row = execute(query, [
+row = execute(query, [schema, table])[0]
 row && row["attname"]
 end
 
@@ -473,10 +484,11 @@ INSERT INTO #{dest_table} (#{fields})
 execute(query)[0]["max"].to_i
 end
 
-def min_id(table, primary_key, column, starting_time, where)
+def min_id(table, primary_key, column, cast, starting_time, where)
+cast = column_cast(table, column)
 query = "SELECT MIN(#{primary_key}) FROM #{table}"
 conditions = []
-conditions << "#{column} >= #{sql_date(starting_time)}" if starting_time
+conditions << "#{column} >= #{sql_date(starting_time, cast)}" if starting_time
 conditions << where if where
 query << " WHERE #{conditions.join(" AND ")}" if conditions.any?
 (execute(query)[0]["min"] || 1).to_i
@@ -498,10 +510,10 @@ INSERT INTO #{dest_table} (#{fields})
 JOIN pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
 JOIN pg_namespace n ON n.oid = s.relnamespace
 WHERE s.relkind = 'S'
-AND n.nspname =
-AND t.relname = $
+AND n.nspname = $1
+AND t.relname = $2
 SQL
-execute(query, [table])
+execute(query, [schema, table])
 end
 
 # helpers
@@ -518,8 +530,18 @@ INSERT INTO #{dest_table} (#{fields})
 "#{table}_retired"
 end
 
-def
-"
+def column_cast(table, column)
+data_type = execute("SELECT data_type FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND column_name = $3", [schema, table, column])[0]["data_type"]
+data_type == "timestamp with time zone" ? "timestamptz" : "date"
+end
+
+def sql_date(time, cast)
+if cast == "timestamptz"
+fmt = "%Y-%m-%d %H:%M:%S UTC"
+else
+fmt = "%Y-%m-%d"
+end
+"'#{time.strftime(fmt)}'::#{cast}"
 end
 
 def name_format(period)
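To make the effect of the two new helpers concrete: `column_cast` picks `timestamptz` only for `timestamp with time zone` columns, and `sql_date` then renders partition boundaries either as plain dates or as UTC timestamps. A condensed, standalone restatement of `sql_date` so its output can be shown (the boundary value is arbitrary):

```ruby
def sql_date(time, cast)
  fmt = cast == "timestamptz" ? "%Y-%m-%d %H:%M:%S UTC" : "%Y-%m-%d"
  "'#{time.strftime(fmt)}'::#{cast}"
end

boundary = Time.utc(2016, 12, 13)
puts sql_date(boundary, "date")        # '2016-12-13'::date
puts sql_date(boundary, "timestamptz") # '2016-12-13 00:00:00 UTC'::timestamptz
```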
@@ -557,7 +579,7 @@ INSERT INTO #{dest_table} (#{fields})
 needs_comment = false
 comment = execute("SELECT obj_description(oid, 'pg_trigger') AS comment FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table])[0]
 if comment
-field, period = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil]
+field, period, cast = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil, nil]
 end
 
 unless period
@@ -571,7 +593,14 @@ INSERT INTO #{dest_table} (#{fields})
 field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
 end
 
-[period, field, needs_comment]
+# backwards compatibility with 0.2.3 and earlier (pre-timestamptz support)
+unless cast
+cast = "date"
+# update comment to explicitly define cast
+needs_comment = true
+end
+
+[period, field, cast, needs_comment]
 end
 end
 end
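For reference, the trigger-comment settings now round-trip like this. The comment strings below are only examples of the format (`column:...,period:...,cast:...`), with the second showing how a pre-0.3.0 comment falls back to `date`:

```ruby
comments = [
  "column:created_at,period:day,cast:timestamptz", # written by 0.3.0
  "column:created_at,period:day"                   # written by 0.2.3 and earlier
]

comments.each do |comment|
  field, period, cast = comment.split(",").map { |v| v.split(":").last }
  cast ||= "date" # mirrors the backwards-compatibility default above
  puts [field, period, cast].inspect
end
# ["created_at", "day", "timestamptz"]
# ["created_at", "day", "date"]
```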
data/lib/pgslice/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pgslice
 version: !ruby/object:Gem::Version
-  version: 0.2.3
+  version: 0.3.0
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-
+date: 2016-12-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: slop
@@ -76,6 +76,7 @@ extra_rdoc_files: []
 files:
 - ".gitignore"
 - CHANGELOG.md
+- Dockerfile
 - Gemfile
 - README.md
 - Rakefile