pgslice 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e0437c8314b62daec17ddf072a9f269fa6b7aa1
4
- data.tar.gz: 311105b8057bc43cf9ff75a7a8be0dc62b707c9b
3
+ metadata.gz: 1fbf523156daf638be0cf5de6a77c4c2ed4375ab
4
+ data.tar.gz: fa1839b58db3046ab8e4705a410c7c7e3d99a86a
5
5
  SHA512:
6
- metadata.gz: 4db509fb649e78d0fac104071e58621b54a01bc5b17b53bbd5aa786ee9908e657ed6adaf6e407ffae28cfc81549cdde76512e4a6b9195a18ac421ac31eb21755
7
- data.tar.gz: a43f16632bca4827762396778d3ed23718046f774e584ef65b6a48af8245d195014c4f628a190f9e58c0d534b4bd9b8aaf4dd59d776d4428e9f56afdaacc69ae
6
+ metadata.gz: 697401d7e40ad0db4f7be59d2a0e711b8f1e57c7c3efdf8874d4ed29f915bf5e98bc967e6f6199990a53853796e214f21cfbfe0961bd504defa8143124f0163a
7
+ data.tar.gz: bc09c5daebc448a40aace451e80733a15f5d1ef6443e7f3fb7a5d7d82f3fee444b4e918c9b08dd6f80ec82a956ab5882a11d5997438025e4d394a0fe0b5b89b3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.3.0
2
+
3
+ - Better query performance for `timestamptz` columns
4
+ - Added support for schemas other than `public`
5
+
1
6
  ## 0.2.3
2
7
 
3
8
  - Added `--dest-table` option to `fill`
data/Dockerfile ADDED
@@ -0,0 +1,3 @@
1
+ FROM ruby:latest
2
+ RUN gem install pgslice
3
+ ENTRYPOINT ["pgslice"]
data/README.md CHANGED
@@ -254,10 +254,17 @@ To undo swap, use:
254
254
  pgslice unswap <table>
255
255
  ```
256
256
 
257
- ## App Changes
257
+ ## App Considerations
258
258
 
259
259
  This setup allows you to read and write with the original table name with no knowledge it’s partitioned. However, there are a few things to be aware of.
260
260
 
261
+ ### Writes
262
+
263
+ If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
264
+
265
+ 1. Insert directly into the partition
266
+ 2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
267
+
261
268
  ### Reads
262
269
 
263
270
  When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries:
@@ -277,13 +284,6 @@ For this to be effective, ensure `constraint_exclusion` is set to `partition` (d
277
284
  SHOW constraint_exclusion;
278
285
  ```
279
286
 
280
- ### Writes
281
-
282
- If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record back. If you need this, you can either:
283
-
284
- 1. Insert directly into the partition
285
- 2. Get the value after the insert with `SELECT CURRVAL('sequence_name')`
286
-
287
287
  ## One Off Tasks
288
288
 
289
289
  You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in.
@@ -309,6 +309,15 @@ gem install specific_install
309
309
  gem specific_install ankane/pgslice
310
310
  ```
311
311
 
312
+ ## Docker
313
+
314
+ ```sh
315
+ docker build -t pgslice .
316
+ alias pgslice="docker run --rm -e PGSLICE_URL pgslice"
317
+ ```
318
+
319
+ This will give you the `pgslice` command.
320
+
312
321
  ## Reference
313
322
 
314
323
  - [PostgreSQL Manual](https://www.postgresql.org/docs/current/static/ddl-partitioning.html)
data/lib/pgslice.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "pgslice/version"
2
2
  require "slop"
3
3
  require "pg"
4
+ require "cgi"
4
5
 
5
6
  module PgSlice
6
7
  class Error < StandardError; end
@@ -51,6 +52,7 @@ module PgSlice
51
52
 
52
53
  def prep
53
54
  table, column, period = arguments
55
+ cast = column_cast(table, column)
54
56
  intermediate_table = "#{table}_intermediate"
55
57
  trigger_name = self.trigger_name(table)
56
58
 
@@ -91,7 +93,7 @@ CREATE TRIGGER #{trigger_name}
91
93
  SQL
92
94
 
93
95
  queries << <<-SQL
94
- COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period}';
96
+ COMMENT ON TRIGGER #{trigger_name} ON #{intermediate_table} is 'column:#{column},period:#{period},cast:#{cast}';
95
97
  SQL
96
98
  end
97
99
 
@@ -133,11 +135,11 @@ SQL
133
135
 
134
136
  queries = []
135
137
 
136
- period, field, needs_comment = settings_from_trigger(original_table, table)
138
+ period, field, cast, needs_comment = settings_from_trigger(original_table, table)
137
139
  abort "Could not read settings" unless period
138
140
 
139
141
  if needs_comment
140
- queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period}';"
142
+ queries << "COMMENT ON TRIGGER #{trigger_name} ON #{table} is 'column:#{field},period:#{period},cast:#{cast}';"
141
143
  end
142
144
 
143
145
  # today = utc date
@@ -152,7 +154,7 @@ SQL
152
154
 
153
155
  queries << <<-SQL
154
156
  CREATE TABLE #{partition_name}
155
- (CHECK (#{field} >= #{sql_date(day)} AND #{field} < #{sql_date(advance_date(day, period, 1))}))
157
+ (CHECK (#{field} >= #{sql_date(day, cast)} AND #{field} < #{sql_date(advance_date(day, period, 1), cast)}))
156
158
  INHERITS (#{table});
157
159
  SQL
158
160
 
@@ -175,7 +177,7 @@ CREATE TABLE #{partition_name}
175
177
  day = DateTime.strptime(table.split("_").last, name_format)
176
178
  partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
177
179
 
178
- sql = "(NEW.#{field} >= #{sql_date(day)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1))}) THEN
180
+ sql = "(NEW.#{field} >= #{sql_date(day, cast)} AND NEW.#{field} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
179
181
  INSERT INTO #{partition_name} VALUES (NEW.*);"
180
182
 
181
183
  if day.to_date < today
@@ -227,7 +229,7 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
227
229
  abort "Table not found: #{source_table}" unless table_exists?(source_table)
228
230
  abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
229
231
 
230
- period, field, needs_comment = settings_from_trigger(table, dest_table)
232
+ period, field, cast, needs_comment = settings_from_trigger(table, dest_table)
231
233
 
232
234
  if period
233
235
  name_format = self.name_format(period)
@@ -252,7 +254,7 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
252
254
  if options[:start]
253
255
  max_dest_id = options[:start]
254
256
  else
255
- min_source_id = min_id(source_table, primary_key, field, starting_time, options[:where])
257
+ min_source_id = min_id(source_table, primary_key, field, cast, starting_time, options[:where])
256
258
  max_dest_id = min_source_id - 1 if min_source_id
257
259
  end
258
260
  end
@@ -260,13 +262,14 @@ CREATE OR REPLACE FUNCTION #{trigger_name}()
260
262
  starting_id = max_dest_id
261
263
  fields = columns(source_table).map { |c| PG::Connection.quote_ident(c) }.join(", ")
262
264
  batch_size = options[:batch_size]
265
+ cast = column_cast(table, field)
263
266
 
264
267
  i = 1
265
268
  batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
266
269
  while starting_id < max_source_id
267
270
  where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
268
271
  if starting_time
269
- where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
272
+ where << " AND #{field} >= #{sql_date(starting_time, cast)} AND #{field} < #{sql_date(ending_time, cast)}"
270
273
  end
271
274
  if options[:where]
272
275
  where << " AND #{options[:where]}"
@@ -398,8 +401,16 @@ INSERT INTO #{dest_table} (#{fields})
398
401
  connect_timeout: 1
399
402
  }.reject { |_, value| value.to_s.empty? }
400
403
  config.map { |key, value| config[key] = uri_parser.unescape(value) if value.is_a?(String) }
404
+ @schema = CGI.parse(uri.query.to_s)["schema"][0] || "public"
401
405
  PG::Connection.new(config)
402
406
  end
407
+ rescue PG::ConnectionBad => e
408
+ abort e.message
409
+ end
410
+
411
+ def schema
412
+ connection # ensure called first
413
+ @schema
403
414
  end
404
415
 
405
416
  def execute(query, params = [])
@@ -432,7 +443,7 @@ INSERT INTO #{dest_table} (#{fields})
432
443
 
433
444
  def existing_tables(like:)
434
445
  query = "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = $1 AND tablename LIKE $2"
435
- execute(query, ["public", like]).map { |r| r["tablename"] }.sort
446
+ execute(query, [schema, like]).map { |r| r["tablename"] }.sort
436
447
  end
437
448
 
438
449
  def table_exists?(table)
@@ -440,7 +451,7 @@ INSERT INTO #{dest_table} (#{fields})
440
451
  end
441
452
 
442
453
  def columns(table)
443
- execute("SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = $1", [table]).map{ |r| r["column_name"] }
454
+ execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2", [schema, table]).map{ |r| r["column_name"] }
444
455
  end
445
456
 
446
457
  # http://stackoverflow.com/a/20537829
@@ -460,7 +471,7 @@ INSERT INTO #{dest_table} (#{fields})
460
471
  pg_attribute.attnum = any(pg_index.indkey) AND
461
472
  indisprimary
462
473
  SQL
463
- row = execute(query, ["public", table])[0]
474
+ row = execute(query, [schema, table])[0]
464
475
  row && row["attname"]
465
476
  end
466
477
 
@@ -473,10 +484,11 @@ INSERT INTO #{dest_table} (#{fields})
473
484
  execute(query)[0]["max"].to_i
474
485
  end
475
486
 
476
- def min_id(table, primary_key, column, starting_time, where)
487
+ def min_id(table, primary_key, column, cast, starting_time, where)
488
+ cast = column_cast(table, column)
477
489
  query = "SELECT MIN(#{primary_key}) FROM #{table}"
478
490
  conditions = []
479
- conditions << "#{column} >= #{sql_date(starting_time)}" if starting_time
491
+ conditions << "#{column} >= #{sql_date(starting_time, cast)}" if starting_time
480
492
  conditions << where if where
481
493
  query << " WHERE #{conditions.join(" AND ")}" if conditions.any?
482
494
  (execute(query)[0]["min"] || 1).to_i
@@ -498,10 +510,10 @@ INSERT INTO #{dest_table} (#{fields})
498
510
  JOIN pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
499
511
  JOIN pg_namespace n ON n.oid = s.relnamespace
500
512
  WHERE s.relkind = 'S'
501
- AND n.nspname = 'public'
502
- AND t.relname = $1
513
+ AND n.nspname = $1
514
+ AND t.relname = $2
503
515
  SQL
504
- execute(query, [table])
516
+ execute(query, [schema, table])
505
517
  end
506
518
 
507
519
  # helpers
@@ -518,8 +530,18 @@ INSERT INTO #{dest_table} (#{fields})
518
530
  "#{table}_retired"
519
531
  end
520
532
 
521
- def sql_date(time)
522
- "'#{time.strftime("%Y-%m-%d")}'::date"
533
+ def column_cast(table, column)
534
+ data_type = execute("SELECT data_type FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND column_name = $3", [schema, table, column])[0]["data_type"]
535
+ data_type == "timestamp with time zone" ? "timestamptz" : "date"
536
+ end
537
+
538
+ def sql_date(time, cast)
539
+ if cast == "timestamptz"
540
+ fmt = "%Y-%m-%d %H:%M:%S UTC"
541
+ else
542
+ fmt = "%Y-%m-%d"
543
+ end
544
+ "'#{time.strftime(fmt)}'::#{cast}"
523
545
  end
524
546
 
525
547
  def name_format(period)
@@ -557,7 +579,7 @@ INSERT INTO #{dest_table} (#{fields})
557
579
  needs_comment = false
558
580
  comment = execute("SELECT obj_description(oid, 'pg_trigger') AS comment FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table])[0]
559
581
  if comment
560
- field, period = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil]
582
+ field, period, cast = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil, nil]
561
583
  end
562
584
 
563
585
  unless period
@@ -571,7 +593,14 @@ INSERT INTO #{dest_table} (#{fields})
571
593
  field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
572
594
  end
573
595
 
574
- [period, field, needs_comment]
596
+ # backwards compatibility with 0.2.3 and earlier (pre-timestamptz support)
597
+ unless cast
598
+ cast = "date"
599
+ # update comment to explicitly define cast
600
+ needs_comment = true
601
+ end
602
+
603
+ [period, field, cast, needs_comment]
575
604
  end
576
605
  end
577
606
  end
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.2.3"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-11 00:00:00.000000000 Z
11
+ date: 2016-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop
@@ -76,6 +76,7 @@ extra_rdoc_files: []
76
76
  files:
77
77
  - ".gitignore"
78
78
  - CHANGELOG.md
79
+ - Dockerfile
79
80
  - Gemfile
80
81
  - README.md
81
82
  - Rakefile