pgslice 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,577 @@
1
+ module PgSlice
2
+ class Client < Thor
3
+ check_unknown_options!
4
+
5
+ class_option :url
6
+ class_option :dry_run, type: :boolean, default: false
7
+
8
+ map %w[--version -v] => :version
9
+
10
# Class-level hook that always answers true.
# NOTE(review): Thor is expected to consult this to decide whether a failed
# command terminates the process with a non-zero status - confirm against
# the Thor version in use.
def self.exit_on_failure?
  true
end
13
+
14
# Supported partition periods mapped to the Postgres to_char format string
# that legacy trigger functions are searched for. Frozen so the lookup
# table cannot be mutated at runtime.
SQL_FORMAT = {
  day: "YYYYMMDD",
  month: "YYYYMM"
}.freeze
18
+
19
# Switches both standard streams to unbuffered mode, publishes this
# instance through the $client global, then forwards the original
# arguments to the superclass constructor.
def initialize(*args)
  [$stdout, $stderr].each { |stream| stream.sync = true }
  $client = self
  super
end
25
+
26
+ desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
27
+ option :partition, type: :boolean, default: true
28
+ option :trigger_based, type: :boolean, default: false
29
# Builds and runs the statements that create the intermediate table for
# +table+.
#
# table  - table name; qualified with the configured schema when it has none.
# column - partition column (required unless --no-partition).
# period - partition period, a key of SQL_FORMAT (required unless
#          --no-partition).
#
# Aborts when the arguments do not match the selected options, when the
# source table is missing, or when the intermediate table already exists.
# With declarative partitioning (server_version_num >= 100000 and not
# --trigger-based) the settings are stored as a table comment; otherwise a
# placeholder trigger function and trigger are created and the settings are
# stored as a comment on the trigger.
def prep(table, column=nil, period=nil)
  table = Table.new(qualify_table(table))
  intermediate_table = table.intermediate_table
  trigger_name = table.trigger_name

  unless options[:partition]
    abort "Usage: \"pgslice prep TABLE --no-partition\"" if column || period
    abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
  end
  abort "Table not found: #{table}" unless table.exists?
  abort "Table already exists: #{intermediate_table}" if intermediate_table.exists?

  if options[:partition]
    abort "Usage: \"pgslice prep TABLE COLUMN PERIOD\"" unless column && period
    abort "Column not found: #{column}" unless table.columns.include?(column)
    abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
  end

  queries = []

  # ask the server once; the value is needed for two decisions below
  version_num = server_version_num
  declarative = version_num >= 100000 && !options[:trigger_based]

  if declarative && options[:partition]
    # the partition key is a column, not a table: quote it as a single
    # identifier so a name containing "." is not split in two
    queries << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
    SQL

    if version_num >= 110000
      # re-target each index definition at the intermediate table and drop
      # the original index name
      table.index_defs.each do |index_def|
        queries << index_def.sub(/ ON \S+ USING /, " ON #{quote_table(intermediate_table)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
      end
    end

    # add comment
    cast = table.column_cast(column)
    queries << <<-SQL
COMMENT ON TABLE #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
    SQL
  else
    queries << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING ALL);
    SQL

    table.foreign_keys.each do |fk_def|
      queries << "ALTER TABLE #{quote_table(intermediate_table)} ADD #{fk_def};"
    end
  end

  if options[:partition] && !declarative
    # placeholder function: inserts fail until add_partitions replaces it
    queries << <<-SQL
CREATE FUNCTION #{quote_ident(trigger_name)}()
RETURNS trigger AS $$
BEGIN
RAISE EXCEPTION 'Create partitions first.';
END;
$$ LANGUAGE plpgsql;
    SQL

    queries << <<-SQL
CREATE TRIGGER #{quote_ident(trigger_name)}
BEFORE INSERT ON #{quote_table(intermediate_table)}
FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
    SQL

    cast = table.column_cast(column)
    queries << <<-SQL
COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
    SQL
  end

  run_queries(queries)
end
102
+
103
+ desc "unprep TABLE", "Undo prep"
104
# Reverses prep: drops the intermediate table (CASCADE) and, if present,
# the trigger function named after the original table. Aborts when the
# intermediate table does not exist.
def unprep(table)
  source = Table.new(qualify_table(table))
  staging = source.intermediate_table
  function_name = source.trigger_name

  abort "Table not found: #{staging}" unless staging.exists?

  run_queries([
    "DROP TABLE #{quote_table(staging)} CASCADE;",
    "DROP FUNCTION IF EXISTS #{quote_ident(function_name)}();"
  ])
end
117
+
118
+ desc "add_partitions TABLE", "Add partitions"
119
+ option :intermediate, type: :boolean, default: false
120
+ option :past, type: :numeric, default: 0
121
+ option :future, type: :numeric, default: 0
122
# Creates the partitions missing from the window
# [current period - options[:past], current period + options[:future]]
# and, for trigger-based setups, regenerates the routing trigger function.
#
# table - table name; qualified with the configured schema when it has none.
#         With --intermediate the partitions are attached to the
#         intermediate table instead of the original one.
#
# Aborts when the target table is missing or no partition settings can be
# read from it. Nothing is executed when there is no work to do.
def add_partitions(table)
  original_table = Table.new(qualify_table(table))
  table = options[:intermediate] ? original_table.intermediate_table : original_table
  trigger_name = original_table.trigger_name

  abort "Table not found: #{table}" unless table.exists?

  future = options[:future]
  past = options[:past]
  range = (-1 * past)..future

  period, field, cast, needs_comment, declarative = settings_from_trigger(original_table, table)
  unless period
    message = "No settings found: #{table}"
    message = "#{message}\nDid you mean to use --intermediate?" unless options[:intermediate]
    abort message
  end

  # strftime/strptime pattern of the partition suffix; computed once
  suffix_format = name_format(period)

  queries = []

  if needs_comment
    queries << "COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(table)} is 'column:#{field},period:#{period},cast:#{cast}';"
  end

  # today = utc date
  today = round_date(DateTime.now.new_offset(0).to_date, period)

  # table whose indexes, foreign keys and primary key are copied onto
  # each new partition
  schema_table =
    if !declarative
      table
    elsif options[:intermediate]
      original_table
    else
      Table.new(original_table.existing_partitions(period).last)
    end

  # indexes automatically propagate in Postgres 11+
  index_defs =
    if !declarative || server_version_num < 110000
      schema_table.index_defs
    else
      []
    end

  fk_defs = schema_table.foreign_keys
  primary_key = schema_table.primary_key

  added_partitions = []
  range.each do |n|
    day = advance_date(today, period, n)

    partition_name = Table.new("#{original_table}_#{day.strftime(suffix_format)}")
    next if partition_name.exists?
    added_partitions << partition_name.to_s

    if declarative
      queries << <<-SQL
CREATE TABLE #{quote_table(partition_name)} PARTITION OF #{quote_table(table)} FOR VALUES FROM (#{sql_date(day, cast, false)}) TO (#{sql_date(advance_date(day, period, 1), cast, false)});
      SQL
    else
      queries << <<-SQL
CREATE TABLE #{quote_table(partition_name)}
(CHECK (#{quote_ident(field)} >= #{sql_date(day, cast)} AND #{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}))
INHERITS (#{quote_table(table)});
      SQL
    end

    queries << "ALTER TABLE #{quote_table(partition_name)} ADD PRIMARY KEY (#{primary_key.map { |k| quote_ident(k) }.join(", ")});" if primary_key.any?

    index_defs.each do |index_def|
      queries << index_def.sub(/ ON \S+ USING /, " ON #{quote_table(partition_name)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
    end

    fk_defs.each do |fk_def|
      queries << "ALTER TABLE #{quote_table(partition_name)} ADD #{fk_def};"
    end
  end

  unless declarative
    # update trigger based on existing partitions
    current_defs = []
    future_defs = []
    past_defs = []
    existing_tables = original_table.existing_partitions(period)
    existing_tables = (existing_tables + added_partitions).uniq.sort

    existing_tables.each do |existing_table|
      day = DateTime.strptime(existing_table.split("_").last, suffix_format)
      partition_name = "#{original_table}_#{day.strftime(suffix_format)}"

      condition = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)})"
      sql = "#{condition} THEN\nINSERT INTO #{quote_table(partition_name)} VALUES (NEW.*);"

      # `today` is already rounded to the start of its period, so the
      # partition whose start equals it is the current one. (The previous
      # test, "end of partition < today", could never be true after the
      # "start < today" test had failed.)
      start_date = day.to_date
      if start_date < today
        past_defs << sql
      elsif start_date == today
        current_defs << sql
      else
        future_defs << sql
      end
    end

    # order by current period, future periods asc, past periods desc
    trigger_defs = current_defs + future_defs + past_defs.reverse

    if trigger_defs.any?
      queries << <<-SQL
CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
RETURNS trigger AS $$
BEGIN
IF #{trigger_defs.join("\n ELSIF ")}
ELSE
RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
      SQL
    end
  end

  run_queries(queries) if queries.any?
end
246
+
247
+ desc "fill TABLE", "Fill the partitions in batches"
248
+ option :batch_size, type: :numeric, default: 10000
249
+ option :swapped, type: :boolean, default: false
250
+ option :source_table
251
+ option :dest_table
252
+ option :start
253
+ option :where
254
+ option :sleep, type: :numeric
255
# Copies rows from the source table into the destination table in
# primary-key batches of options[:batch_size].
#
# table - table name; qualified with the configured schema when it has none.
#
# Source and destination default to table -> intermediate table, or
# retired table -> table with --swapped; --source-table / --dest-table
# override either side. When partition settings are found, the copy is
# limited to the time range covered by the existing partitions.
#
# Aborts when a table is missing, when there is no primary key, when the
# primary key is not numeric, or when --start is not an integer.
def fill(table)
  table = Table.new(qualify_table(table))
  source_table = Table.new(options[:source_table]) if options[:source_table]
  dest_table = Table.new(options[:dest_table]) if options[:dest_table]

  if options[:swapped]
    source_table ||= table.retired_table
    dest_table ||= table
  else
    source_table ||= table
    dest_table ||= table.intermediate_table
  end

  abort "Table not found: #{source_table}" unless source_table.exists?
  abort "Table not found: #{dest_table}" unless dest_table.exists?

  period, field, cast, _needs_comment, declarative = settings_from_trigger(table, dest_table)

  # stay nil when there are no settings or no partitions yet
  starting_time = nil
  ending_time = nil
  existing_tables = nil

  if period
    suffix_format = name_format(period)

    existing_tables = table.existing_partitions(period)
    if existing_tables.any?
      starting_time = DateTime.strptime(existing_tables.first.split("_").last, suffix_format)
      ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, suffix_format), period, 1)
    end
  end

  schema_table = period && declarative ? Table.new(existing_tables.last) : table

  primary_key = schema_table.primary_key[0]
  abort "No primary key" unless primary_key

  max_source_id = nil
  begin
    max_source_id = source_table.max_id(primary_key)
  rescue PG::UndefinedFunction
    abort "Only numeric primary keys are supported"
  end

  max_dest_id =
    if options[:start]
      # --start is declared without a type, so it arrives as a String;
      # convert it before it is used in id arithmetic below
      begin
        Integer(options[:start])
      rescue ArgumentError, TypeError
        abort "Invalid start: #{options[:start]}"
      end
    elsif options[:swapped]
      dest_table.max_id(primary_key, where: options[:where], below: max_source_id)
    else
      dest_table.max_id(primary_key, where: options[:where])
    end

  if max_dest_id == 0 && !options[:swapped]
    # empty destination: jump straight to the first source row in range
    min_source_id = source_table.min_id(primary_key, field, cast, starting_time, options[:where])
    max_dest_id = min_source_id - 1 if min_source_id
  end

  starting_id = max_dest_id
  fields = source_table.columns.map { |c| quote_ident(c) }.join(", ")
  batch_size = options[:batch_size]

  i = 1
  batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil

  if batch_count == 0
    log_sql "/* nothing to fill */"
  end

  while starting_id < max_source_id
    where = "#{quote_ident(primary_key)} > #{starting_id} AND #{quote_ident(primary_key)} <= #{starting_id + batch_size}"
    if starting_time
      where << " AND #{quote_ident(field)} >= #{sql_date(starting_time, cast)} AND #{quote_ident(field)} < #{sql_date(ending_time, cast)}"
    end
    if options[:where]
      where << " AND #{options[:where]}"
    end

    query = <<-SQL
/* #{i} of #{batch_count} */
INSERT INTO #{quote_table(dest_table)} (#{fields})
SELECT #{fields} FROM #{quote_table(source_table)}
WHERE #{where}
    SQL

    run_query(query)

    starting_id += batch_size
    i += 1

    # pause only when another batch will follow
    if options[:sleep] && starting_id < max_source_id
      sleep(options[:sleep])
    end
  end
end
346
+
347
+ desc "swap TABLE", "Swap the intermediate table with the original table"
348
+ option :lock_timeout, default: "5s"
349
# Renames the original table to its retired name and the intermediate
# table to the original name, then re-points every sequence reported by
# the original table at the table now carrying that name. On servers with
# server_version_num >= 90300 a SET LOCAL lock_timeout statement is run
# first.
#
# Aborts when the original or intermediate table is missing, or when the
# retired table already exists.
def swap(table)
  live = Table.new(qualify_table(table))
  staging = live.intermediate_table
  retired = live.retired_table

  abort "Table not found: #{live}" unless live.exists?
  abort "Table not found: #{staging}" unless staging.exists?
  abort "Table already exists: #{retired}" if retired.exists?

  renames = [
    "ALTER TABLE #{quote_table(live)} RENAME TO #{quote_no_schema(retired)};",
    "ALTER TABLE #{quote_table(staging)} RENAME TO #{quote_no_schema(live)};"
  ]

  ownership = live.sequences.map do |seq|
    "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{quote_table(live)}.#{quote_ident(seq["related_column"])};"
  end

  statements = renames + ownership
  if server_version_num >= 90300
    statements.unshift("SET LOCAL lock_timeout = '#{options[:lock_timeout]}';")
  end

  run_queries(statements)
end
371
+
372
+ desc "unswap TABLE", "Undo swap"
373
# Reverses swap: the current table goes back to the intermediate name and
# the retired table regains the original name, followed by one
# ALTER SEQUENCE ... OWNED BY statement per sequence reported by the table.
#
# Aborts when the current or retired table is missing, or when the
# intermediate table already exists.
def unswap(table)
  live = Table.new(qualify_table(table))
  staging = live.intermediate_table
  retired = live.retired_table

  abort "Table not found: #{live}" unless live.exists?
  abort "Table not found: #{retired}" unless retired.exists?
  abort "Table already exists: #{staging}" if staging.exists?

  renames = [
    "ALTER TABLE #{quote_table(live)} RENAME TO #{quote_no_schema(staging)};",
    "ALTER TABLE #{quote_table(retired)} RENAME TO #{quote_no_schema(live)};"
  ]

  ownership = live.sequences.map do |seq|
    "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{quote_table(live)}.#{quote_ident(seq["related_column"])};"
  end

  run_queries(renames + ownership)
end
393
+
394
+ desc "analyze TABLE", "Analyze tables"
395
+ option :swapped, type: :boolean, default: false
396
# Runs ANALYZE VERBOSE, outside a transaction, on every existing partition
# and then on the parent table: the table itself with --swapped, otherwise
# its intermediate table.
def analyze(table)
  base = Table.new(qualify_table(table))
  parent =
    if options[:swapped]
      base
    else
      base.intermediate_table
    end

  targets = base.existing_partitions + [parent]
  statements = targets.map { |target| "ANALYZE VERBOSE #{quote_table(target)};" }
  run_queries_without_transaction(statements)
end
404
+
405
+ desc "version", "Show version"
406
# Logs the program name followed by PgSlice::VERSION.
def version
  banner = "pgslice #{PgSlice::VERSION}"
  log(banner)
end
409
+
410
+ protected
411
+
412
+ # output
413
+
414
# Passes +message+ (nil by default) to the `error` output helper.
# NOTE(review): `error` is presumably Thor's stderr writer - confirm.
def log(message = nil)
  error(message)
end
417
+
418
# Passes +message+ (nil by default) to the `say` output helper; used for
# the SQL text that is echoed back to the user.
def log_sql(message = nil)
  say(message)
end
421
+
422
# Stops the current command by raising Thor::Error carrying +message+.
# Takes precedence over Kernel#abort inside this class.
def abort(message)
  raise(Thor::Error, message)
end
425
+
426
+ # database connection
427
+
428
+ def connection
429
+ @connection ||= begin
430
+ url = options[:url] || ENV["PGSLICE_URL"]
431
+ abort "Set PGSLICE_URL or use the --url option" unless url
432
+
433
+ uri = URI.parse(url)
434
+ params = CGI.parse(uri.query.to_s)
435
+ # remove schema
436
+ @schema = Array(params.delete("schema") || "public")[0]
437
+ uri.query = URI.encode_www_form(params)
438
+
439
+ ENV["PGCONNECT_TIMEOUT"] ||= "1"
440
+ PG::Connection.new(uri.to_s)
441
+ end
442
+ rescue PG::ConnectionBad => e
443
+ abort e.message
444
+ rescue URI::InvalidURIError
445
+ abort "Invalid url"
446
+ end
447
+
448
# Returns the schema name captured while the connection was set up.
def schema
  # @schema is only assigned as a side effect of connecting, so make sure
  # the connection exists before reading it
  connection
  @schema
end
452
+
453
# Runs +query+ with the bind values in +params+ on the shared connection
# and returns the result converted with #to_a.
def execute(query, params = [])
  result = connection.exec_params(query, params)
  result.to_a
end
456
+
457
# Runs +queries+ inside one transaction on the shared connection, echoing
# BEGIN;/COMMIT; markers around them. Outside dry-run mode the session's
# client_min_messages is first lowered to warning for this transaction.
def run_queries(queries)
  connection.transaction do
    unless options[:dry_run]
      execute("SET LOCAL client_min_messages TO warning")
    end

    log_sql("BEGIN;")
    log_sql
    run_queries_without_transaction(queries)
    log_sql("COMMIT;")
  end
end
466
+
467
# Echoes +query+, executes it unless dry-run mode is on, then echoes a
# blank separator. A PG::ServerError is turned into an abort whose message
# is "<error class>: <error message>".
def run_query(query)
  log_sql(query)

  begin
    execute(query) unless options[:dry_run]
  rescue PG::ServerError => failure
    abort("#{failure.class.name}: #{failure.message}")
  end

  log_sql
end
478
+
479
# Runs each entry of +queries+ in order through run_query, without
# opening a transaction here.
def run_queries_without_transaction(queries)
  queries.each { |statement| run_query(statement) }
end
484
+
485
# Queries the server for server_version_num and returns it as an Integer
# (for example 110005). Every call issues a new query.
def server_version_num
  row = execute("SHOW server_version_num")[0]
  row["server_version_num"].to_i
end
488
+
489
+ # helpers
490
+
491
# Renders +time+ as a single-quoted SQL literal.
#
# time     - object responding to #strftime.
# cast     - "timestamptz" produces a full timestamp with a UTC suffix;
#            any other value produces a date only.
# add_cast - when true (default) the literal is suffixed with "::<cast>".
def sql_date(time, cast, add_cast = true)
  pattern = cast == "timestamptz" ? "%Y-%m-%d %H:%M:%S UTC" : "%Y-%m-%d"
  literal = "'#{time.strftime(pattern)}'"
  return literal unless add_cast

  "#{literal}::#{cast}"
end
500
+
501
# Returns the strftime pattern used for partition name suffixes:
# "%Y%m%d" for the :day period, "%Y%m" for every other period.
def name_format(period)
  period.to_sym == :day ? "%Y%m%d" : "%Y%m"
end
509
+
510
# Truncates +date+ to the start of its period: the date itself for :day,
# the first day of its month for every other period.
def round_date(date, period)
  day = date.to_date
  return day if period.to_sym == :day

  Date.new(day.year, day.month)
end
519
+
520
# Moves +date+ forward by +count+ periods (days for :day, months for any
# other period) and returns a Date. A negative +count+ moves backwards.
def advance_date(date, period, count = 1)
  start = date.to_date
  period.to_sym == :day ? start.next_day(count) : start.next_month(count)
end
529
+
530
# Quotes +value+ as a single SQL identifier by delegating to
# PG::Connection.quote_ident.
def quote_ident(value)
  PG::Connection.quote_ident(value)
end
533
+
534
# Quotes a possibly schema-qualified table name. The name is split on the
# first "." only, each part is quoted as an identifier, and the parts are
# joined back with ".".
def quote_table(table)
  parts = table.to_s.split(".", 2)
  parts.map { |part| quote_ident(part) }.join(".")
end
537
+
538
# Quotes only the table part of a possibly schema-qualified name, i.e.
# everything after the first ".".
def quote_no_schema(table)
  unqualified = table.to_s.split(".", 2).last
  quote_ident(unqualified)
end
541
+
542
# Returns +table+ unchanged when its string form already contains a ".";
# otherwise prefixes it with the configured schema.
def qualify_table(table)
  return table if table.to_s.include?(".")

  [schema, table].join(".")
end
545
+
546
# Reads the partition settings stored for a table.
#
# original_table - supplies the trigger name to look up.
# table          - the table that is asked for the trigger and, failing
#                  that, for its own comment.
#
# Returns [period, field, cast, needs_comment, declarative], or [] when no
# settings can be found.
#   * The comment is expected to look like
#     "column:<field>,period:<period>,cast:<cast>".
#   * Without a usable comment, the definition of the function named after
#     the trigger is searched for a known date format and a
#     to_char(NEW.<field>, ...) call; period is then a Symbol key of
#     SQL_FORMAT.
#   * needs_comment is true when the settings came from the function
#     definition or when the cast had to be defaulted to "date".
#   * declarative is true when `table` returned no trigger.
def settings_from_trigger(original_table, table)
  trigger_name = original_table.trigger_name

  needs_comment = false
  trigger_comment = table.fetch_trigger(trigger_name)
  comment = trigger_comment || table.fetch_comment

  field = period = cast = nil
  if comment
    # a missing comment text parses to no values, leaving all three nil
    field, period, cast = comment["comment"].to_s.split(",").map { |v| v.split(":").last }
  end

  unless period
    needs_comment = true
    function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
    return [] unless function_def
    function_def = function_def["pg_get_functiondef"]
    sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
    return [] unless sql_format
    period = sql_format[0]
    # treat an unrecognised function body as "no settings" rather than
    # failing on a nil match
    field_match = /to_char\(NEW\.(\w+),/.match(function_def)
    return [] unless field_match
    field = field_match[1]
  end

  # backwards compatibility with 0.2.3 and earlier (pre-timestamptz support)
  unless cast
    cast = "date"
    # update comment to explicitly define cast
    needs_comment = true
  end

  [period, field, cast, needs_comment, !trigger_comment]
end
576
+ end
577
+ end