pgslice 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,577 @@
1
+ module PgSlice
2
+ class Client < Thor
3
+ check_unknown_options!
4
+
5
+ class_option :url
6
+ class_option :dry_run, type: :boolean, default: false
7
+
8
+ map %w[--version -v] => :version
9
+
10
# Hook consulted by Thor: answering true asks it to exit with a failure
# status when a command raises an error.
def self.exit_on_failure?
  true
end
13
+
14
# Supported partition periods mapped to the date format string that is
# searched for in legacy trigger function bodies. The keys also serve as the
# list of valid PERIOD arguments. Frozen so it cannot be mutated at runtime.
SQL_FORMAT = {
  day: "YYYYMMDD",
  month: "YYYYMM"
}.freeze
18
+
19
# Publishes this client through the $client global and switches both standard
# streams to unbuffered mode, then hands all arguments to the parent
# initializer.
def initialize(*args)
  $stderr.sync = true
  $stdout.sync = true
  $client = self
  super
end
25
+
26
desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
option :partition, type: :boolean, default: true
option :trigger_based, type: :boolean, default: false
# Builds and runs the SQL that creates the intermediate table for TABLE.
#
# table  - name of the existing table (schema added when missing)
# column - partition column; required unless --no-partition is given
# period - a key of SQL_FORMAT; required unless --no-partition is given
#
# Declarative partitioning is used when the server reports version 10+ and
# --trigger-based is not set; otherwise a placeholder insert trigger is
# created. The settings (column, period, cast) are stored in a comment on
# the table or on the trigger so later commands can read them back.
#
# Aborts (Thor::Error) on bad arguments, a missing source table, or an
# intermediate table that already exists.
def prep(table, column=nil, period=nil)
  table = Table.new(qualify_table(table))
  intermediate_table = table.intermediate_table
  trigger_name = table.trigger_name

  unless options[:partition]
    abort "Usage: \"pgslice prep TABLE --no-partition\"" if column || period
    abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
  end
  abort "Table not found: #{table}" unless table.exists?
  abort "Table already exists: #{intermediate_table}" if intermediate_table.exists?

  if options[:partition]
    abort "Usage: \"pgslice prep TABLE COLUMN PERIOD\"" unless column && period
    abort "Column not found: #{column}" unless table.columns.include?(column)
    abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
  end

  queries = []

  # ask the server for its version once and reuse the answer
  version = server_version_num
  declarative = version >= 100000 && !options[:trigger_based]

  if declarative && options[:partition]
    # the partition key is a column: quote it as one identifier so a name
    # containing "." is not split into schema and table parts
    queries << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
    SQL

    if version >= 110000
      table.index_defs.each do |index_def|
        queries << index_def.sub(/ ON \S+ USING /, " ON #{quote_table(intermediate_table)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
      end
    end

    # add comment
    cast = table.column_cast(column)
    queries << <<-SQL
COMMENT ON TABLE #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
    SQL
  else
    queries << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING ALL);
    SQL

    table.foreign_keys.each do |fk_def|
      queries << "ALTER TABLE #{quote_table(intermediate_table)} ADD #{fk_def};"
    end
  end

  if options[:partition] && !declarative
    # placeholder trigger: rejects inserts until add_partitions replaces it
    queries << <<-SQL
CREATE FUNCTION #{quote_ident(trigger_name)}()
    RETURNS trigger AS $$
    BEGIN
        RAISE EXCEPTION 'Create partitions first.';
    END;
    $$ LANGUAGE plpgsql;
    SQL

    queries << <<-SQL
CREATE TRIGGER #{quote_ident(trigger_name)}
    BEFORE INSERT ON #{quote_table(intermediate_table)}
    FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
    SQL

    cast = table.column_cast(column)
    queries << <<-SQL
COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
    SQL
  end

  run_queries(queries)
end
102
+
103
desc "unprep TABLE", "Undo prep"
# Drops the intermediate table of TABLE (with CASCADE) and the trigger
# function if one exists. Aborts when the intermediate table is missing.
def unprep(table)
  source = Table.new(qualify_table(table))
  intermediate = source.intermediate_table
  function_name = source.trigger_name

  abort "Table not found: #{intermediate}" unless intermediate.exists?

  drop_table = "DROP TABLE #{quote_table(intermediate)} CASCADE;"
  drop_function = "DROP FUNCTION IF EXISTS #{quote_ident(function_name)}();"
  run_queries([drop_table, drop_function])
end
117
+
118
desc "add_partitions TABLE", "Add partitions"
option :intermediate, type: :boolean, default: false
option :past, type: :numeric, default: 0
option :future, type: :numeric, default: 0
# Creates the missing partitions for every period from `--past` periods
# before the current (UTC) period to `--future` periods after it.
#
# With --intermediate the partitions are attached to the intermediate table,
# otherwise to TABLE itself. For trigger-based partitioning the insert
# trigger function is regenerated from all existing and newly added
# partitions. Aborts (Thor::Error) when the table is missing or no
# partitioning settings can be found for it.
def add_partitions(table)
  original_table = Table.new(qualify_table(table))
  table = options[:intermediate] ? original_table.intermediate_table : original_table
  trigger_name = original_table.trigger_name

  abort "Table not found: #{table}" unless table.exists?

  future = options[:future]
  past = options[:past]
  range = (-1 * past)..future

  period, field, cast, needs_comment, declarative = settings_from_trigger(original_table, table)
  unless period
    message = "No settings found: #{table}"
    message = "#{message}\nDid you mean to use --intermediate?" unless options[:intermediate]
    abort message
  end

  queries = []

  if needs_comment
    queries << "COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(table)} is 'column:#{field},period:#{period},cast:#{cast}';"
  end

  # today = utc date, rounded to the start of the current period
  today = round_date(DateTime.now.new_offset(0).to_date, period)

  # table whose indexes, foreign keys and primary key are copied
  schema_table =
    if !declarative
      table
    elsif options[:intermediate]
      original_table
    else
      Table.new(original_table.existing_partitions(period).last)
    end

  # indexes automatically propagate in Postgres 11+
  index_defs =
    if !declarative || server_version_num < 110000
      schema_table.index_defs
    else
      []
    end

  fk_defs = schema_table.foreign_keys
  primary_key = schema_table.primary_key
  partition_format = name_format(period)

  added_partitions = []
  range.each do |n|
    day = advance_date(today, period, n)

    partition_name = Table.new("#{original_table}_#{day.strftime(partition_format)}")
    next if partition_name.exists?
    added_partitions << partition_name.to_s

    if declarative
      queries << <<-SQL
CREATE TABLE #{quote_table(partition_name)} PARTITION OF #{quote_table(table)} FOR VALUES FROM (#{sql_date(day, cast, false)}) TO (#{sql_date(advance_date(day, period, 1), cast, false)});
      SQL
    else
      queries << <<-SQL
CREATE TABLE #{quote_table(partition_name)}
    (CHECK (#{quote_ident(field)} >= #{sql_date(day, cast)} AND #{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}))
    INHERITS (#{quote_table(table)});
      SQL
    end

    queries << "ALTER TABLE #{quote_table(partition_name)} ADD PRIMARY KEY (#{primary_key.map { |k| quote_ident(k) }.join(", ")});" if primary_key.any?

    index_defs.each do |index_def|
      queries << index_def.sub(/ ON \S+ USING /, " ON #{quote_table(partition_name)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
    end

    fk_defs.each do |fk_def|
      queries << "ALTER TABLE #{quote_table(partition_name)} ADD #{fk_def};"
    end
  end

  unless declarative
    # update trigger based on existing partitions
    current_defs = []
    future_defs = []
    past_defs = []
    existing_tables = original_table.existing_partitions(period)
    existing_tables = (existing_tables + added_partitions).uniq.sort

    existing_tables.each do |existing_table|
      day = DateTime.strptime(existing_table.split("_").last, partition_format)
      partition_name = "#{original_table}_#{day.strftime(partition_format)}"

      sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN\n            INSERT INTO #{quote_table(partition_name)} VALUES (NEW.*);"

      # `today` is already rounded to the start of its period, so the
      # current partition is the one starting exactly on `today`; the
      # previous test (end of period < today) could never be true here
      if day.to_date < today
        past_defs << sql
      elsif day.to_date == today
        current_defs << sql
      else
        future_defs << sql
      end
    end

    # order by current period, future periods asc, past periods desc
    trigger_defs = current_defs + future_defs + past_defs.reverse

    if trigger_defs.any?
      queries << <<-SQL
CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
    RETURNS trigger AS $$
    BEGIN
        IF #{trigger_defs.join("\n        ELSIF ")}
        ELSE
            RAISE EXCEPTION 'Date out of range. Ensure partitions are created.';
        END IF;
        RETURN NULL;
    END;
    $$ LANGUAGE plpgsql;
      SQL
    end
  end

  run_queries(queries) if queries.any?
end
246
+
247
desc "fill TABLE", "Fill the partitions in batches"
option :batch_size, type: :numeric, default: 10000
option :swapped, type: :boolean, default: false
option :source_table
option :dest_table
# numeric so the starting id can be used in the id arithmetic below
option :start, type: :numeric
option :where
option :sleep, type: :numeric
# Copies rows from the source table into the destination table with one
# INSERT ... SELECT per primary-key batch.
#
# By default rows go from TABLE to its intermediate table; with --swapped
# they go from the retired table to TABLE. --source-table / --dest-table
# override either side. When partition settings are found, only rows whose
# partition column falls inside the range covered by existing partitions
# are copied. Copying resumes after the highest id already present in the
# destination unless --start is given.
#
# Aborts (Thor::Error) when a table is missing, when there is no primary
# key, or when the primary key is not numeric.
def fill(table)
  table = Table.new(qualify_table(table))
  # qualify explicitly named tables the same way as the TABLE argument
  source_table = Table.new(qualify_table(options[:source_table])) if options[:source_table]
  dest_table = Table.new(qualify_table(options[:dest_table])) if options[:dest_table]

  if options[:swapped]
    source_table ||= table.retired_table
    dest_table ||= table
  else
    source_table ||= table
    dest_table ||= table.intermediate_table
  end

  abort "Table not found: #{source_table}" unless source_table.exists?
  abort "Table not found: #{dest_table}" unless dest_table.exists?

  period, field, cast, _needs_comment, declarative = settings_from_trigger(table, dest_table)

  # stay nil when no settings or no partitions exist: no date filter then
  existing_tables = nil
  starting_time = nil
  ending_time = nil

  if period
    name_format = self.name_format(period)

    existing_tables = table.existing_partitions(period)
    if existing_tables.any?
      starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
      ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
    end
  end

  schema_table = period && declarative ? Table.new(existing_tables.last) : table

  primary_key = schema_table.primary_key[0]
  abort "No primary key" unless primary_key

  max_source_id = nil
  begin
    max_source_id = source_table.max_id(primary_key)
  rescue PG::UndefinedFunction
    abort "Only numeric primary keys are supported"
  end

  max_dest_id =
    if options[:start]
      options[:start]
    elsif options[:swapped]
      dest_table.max_id(primary_key, where: options[:where], below: max_source_id)
    else
      dest_table.max_id(primary_key, where: options[:where])
    end

  if max_dest_id == 0 && !options[:swapped]
    # empty destination: start just below the first matching source row
    min_source_id = source_table.min_id(primary_key, field, cast, starting_time, options[:where])
    max_dest_id = min_source_id - 1 if min_source_id
  end

  starting_id = max_dest_id
  fields = source_table.columns.map { |c| quote_ident(c) }.join(", ")
  batch_size = options[:batch_size]

  i = 1
  batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil

  if batch_count == 0
    log_sql "/* nothing to fill */"
  end

  while starting_id < max_source_id
    where = "#{quote_ident(primary_key)} > #{starting_id} AND #{quote_ident(primary_key)} <= #{starting_id + batch_size}"
    if starting_time
      where << " AND #{quote_ident(field)} >= #{sql_date(starting_time, cast)} AND #{quote_ident(field)} < #{sql_date(ending_time, cast)}"
    end
    if options[:where]
      where << " AND #{options[:where]}"
    end

    query = <<-SQL
/* #{i} of #{batch_count} */
INSERT INTO #{quote_table(dest_table)} (#{fields})
    SELECT #{fields} FROM #{quote_table(source_table)}
    WHERE #{where}
    SQL

    run_query(query)

    starting_id += batch_size
    i += 1

    # pause only between batches, never after the final one
    if options[:sleep] && starting_id < max_source_id
      sleep(options[:sleep])
    end
  end
end
346
+
347
desc "swap TABLE", "Swap the intermediate table with the original table"
option :lock_timeout, default: "5s"
# Renames TABLE to its retired name and the intermediate table to TABLE,
# then re-points the ownership of each sequence reported by the table. On
# servers reporting version 9.3+ the statements are preceded by a
# `SET LOCAL lock_timeout`. Aborts when TABLE or the intermediate table is
# missing, or when the retired table already exists.
def swap(table)
  table = Table.new(qualify_table(table))
  intermediate_table = table.intermediate_table
  retired_table = table.retired_table

  abort "Table not found: #{table}" unless table.exists?
  abort "Table not found: #{intermediate_table}" unless intermediate_table.exists?
  abort "Table already exists: #{retired_table}" if retired_table.exists?

  renames = []
  renames << "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(retired_table)};"
  renames << "ALTER TABLE #{quote_table(intermediate_table)} RENAME TO #{quote_no_schema(table)};"

  ownership = table.sequences.map do |seq|
    "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(seq["related_column"])};"
  end

  preamble = []
  preamble << "SET LOCAL lock_timeout = '#{options[:lock_timeout]}';" if server_version_num >= 90300

  run_queries(preamble + renames + ownership)
end
371
+
372
desc "unswap TABLE", "Undo swap"
# Reverses `swap`: TABLE is renamed back to the intermediate name and the
# retired table back to TABLE, then sequence ownership is re-pointed.
# Aborts when TABLE or the retired table is missing, or when the
# intermediate table already exists.
def unswap(table)
  table = Table.new(qualify_table(table))
  intermediate_table = table.intermediate_table
  retired_table = table.retired_table

  abort "Table not found: #{table}" unless table.exists?
  abort "Table not found: #{retired_table}" unless retired_table.exists?
  abort "Table already exists: #{intermediate_table}" if intermediate_table.exists?

  renames = []
  renames << "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(intermediate_table)};"
  renames << "ALTER TABLE #{quote_table(retired_table)} RENAME TO #{quote_no_schema(table)};"

  ownership = table.sequences.map do |seq|
    "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(seq["related_column"])};"
  end

  run_queries(renames + ownership)
end
393
+
394
desc "analyze TABLE", "Analyze tables"
option :swapped, type: :boolean, default: false
# Runs ANALYZE VERBOSE, outside a transaction, on every existing partition
# of TABLE followed by the parent: TABLE itself with --swapped, otherwise
# the intermediate table.
def analyze(table)
  table = Table.new(qualify_table(table))
  parent_table =
    if options[:swapped]
      table
    else
      table.intermediate_table
    end

  targets = table.existing_partitions + [parent_table]
  statements = targets.map { |target| "ANALYZE VERBOSE #{quote_table(target)};" }
  run_queries_without_transaction(statements)
end
404
+
405
desc "version", "Show version"
# Logs the program name followed by PgSlice::VERSION.
def version
  banner = "pgslice #{PgSlice::VERSION}"
  log(banner)
end
409
+
410
+ protected
411
+
412
+ # output
413
+
414
# Forwards a status message to `error`, keeping it apart from the SQL that
# `log_sql` emits through `say`.
def log(message = nil)
  error(message)
end
417
+
418
# Emits a line of SQL (or a blank line when called without an argument)
# through `say`.
def log_sql(message = nil)
  say(message)
end
421
+
422
# Stops the current command by raising Thor::Error carrying +message+.
def abort(message)
  raise(Thor::Error, message)
end
425
+
426
+ # database connection
427
+
428
+ def connection
429
+ @connection ||= begin
430
+ url = options[:url] || ENV["PGSLICE_URL"]
431
+ abort "Set PGSLICE_URL or use the --url option" unless url
432
+
433
+ uri = URI.parse(url)
434
+ params = CGI.parse(uri.query.to_s)
435
+ # remove schema
436
+ @schema = Array(params.delete("schema") || "public")[0]
437
+ uri.query = URI.encode_www_form(params)
438
+
439
+ ENV["PGCONNECT_TIMEOUT"] ||= "1"
440
+ PG::Connection.new(uri.to_s)
441
+ end
442
+ rescue PG::ConnectionBad => e
443
+ abort e.message
444
+ rescue URI::InvalidURIError
445
+ abort "Invalid url"
446
+ end
447
+
448
# Returns the schema name stored in @schema. The connection is touched first
# because @schema is only populated while the connection is being set up.
def schema
  connection
  @schema
end
452
+
453
# Runs +query+ with bind +params+ on the connection and returns the result
# converted to an array.
def execute(query, params = [])
  result = connection.exec_params(query, params)
  result.to_a
end
456
+
457
# Runs +queries+ inside one connection transaction, logging BEGIN/COMMIT
# around them. Unless --dry-run is set, client_min_messages is first lowered
# to warning for the duration of the transaction.
def run_queries(queries)
  connection.transaction do
    dry_run = options[:dry_run]
    execute("SET LOCAL client_min_messages TO warning") unless dry_run

    log_sql("BEGIN;")
    log_sql
    run_queries_without_transaction(queries)
    log_sql("COMMIT;")
  end
end
466
+
467
# Logs +query+, executes it unless --dry-run is set, then logs a blank line.
# A PG::ServerError is turned into an abort carrying the error class name
# and message.
def run_query(query)
  log_sql(query)

  begin
    execute(query) unless options[:dry_run]
  rescue PG::ServerError => e
    abort("#{e.class.name}: #{e.message}")
  end

  log_sql
end
478
+
479
# Runs each query in order through run_query, without opening a transaction.
def run_queries_without_transaction(queries)
  queries.each { |statement| run_query(statement) }
end
484
+
485
# Asks the server for `server_version_num` and returns it as an Integer.
# Issues a query on every call.
def server_version_num
  row = execute("SHOW server_version_num").first
  row["server_version_num"].to_i
end
488
+
489
+ # helpers
490
+
491
# Formats +time+ as a quoted SQL literal.
#
# time     - any object responding to strftime
# cast     - "timestamptz" yields a full UTC-labelled timestamp; any other
#            value yields a plain date
# add_cast - when true (default) the literal is suffixed with "::<cast>"
def sql_date(time, cast, add_cast = true)
  pattern = cast == "timestamptz" ? "%Y-%m-%d %H:%M:%S UTC" : "%Y-%m-%d"
  literal = "'#{time.strftime(pattern)}'"
  return literal unless add_cast

  "#{literal}::#{cast}"
end
500
+
501
# Returns the strftime pattern used for partition name suffixes: a full date
# for the :day period, year and month for anything else.
def name_format(period)
  period.to_sym == :day ? "%Y%m%d" : "%Y%m"
end
509
+
510
# Rounds +date+ down to the start of its period: unchanged for :day, the
# first day of the month for anything else. Always returns a Date.
def round_date(date, period)
  rounded = date.to_date
  return rounded if period.to_sym == :day

  Date.new(rounded.year, rounded.month)
end
519
+
520
# Moves +date+ forward by +count+ periods (backward when negative): days for
# the :day period, months for anything else. Always returns a Date.
def advance_date(date, period, count = 1)
  base = date.to_date
  return base.next_day(count) if period.to_sym == :day

  base.next_month(count)
end
529
+
530
# Quotes a single SQL identifier by delegating to PG::Connection.quote_ident.
def quote_ident(value)
  PG::Connection.quote_ident(value)
end
533
+
534
# Quotes a possibly schema-qualified table name. The name is split on the
# first "." only, and each part is quoted separately.
def quote_table(table)
  parts = table.to_s.split(".", 2)
  quoted = parts.map { |part| quote_ident(part) }
  quoted.join(".")
end
537
+
538
# Quotes only the table part of a possibly schema-qualified name, i.e.
# whatever follows the first ".".
def quote_no_schema(table)
  bare_name = table.to_s.split(".", 2).last
  quote_ident(bare_name)
end
541
+
542
# Returns +table+ untouched when it already contains a ".", otherwise
# prefixes it with the configured schema.
def qualify_table(table)
  return table if table.to_s.include?(".")

  [schema, table].join(".")
end
545
+
546
# Recovers the partitioning settings stored for a table.
#
# original_table - table whose trigger name is looked up
# table          - table whose trigger comment or table comment is read
#
# Settings are read from a comment of the form
# "column:<field>,period:<period>,cast:<cast>", taken from the trigger when
# one is found and from the table otherwise. When no period can be read, the
# trigger function definition is inspected as a fallback for older setups.
#
# Returns [period, field, cast, needs_comment, declarative], where
# needs_comment tells the caller to (re)write the comment and declarative is
# true when no trigger comment was found. Returns [] when no settings can be
# recovered.
def settings_from_trigger(original_table, table)
  trigger_name = original_table.trigger_name

  needs_comment = false
  trigger_comment = table.fetch_trigger(trigger_name)
  comment = trigger_comment || table.fetch_comment
  if comment
    # to_s covers a missing comment text, leaving all three values nil,
    # without hiding unrelated errors behind a blanket rescue
    field, period, cast = comment["comment"].to_s.split(",").map { |v| v.split(":").last }
  end

  unless period
    needs_comment = true
    function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
    return [] unless function_def
    function_def = function_def["pg_get_functiondef"]
    sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
    return [] unless sql_format
    period = sql_format[0]
    # a function body without the expected to_char call yields no settings
    field_match = /to_char\(NEW\.(\w+),/.match(function_def)
    return [] unless field_match
    field = field_match[1]
  end

  # backwards compatibility with 0.2.3 and earlier (pre-timestamptz support)
  unless cast
    cast = "date"
    # update comment to explicitly define cast
    needs_comment = true
  end

  [period, field, cast, needs_comment, !trigger_comment]
end
576
+ end
577
+ end