pgslice 0.3.6 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07d87eec1238827086898708a5b4e026746a0c11
4
- data.tar.gz: 76024edd24065ec3e24b74d72563583e01cecc6e
3
+ metadata.gz: 9db8d56e7e5ea8f2b5769bea524b2db90731c8a3
4
+ data.tar.gz: 4d05fabf0ded181cc1ab972988d411fb22518168
5
5
  SHA512:
6
- metadata.gz: 9c6042dc4fe0d093c3b38c05d91b445ad1882241f5c3b814f4fe7124ae3609b591ead4d70de39ebf496164e72221ffb409703173689a0daa6f37f884e73dce45
7
- data.tar.gz: b79e389ab3529ad08eadb2e18b6c374d40436007e4fe788e2898888e5caf5592b3839326fee9c4d8d658bbfd284c0c953683171d8789de826b86fa79b7d3354c
6
+ metadata.gz: c698c0b5176fb902460595f5702d37ea612cc60ddc1438a164810023e540e8daf9a9cff0b505ecbc770e9e4f25c568be0daeb91f86f975fa961b42a1dec9ed10
7
+ data.tar.gz: 761b3365b4bb1fe85acfd737ad18fa5e1c7938162dabf20c68d53115dfbbfa429d075d885b5bb6c7b6a3ba71fb38cb84187456c12fb0134ca06a45e08a7c3493
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ rvm: 2.4.1
3
+ cache: bundler
4
+ sudo: false
5
+ script: bundle exec rake test
6
+ addons:
7
+ postgresql: "9.4"
8
+ before_script:
9
+ - psql -c 'create database pgslice_test;' -U postgres
10
+ notifications:
11
+ email:
12
+ on_success: never
13
+ on_failure: change
@@ -1,3 +1,8 @@
1
+ ## 0.4.0
2
+
3
+ - Added support for declarative partitioning
4
+ - Added support for foreign keys
5
+
1
6
  ## 0.3.6
2
7
 
3
8
  - Fixed drop trigger on `unprep` for non-lowercase tables
data/README.md CHANGED
@@ -4,6 +4,8 @@ Postgres partitioning as easy as pie. Works great for both new and existing tabl
4
4
 
5
5
  :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
6
6
 
7
+ [![Build Status](https://travis-ci.org/ankane/pgslice.svg?branch=master)](https://travis-ci.org/ankane/pgslice)
8
+
7
9
  ## Install
8
10
 
9
11
  pgslice is a command line tool. To install, run:
@@ -280,7 +282,7 @@ This set up allows you to read and write with the original table name with no kn
280
282
 
281
283
  ### Writes
282
284
 
283
- If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
285
+ Before Postgres 10, if you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
284
286
 
285
287
  1. Insert directly into the partition
286
288
  2. Get value before the insert with `SELECT nextval('sequence_name')` (for multiple rows, append `FROM generate_series(1, n)`)
@@ -314,6 +316,17 @@ pgslice fill <table> --where "id > 1000" # use any conditions
314
316
  pgslice swap <table>
315
317
  ```
316
318
 
319
+ ## Schema Updates
320
+
321
+ Once a table is partitioned, here’s how to change the schema:
322
+
323
+ - To add, remove, or modify a column, make the update on the master table only
324
+ - To add or remove an index, make the update on the master table and all partitions
325
+
326
+ ## Declarative Partitioning
327
+
328
+ Postgres 10 introduces [declarative partitioning](https://www.postgresql.org/docs/10/static/ddl-partitioning.html#ddl-partitioning-declarative). A major benefit is that `INSERT` statements with a `RETURNING` clause work as expected. If you prefer to use trigger-based partitioning instead (not recommended), pass the `--trigger-based` option to the `prep` command.
329
+
317
330
  ## Upgrading
318
331
 
319
332
  Run:
@@ -364,3 +377,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
364
377
  - Fix bugs and [submit pull requests](https://github.com/ankane/pgslice/pulls)
365
378
  - Write, clarify, or fix documentation
366
379
  - Suggest or add new features
380
+
381
+ To run tests, do:
382
+
383
+ ```sh
384
+ git clone https://github.com/ankane/pgslice.git
385
+ cd pgslice
386
+ bundle install
387
+ createdb pgslice_test
388
+ bundle exec rake
389
+ ```
@@ -59,6 +59,7 @@ module PgSlice
59
59
 
60
60
  if options[:no_partition]
61
61
  abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
62
+ abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
62
63
  else
63
64
  abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
64
65
  end
@@ -72,11 +73,29 @@ module PgSlice
72
73
 
73
74
  queries = []
74
75
 
75
- queries << <<-SQL
76
+ declarative = server_version_num >= 100000 && !options[:trigger_based]
77
+
78
+ if declarative && !options[:no_partition]
79
+ queries << <<-SQL
80
+ CREATE TABLE #{quote_ident(intermediate_table)} (LIKE #{quote_ident(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
81
+ SQL
82
+
83
+ # add comment
84
+ cast = column_cast(table, column)
85
+ queries << <<-SQL
86
+ COMMENT ON TABLE #{quote_ident(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
87
+ SQL
88
+ else
89
+ queries << <<-SQL
76
90
  CREATE TABLE #{quote_ident(intermediate_table)} (LIKE #{quote_ident(table)} INCLUDING ALL);
77
- SQL
91
+ SQL
78
92
 
79
- unless options[:no_partition]
93
+ foreign_keys(table).each do |fk_def|
94
+ queries << "ALTER TABLE #{quote_ident(intermediate_table)} ADD #{fk_def};"
95
+ end
96
+ end
97
+
98
+ if !options[:no_partition] && !declarative
80
99
  sql_format = SQL_FORMAT[period.to_sym]
81
100
  queries << <<-SQL
82
101
  CREATE FUNCTION #{quote_ident(trigger_name)}()
@@ -91,12 +110,12 @@ CREATE FUNCTION #{quote_ident(trigger_name)}()
91
110
  CREATE TRIGGER #{quote_ident(trigger_name)}
92
111
  BEFORE INSERT ON #{quote_ident(intermediate_table)}
93
112
  FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
94
- SQL
113
+ SQL
95
114
 
96
115
  cast = column_cast(table, column)
97
116
  queries << <<-SQL
98
117
  COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_ident(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
99
- SQL
118
+ SQL
100
119
  end
101
120
 
102
121
  run_queries(queries)
@@ -129,23 +148,34 @@ SQL
129
148
  past = options[:past]
130
149
  range = (-1 * past)..future
131
150
 
132
- # ensure table has trigger
133
- abort "No trigger on table: #{table}\nDid you mean to use --intermediate?" unless has_trigger?(trigger_name, table)
134
-
135
- index_defs = execute("SELECT pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = #{regclass(schema, original_table)} AND indisprimary = 'f'").map { |r| r["pg_get_indexdef"] }
136
- primary_key = self.primary_key(table)
151
+ period, field, cast, needs_comment, declarative = settings_from_trigger(original_table, table)
152
+ unless period
153
+ message = "No settings found: #{table}"
154
+ message = "#{message}\nDid you mean to use --intermediate?" unless options[:intermediate]
155
+ abort message
156
+ end
137
157
 
138
158
  queries = []
139
159
 
140
- period, field, cast, needs_comment = settings_from_trigger(original_table, table)
141
- abort "Could not read settings" unless period
142
-
143
160
  if needs_comment
144
161
  queries << "COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_ident(table)} is 'column:#{field},period:#{period},cast:#{cast}';"
145
162
  end
146
163
 
147
164
  # today = utc date
148
165
  today = round_date(DateTime.now.new_offset(0).to_date, period)
166
+
167
+ schema_table =
168
+ if !declarative
169
+ table
170
+ elsif options[:intermediate]
171
+ original_table
172
+ else
173
+ "#{original_table}_#{today.strftime(name_format(period))}"
174
+ end
175
+ index_defs = execute("SELECT pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = #{regclass(schema, schema_table)} AND indisprimary = 'f'").map { |r| r["pg_get_indexdef"] }
176
+ fk_defs = foreign_keys(schema_table)
177
+ primary_key = self.primary_key(schema_table)
178
+
149
179
  added_partitions = []
150
180
  range.each do |n|
151
181
  day = advance_date(today, period, n)
@@ -154,48 +184,59 @@ SQL
154
184
  next if table_exists?(partition_name)
155
185
  added_partitions << partition_name
156
186
 
157
- queries << <<-SQL
187
+ if declarative
188
+ queries << <<-SQL
189
+ CREATE TABLE #{quote_ident(partition_name)} PARTITION OF #{quote_ident(table)} FOR VALUES FROM (#{sql_date(day, cast, false)}) TO (#{sql_date(advance_date(day, period, 1), cast, false)});
190
+ SQL
191
+ else
192
+ queries << <<-SQL
158
193
  CREATE TABLE #{quote_ident(partition_name)}
159
194
  (CHECK (#{quote_ident(field)} >= #{sql_date(day, cast)} AND #{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}))
160
195
  INHERITS (#{quote_ident(table)});
161
- SQL
196
+ SQL
197
+ end
162
198
 
163
199
  queries << "ALTER TABLE #{quote_ident(partition_name)} ADD PRIMARY KEY (#{quote_ident(primary_key)});" if primary_key
164
200
 
165
201
  index_defs.each do |index_def|
166
202
  queries << index_def.sub(/ ON \S+ USING /, " ON #{quote_ident(partition_name)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
167
203
  end
168
- end
169
204
 
170
- # update trigger based on existing partitions
171
- current_defs = []
172
- future_defs = []
173
- past_defs = []
174
- name_format = self.name_format(period)
175
- existing_tables = existing_partitions(original_table)
176
- existing_tables = (existing_tables + added_partitions).uniq.sort
177
-
178
- existing_tables.each do |table|
179
- day = DateTime.strptime(table.split("_").last, name_format)
180
- partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
181
-
182
- sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
183
- INSERT INTO #{quote_ident(partition_name)} VALUES (NEW.*);"
184
-
185
- if day.to_date < today
186
- past_defs << sql
187
- elsif advance_date(day, period, 1) < today
188
- current_defs << sql
189
- else
190
- future_defs << sql
205
+ fk_defs.each do |fk_def|
206
+ queries << "ALTER TABLE #{quote_ident(partition_name)} ADD #{fk_def};"
191
207
  end
192
208
  end
193
209
 
194
- # order by current period, future periods asc, past periods desc
195
- trigger_defs = current_defs + future_defs + past_defs.reverse
210
+ unless declarative
211
+ # update trigger based on existing partitions
212
+ current_defs = []
213
+ future_defs = []
214
+ past_defs = []
215
+ name_format = self.name_format(period)
216
+ existing_tables = existing_partitions(original_table)
217
+ existing_tables = (existing_tables + added_partitions).uniq.sort
218
+
219
+ existing_tables.each do |table|
220
+ day = DateTime.strptime(table.split("_").last, name_format)
221
+ partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
222
+
223
+ sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
224
+ INSERT INTO #{quote_ident(partition_name)} VALUES (NEW.*);"
225
+
226
+ if day.to_date < today
227
+ past_defs << sql
228
+ elsif advance_date(day, period, 1) < today
229
+ current_defs << sql
230
+ else
231
+ future_defs << sql
232
+ end
233
+ end
234
+
235
+ # order by current period, future periods asc, past periods desc
236
+ trigger_defs = current_defs + future_defs + past_defs.reverse
196
237
 
197
- if trigger_defs.any?
198
- queries << <<-SQL
238
+ if trigger_defs.any?
239
+ queries << <<-SQL
199
240
  CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
200
241
  RETURNS trigger AS $$
201
242
  BEGIN
@@ -206,7 +247,8 @@ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
206
247
  RETURN NULL;
207
248
  END;
208
249
  $$ LANGUAGE plpgsql;
209
- SQL
250
+ SQL
251
+ end
210
252
  end
211
253
 
212
254
  run_queries(queries) if queries.any?
@@ -231,7 +273,7 @@ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
231
273
  abort "Table not found: #{source_table}" unless table_exists?(source_table)
232
274
  abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
233
275
 
234
- period, field, cast, needs_comment = settings_from_trigger(table, dest_table)
276
+ period, field, cast, needs_comment, declarative = settings_from_trigger(table, dest_table)
235
277
 
236
278
  if period
237
279
  name_format = self.name_format(period)
@@ -243,7 +285,8 @@ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
243
285
  end
244
286
  end
245
287
 
246
- primary_key = self.primary_key(table)
288
+ schema_table = period && declarative ? existing_tables.last : table
289
+ primary_key = self.primary_key(schema_table)
247
290
  abort "No primary key" unless primary_key
248
291
  max_source_id = max_id(source_table, primary_key)
249
292
 
@@ -368,6 +411,7 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
368
411
  o.integer "--batch-size", default: 10000
369
412
  o.boolean "--dry-run", default: false
370
413
  o.boolean "--no-partition", default: false
414
+ o.boolean "--trigger-based", default: false
371
415
  o.integer "--start"
372
416
  o.string "--url"
373
417
  o.string "--source-table"
@@ -524,6 +568,10 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
524
568
  !fetch_trigger(trigger_name, table).nil?
525
569
  end
526
570
 
571
+ def fetch_comment(table)
572
+ execute("SELECT obj_description(#{regclass(schema, table)}) AS comment")[0]
573
+ end
574
+
527
575
  # http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL
528
576
  def sequences(table)
529
577
  query = <<-SQL
@@ -561,13 +609,14 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
561
609
  data_type == "timestamp with time zone" ? "timestamptz" : "date"
562
610
  end
563
611
 
564
- def sql_date(time, cast)
612
+ def sql_date(time, cast, add_cast = true)
565
613
  if cast == "timestamptz"
566
614
  fmt = "%Y-%m-%d %H:%M:%S UTC"
567
615
  else
568
616
  fmt = "%Y-%m-%d"
569
617
  end
570
- "'#{time.strftime(fmt)}'::#{cast}"
618
+ str = "'#{time.strftime(fmt)}'"
619
+ add_cast ? "#{str}::#{cast}" : str
571
620
  end
572
621
 
573
622
  def name_format(period)
@@ -615,7 +664,8 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
615
664
  trigger_name = self.trigger_name(original_table)
616
665
 
617
666
  needs_comment = false
618
- comment = fetch_trigger(trigger_name, table)
667
+ trigger_comment = fetch_trigger(trigger_name, table)
668
+ comment = trigger_comment || fetch_comment(table)
619
669
  if comment
620
670
  field, period, cast = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil, nil]
621
671
  end
@@ -623,10 +673,10 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
623
673
  unless period
624
674
  needs_comment = true
625
675
  function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
626
- return [nil, nil] unless function_def
676
+ return [] unless function_def
627
677
  function_def = function_def["pg_get_functiondef"]
628
678
  sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
629
- return [nil, nil] unless sql_format
679
+ return [] unless sql_format
630
680
  period = sql_format[0]
631
681
  field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
632
682
  end
@@ -638,7 +688,15 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
638
688
  needs_comment = true
639
689
  end
640
690
 
641
- [period, field, cast, needs_comment]
691
+ [period, field, cast, needs_comment, !trigger_comment]
692
+ end
693
+
694
+ def foreign_keys(table)
695
+ execute("SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = #{regclass(schema, table)} AND contype ='f'").map { |r| r["pg_get_constraintdef"] }
696
+ end
697
+
698
+ def server_version_num
699
+ execute("SHOW server_version_num").first["server_version_num"].to_i
642
700
  end
643
701
  end
644
702
  end
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.3.6"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-10 00:00:00.000000000 Z
11
+ date: 2017-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop
@@ -89,6 +89,7 @@ extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
91
  - ".gitignore"
92
+ - ".travis.yml"
92
93
  - CHANGELOG.md
93
94
  - Dockerfile
94
95
  - Gemfile
@@ -117,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
118
  version: '0'
118
119
  requirements: []
119
120
  rubyforge_project:
120
- rubygems_version: 2.6.11
121
+ rubygems_version: 2.6.13
121
122
  signing_key:
122
123
  specification_version: 4
123
124
  summary: Postgres partitioning as easy as pie