pgslice 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07d87eec1238827086898708a5b4e026746a0c11
4
- data.tar.gz: 76024edd24065ec3e24b74d72563583e01cecc6e
3
+ metadata.gz: 9db8d56e7e5ea8f2b5769bea524b2db90731c8a3
4
+ data.tar.gz: 4d05fabf0ded181cc1ab972988d411fb22518168
5
5
  SHA512:
6
- metadata.gz: 9c6042dc4fe0d093c3b38c05d91b445ad1882241f5c3b814f4fe7124ae3609b591ead4d70de39ebf496164e72221ffb409703173689a0daa6f37f884e73dce45
7
- data.tar.gz: b79e389ab3529ad08eadb2e18b6c374d40436007e4fe788e2898888e5caf5592b3839326fee9c4d8d658bbfd284c0c953683171d8789de826b86fa79b7d3354c
6
+ metadata.gz: c698c0b5176fb902460595f5702d37ea612cc60ddc1438a164810023e540e8daf9a9cff0b505ecbc770e9e4f25c568be0daeb91f86f975fa961b42a1dec9ed10
7
+ data.tar.gz: 761b3365b4bb1fe85acfd737ad18fa5e1c7938162dabf20c68d53115dfbbfa429d075d885b5bb6c7b6a3ba71fb38cb84187456c12fb0134ca06a45e08a7c3493
data/.travis.yml ADDED
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ rvm: 2.4.1
3
+ cache: bundler
4
+ sudo: false
5
+ script: bundle exec rake test
6
+ addons:
7
+ postgresql: "9.4"
8
+ before_script:
9
+ - psql -c 'create database pgslice_test;' -U postgres
10
+ notifications:
11
+ email:
12
+ on_success: never
13
+ on_failure: change
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.4.0
2
+
3
+ - Added support for declarative partitioning
4
+ - Added support for foreign keys
5
+
1
6
  ## 0.3.6
2
7
 
3
8
  - Fixed drop trigger on `unprep` for non-lowercase tables
data/README.md CHANGED
@@ -4,6 +4,8 @@ Postgres partitioning as easy as pie. Works great for both new and existing tabl
4
4
 
5
5
  :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
6
6
 
7
+ [![Build Status](https://travis-ci.org/ankane/pgslice.svg?branch=master)](https://travis-ci.org/ankane/pgslice)
8
+
7
9
  ## Install
8
10
 
9
11
  pgslice is a command line tool. To install, run:
@@ -280,7 +282,7 @@ This set up allows you to read and write with the original table name with no kn
280
282
 
281
283
  ### Writes
282
284
 
283
- If you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
285
+ Before Postgres 10, if you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
284
286
 
285
287
  1. Insert directly into the partition
286
288
  2. Get value before the insert with `SELECT nextval('sequence_name')` (for multiple rows, append `FROM generate_series(1, n)`)
@@ -314,6 +316,17 @@ pgslice fill <table> --where "id > 1000" # use any conditions
314
316
  pgslice swap <table>
315
317
  ```
316
318
 
319
+ ## Schema Updates
320
+
321
+ Once a table is partitioned, here’s how to change the schema:
322
+
323
+ - To add, remove, or modify a column, make the update on the master table only
324
+ - To add or remove an index, make the update on the master table and all partitions
325
+
326
+ ## Declarative Partitioning
327
+
328
+ Postgres 10 introduces [declarative partitioning](https://www.postgresql.org/docs/10/static/ddl-partitioning.html#ddl-partitioning-declarative). A major benefit is that `INSERT` statements with a `RETURNING` clause work as expected. If you prefer to use trigger-based partitioning instead (not recommended), pass the `--trigger-based` option to the `prep` command.
329
+
317
330
  ## Upgrading
318
331
 
319
332
  Run:
@@ -364,3 +377,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
364
377
  - Fix bugs and [submit pull requests](https://github.com/ankane/pgslice/pulls)
365
378
  - Write, clarify, or fix documentation
366
379
  - Suggest or add new features
380
+
381
+ To run tests, do:
382
+
383
+ ```sh
384
+ git clone https://github.com/ankane/pgslice.git
385
+ cd pgslice
386
+ bundle install
387
+ createdb pgslice_test
388
+ bundle exec rake
389
+ ```
@@ -59,6 +59,7 @@ module PgSlice
59
59
 
60
60
  if options[:no_partition]
61
61
  abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
62
+ abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
62
63
  else
63
64
  abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
64
65
  end
@@ -72,11 +73,29 @@ module PgSlice
72
73
 
73
74
  queries = []
74
75
 
75
- queries << <<-SQL
76
+ declarative = server_version_num >= 100000 && !options[:trigger_based]
77
+
78
+ if declarative && !options[:no_partition]
79
+ queries << <<-SQL
80
+ CREATE TABLE #{quote_ident(intermediate_table)} (LIKE #{quote_ident(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
81
+ SQL
82
+
83
+ # add comment
84
+ cast = column_cast(table, column)
85
+ queries << <<-SQL
86
+ COMMENT ON TABLE #{quote_ident(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
87
+ SQL
88
+ else
89
+ queries << <<-SQL
76
90
  CREATE TABLE #{quote_ident(intermediate_table)} (LIKE #{quote_ident(table)} INCLUDING ALL);
77
- SQL
91
+ SQL
78
92
 
79
- unless options[:no_partition]
93
+ foreign_keys(table).each do |fk_def|
94
+ queries << "ALTER TABLE #{quote_ident(intermediate_table)} ADD #{fk_def};"
95
+ end
96
+ end
97
+
98
+ if !options[:no_partition] && !declarative
80
99
  sql_format = SQL_FORMAT[period.to_sym]
81
100
  queries << <<-SQL
82
101
  CREATE FUNCTION #{quote_ident(trigger_name)}()
@@ -91,12 +110,12 @@ CREATE FUNCTION #{quote_ident(trigger_name)}()
91
110
  CREATE TRIGGER #{quote_ident(trigger_name)}
92
111
  BEFORE INSERT ON #{quote_ident(intermediate_table)}
93
112
  FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
94
- SQL
113
+ SQL
95
114
 
96
115
  cast = column_cast(table, column)
97
116
  queries << <<-SQL
98
117
  COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_ident(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
99
- SQL
118
+ SQL
100
119
  end
101
120
 
102
121
  run_queries(queries)
@@ -129,23 +148,34 @@ SQL
129
148
  past = options[:past]
130
149
  range = (-1 * past)..future
131
150
 
132
- # ensure table has trigger
133
- abort "No trigger on table: #{table}\nDid you mean to use --intermediate?" unless has_trigger?(trigger_name, table)
134
-
135
- index_defs = execute("SELECT pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = #{regclass(schema, original_table)} AND indisprimary = 'f'").map { |r| r["pg_get_indexdef"] }
136
- primary_key = self.primary_key(table)
151
+ period, field, cast, needs_comment, declarative = settings_from_trigger(original_table, table)
152
+ unless period
153
+ message = "No settings found: #{table}"
154
+ message = "#{message}\nDid you mean to use --intermediate?" unless options[:intermediate]
155
+ abort message
156
+ end
137
157
 
138
158
  queries = []
139
159
 
140
- period, field, cast, needs_comment = settings_from_trigger(original_table, table)
141
- abort "Could not read settings" unless period
142
-
143
160
  if needs_comment
144
161
  queries << "COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_ident(table)} is 'column:#{field},period:#{period},cast:#{cast}';"
145
162
  end
146
163
 
147
164
  # today = utc date
148
165
  today = round_date(DateTime.now.new_offset(0).to_date, period)
166
+
167
+ schema_table =
168
+ if !declarative
169
+ table
170
+ elsif options[:intermediate]
171
+ original_table
172
+ else
173
+ "#{original_table}_#{today.strftime(name_format(period))}"
174
+ end
175
+ index_defs = execute("SELECT pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = #{regclass(schema, schema_table)} AND indisprimary = 'f'").map { |r| r["pg_get_indexdef"] }
176
+ fk_defs = foreign_keys(schema_table)
177
+ primary_key = self.primary_key(schema_table)
178
+
149
179
  added_partitions = []
150
180
  range.each do |n|
151
181
  day = advance_date(today, period, n)
@@ -154,48 +184,59 @@ SQL
154
184
  next if table_exists?(partition_name)
155
185
  added_partitions << partition_name
156
186
 
157
- queries << <<-SQL
187
+ if declarative
188
+ queries << <<-SQL
189
+ CREATE TABLE #{quote_ident(partition_name)} PARTITION OF #{quote_ident(table)} FOR VALUES FROM (#{sql_date(day, cast, false)}) TO (#{sql_date(advance_date(day, period, 1), cast, false)});
190
+ SQL
191
+ else
192
+ queries << <<-SQL
158
193
  CREATE TABLE #{quote_ident(partition_name)}
159
194
  (CHECK (#{quote_ident(field)} >= #{sql_date(day, cast)} AND #{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}))
160
195
  INHERITS (#{quote_ident(table)});
161
- SQL
196
+ SQL
197
+ end
162
198
 
163
199
  queries << "ALTER TABLE #{quote_ident(partition_name)} ADD PRIMARY KEY (#{quote_ident(primary_key)});" if primary_key
164
200
 
165
201
  index_defs.each do |index_def|
166
202
  queries << index_def.sub(/ ON \S+ USING /, " ON #{quote_ident(partition_name)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
167
203
  end
168
- end
169
204
 
170
- # update trigger based on existing partitions
171
- current_defs = []
172
- future_defs = []
173
- past_defs = []
174
- name_format = self.name_format(period)
175
- existing_tables = existing_partitions(original_table)
176
- existing_tables = (existing_tables + added_partitions).uniq.sort
177
-
178
- existing_tables.each do |table|
179
- day = DateTime.strptime(table.split("_").last, name_format)
180
- partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
181
-
182
- sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
183
- INSERT INTO #{quote_ident(partition_name)} VALUES (NEW.*);"
184
-
185
- if day.to_date < today
186
- past_defs << sql
187
- elsif advance_date(day, period, 1) < today
188
- current_defs << sql
189
- else
190
- future_defs << sql
205
+ fk_defs.each do |fk_def|
206
+ queries << "ALTER TABLE #{quote_ident(partition_name)} ADD #{fk_def};"
191
207
  end
192
208
  end
193
209
 
194
- # order by current period, future periods asc, past periods desc
195
- trigger_defs = current_defs + future_defs + past_defs.reverse
210
+ unless declarative
211
+ # update trigger based on existing partitions
212
+ current_defs = []
213
+ future_defs = []
214
+ past_defs = []
215
+ name_format = self.name_format(period)
216
+ existing_tables = existing_partitions(original_table)
217
+ existing_tables = (existing_tables + added_partitions).uniq.sort
218
+
219
+ existing_tables.each do |table|
220
+ day = DateTime.strptime(table.split("_").last, name_format)
221
+ partition_name = "#{original_table}_#{day.strftime(name_format(period))}"
222
+
223
+ sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
224
+ INSERT INTO #{quote_ident(partition_name)} VALUES (NEW.*);"
225
+
226
+ if day.to_date < today
227
+ past_defs << sql
228
+ elsif advance_date(day, period, 1) < today
229
+ current_defs << sql
230
+ else
231
+ future_defs << sql
232
+ end
233
+ end
234
+
235
+ # order by current period, future periods asc, past periods desc
236
+ trigger_defs = current_defs + future_defs + past_defs.reverse
196
237
 
197
- if trigger_defs.any?
198
- queries << <<-SQL
238
+ if trigger_defs.any?
239
+ queries << <<-SQL
199
240
  CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
200
241
  RETURNS trigger AS $$
201
242
  BEGIN
@@ -206,7 +247,8 @@ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
206
247
  RETURN NULL;
207
248
  END;
208
249
  $$ LANGUAGE plpgsql;
209
- SQL
250
+ SQL
251
+ end
210
252
  end
211
253
 
212
254
  run_queries(queries) if queries.any?
@@ -231,7 +273,7 @@ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
231
273
  abort "Table not found: #{source_table}" unless table_exists?(source_table)
232
274
  abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
233
275
 
234
- period, field, cast, needs_comment = settings_from_trigger(table, dest_table)
276
+ period, field, cast, needs_comment, declarative = settings_from_trigger(table, dest_table)
235
277
 
236
278
  if period
237
279
  name_format = self.name_format(period)
@@ -243,7 +285,8 @@ CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}()
243
285
  end
244
286
  end
245
287
 
246
- primary_key = self.primary_key(table)
288
+ schema_table = period && declarative ? existing_tables.last : table
289
+ primary_key = self.primary_key(schema_table)
247
290
  abort "No primary key" unless primary_key
248
291
  max_source_id = max_id(source_table, primary_key)
249
292
 
@@ -368,6 +411,7 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
368
411
  o.integer "--batch-size", default: 10000
369
412
  o.boolean "--dry-run", default: false
370
413
  o.boolean "--no-partition", default: false
414
+ o.boolean "--trigger-based", default: false
371
415
  o.integer "--start"
372
416
  o.string "--url"
373
417
  o.string "--source-table"
@@ -524,6 +568,10 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
524
568
  !fetch_trigger(trigger_name, table).nil?
525
569
  end
526
570
 
571
+ def fetch_comment(table)
572
+ execute("SELECT obj_description(#{regclass(schema, table)}) AS comment")[0]
573
+ end
574
+
527
575
  # http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL
528
576
  def sequences(table)
529
577
  query = <<-SQL
@@ -561,13 +609,14 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
561
609
  data_type == "timestamp with time zone" ? "timestamptz" : "date"
562
610
  end
563
611
 
564
- def sql_date(time, cast)
612
+ def sql_date(time, cast, add_cast = true)
565
613
  if cast == "timestamptz"
566
614
  fmt = "%Y-%m-%d %H:%M:%S UTC"
567
615
  else
568
616
  fmt = "%Y-%m-%d"
569
617
  end
570
- "'#{time.strftime(fmt)}'::#{cast}"
618
+ str = "'#{time.strftime(fmt)}'"
619
+ add_cast ? "#{str}::#{cast}" : str
571
620
  end
572
621
 
573
622
  def name_format(period)
@@ -615,7 +664,8 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
615
664
  trigger_name = self.trigger_name(original_table)
616
665
 
617
666
  needs_comment = false
618
- comment = fetch_trigger(trigger_name, table)
667
+ trigger_comment = fetch_trigger(trigger_name, table)
668
+ comment = trigger_comment || fetch_comment(table)
619
669
  if comment
620
670
  field, period, cast = comment["comment"].split(",").map { |v| v.split(":").last } rescue [nil, nil, nil]
621
671
  end
@@ -623,10 +673,10 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
623
673
  unless period
624
674
  needs_comment = true
625
675
  function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
626
- return [nil, nil] unless function_def
676
+ return [] unless function_def
627
677
  function_def = function_def["pg_get_functiondef"]
628
678
  sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
629
- return [nil, nil] unless sql_format
679
+ return [] unless sql_format
630
680
  period = sql_format[0]
631
681
  field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
632
682
  end
@@ -638,7 +688,15 @@ INSERT INTO #{quote_ident(dest_table)} (#{fields})
638
688
  needs_comment = true
639
689
  end
640
690
 
641
- [period, field, cast, needs_comment]
691
+ [period, field, cast, needs_comment, !trigger_comment]
692
+ end
693
+
694
+ def foreign_keys(table)
695
+ execute("SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = #{regclass(schema, table)} AND contype ='f'").map { |r| r["pg_get_constraintdef"] }
696
+ end
697
+
698
+ def server_version_num
699
+ execute("SHOW server_version_num").first["server_version_num"].to_i
642
700
  end
643
701
  end
644
702
  end
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.3.6"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-10 00:00:00.000000000 Z
11
+ date: 2017-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop
@@ -89,6 +89,7 @@ extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
91
  - ".gitignore"
92
+ - ".travis.yml"
92
93
  - CHANGELOG.md
93
94
  - Dockerfile
94
95
  - Gemfile
@@ -117,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
118
  version: '0'
118
119
  requirements: []
119
120
  rubyforge_project:
120
- rubygems_version: 2.6.11
121
+ rubygems_version: 2.6.13
121
122
  signing_key:
122
123
  specification_version: 4
123
124
  summary: Postgres partitioning as easy as pie