pgslice 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 27fdf533604b2635d75b2eb92043617f8a53ed7a
4
- data.tar.gz: 5f5579762fdfe759f38a4eb03cce91acb90a7397
3
+ metadata.gz: 63369b304364c5cbc5b6b96e9303d3f46293d0b3
4
+ data.tar.gz: 15e76add4582111650e46daad02bbe755b3e45e1
5
5
  SHA512:
6
- metadata.gz: ac1abb9261c93e8b0bf057b6b16fd1a864f3e2ddd9b85f196df095731fbbbdac0b49e3878b966632c15ce9d7c6774d64231dc03d2e4a42b74fd185ffc177a8b5
7
- data.tar.gz: 9524851474990255e54c008477db65de535fcea6412900ea0c68a801518a79b7124175cde7cddd5e932a6575c76d282047642d7c8f4016e6cce13fd703a90877
6
+ metadata.gz: 64d23629b73f8adf89ce6adc03c2cc951b9b67e7edc8e03eb596546e43b40f1415ea494ccdcab4efb460686ff967941ddf4ba66491525d3b4612490b017b4a0f
7
+ data.tar.gz: 864e600e79f944667c11a151a23341366085b39d1cf81b10f1ef03408c9d6101690ad3697d36369f0151353baa90d7f7d0429b0507c6d738c33396ffb3cefa47
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.1.4
2
+
3
+ - Added sequence ownership
4
+ - Default to 0 for `--past` and `--future` options
5
+ - Better `fill` with `--swapped`
6
+
1
7
  ## 0.1.3
2
8
 
3
9
  - Fixed table inheritance
data/README.md CHANGED
@@ -92,21 +92,19 @@ pgslice unswap <table>
92
92
  $ pgslice prep locations created_at day
93
93
  BEGIN;
94
94
 
95
- CREATE TABLE locations_intermediate (
96
- LIKE locations INCLUDING ALL
97
- );
95
+ CREATE TABLE locations_intermediate (LIKE locations INCLUDING ALL);
98
96
 
99
97
  CREATE FUNCTION locations_insert_trigger()
100
- RETURNS trigger AS $$
101
- BEGIN
102
- EXECUTE 'INSERT INTO public.locations_' || to_char(NEW.created_at, 'YYYYMMDD') || ' VALUES ($1.*)' USING NEW;
103
- RETURN NULL;
104
- END;
105
- $$ LANGUAGE plpgsql;
98
+ RETURNS trigger AS $$
99
+ BEGIN
100
+ EXECUTE 'INSERT INTO locations_' || to_char(NEW.created_at, 'YYYYMMDD') || ' VALUES ($1.*)' USING NEW;
101
+ RETURN NULL;
102
+ END;
103
+ $$ LANGUAGE plpgsql;
106
104
 
107
105
  CREATE TRIGGER locations_insert_trigger
108
- BEFORE INSERT ON locations_intermediate
109
- FOR EACH ROW EXECUTE PROCEDURE locations_insert_trigger();
106
+ BEFORE INSERT ON locations_intermediate
107
+ FOR EACH ROW EXECUTE PROCEDURE locations_insert_trigger();
110
108
 
111
109
  COMMIT;
112
110
  ```
@@ -115,25 +113,25 @@ COMMIT;
115
113
  $ pgslice add_partitions locations --intermediate --past 1 --future 1
116
114
  BEGIN;
117
115
 
118
- CREATE TABLE locations_20160423 (
119
- CHECK (created_at >= '2016-04-23'::date AND created_at < '2016-04-24'::date)
120
- ) INHERITS (locations_intermediate);
116
+ CREATE TABLE locations_20160423
117
+ (CHECK (created_at >= '2016-04-23'::date AND created_at < '2016-04-24'::date))
118
+ INHERITS (locations_intermediate);
121
119
 
122
120
  ALTER TABLE locations_20160423 ADD PRIMARY KEY (id);
123
121
 
124
122
  CREATE INDEX ON locations_20160423 USING btree (updated_at, shopper_id);
125
123
 
126
- CREATE TABLE locations_20160424 (
127
- CHECK (created_at >= '2016-04-24'::date AND created_at < '2016-04-25'::date)
128
- ) INHERITS (locations_intermediate);
124
+ CREATE TABLE locations_20160424
125
+ (CHECK (created_at >= '2016-04-24'::date AND created_at < '2016-04-25'::date))
126
+ INHERITS (locations_intermediate);
129
127
 
130
128
  ALTER TABLE locations_20160424 ADD PRIMARY KEY (id);
131
129
 
132
130
  CREATE INDEX ON locations_20160424 USING btree (updated_at, shopper_id);
133
131
 
134
- CREATE TABLE locations_20160425 (
135
- CHECK (created_at >= '2016-04-25'::date AND created_at < '2016-04-26'::date)
136
- ) INHERITS (locations_intermediate);
132
+ CREATE TABLE locations_20160425
133
+ (CHECK (created_at >= '2016-04-25'::date AND created_at < '2016-04-26'::date))
134
+ INHERITS (locations_intermediate);
137
135
 
138
136
  ALTER TABLE locations_20160425 ADD PRIMARY KEY (id);
139
137
 
@@ -142,6 +140,24 @@ CREATE INDEX ON locations_20160425 USING btree (updated_at, shopper_id);
142
140
  COMMIT;
143
141
  ```
144
142
 
143
+ ```console
144
+ $ pgslice fill locations
145
+ /* 1 of 3 */
146
+ INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
147
+ SELECT id, latitude, longitude, created_at FROM locations
148
+ WHERE id > 0 AND id <= 10000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
149
+
150
+ /* 2 of 3 */
151
+ INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
152
+ SELECT id, latitude, longitude, created_at FROM locations
153
+ WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
154
+
155
+ /* 3 of 3 */
156
+ INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
157
+ SELECT id, latitude, longitude, created_at FROM locations
158
+ WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
159
+ ```
160
+
145
161
  ```console
146
162
  $ pgslice swap locations
147
163
  BEGIN;
@@ -150,6 +166,23 @@ ALTER TABLE locations RENAME TO locations_retired;
150
166
 
151
167
  ALTER TABLE locations_intermediate RENAME TO locations;
152
168
 
169
+ ALTER SEQUENCE locations_id_seq OWNED BY locations.id;
170
+
171
+ COMMIT;
172
+ ```
173
+
174
+ ```console
175
+ $ pgslice add_partitions locations --future 2
176
+ BEGIN;
177
+
178
+ CREATE TABLE locations_20160426
179
+ (CHECK (created_at >= '2016-04-26'::date AND created_at < '2016-04-27'::date))
180
+ INHERITS (locations);
181
+
182
+ ALTER TABLE locations_20160426 ADD PRIMARY KEY (id);
183
+
184
+ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
185
+
153
186
  COMMIT;
154
187
  ```
155
188
 
data/lib/pgslice.rb CHANGED
@@ -15,6 +15,8 @@ module PgSlice
15
15
  }
16
16
 
17
17
  def initialize(args)
18
+ $stdout.sync = true
19
+ $stderr.sync = true
18
20
  parse_args(args)
19
21
  @command = @arguments.shift
20
22
  end
@@ -62,26 +64,24 @@ module PgSlice
62
64
  queries = []
63
65
 
64
66
  queries << <<-SQL
65
- CREATE TABLE #{intermediate_table} (
66
- LIKE #{table} INCLUDING ALL
67
- );
67
+ CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
68
68
  SQL
69
69
 
70
70
  sql_format = SQL_FORMAT[period.to_sym]
71
71
  queries << <<-SQL
72
72
  CREATE FUNCTION #{trigger_name}()
73
- RETURNS trigger AS $$
74
- BEGIN
75
- EXECUTE 'INSERT INTO public.#{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
76
- RETURN NULL;
77
- END;
78
- $$ LANGUAGE plpgsql;
73
+ RETURNS trigger AS $$
74
+ BEGIN
75
+ EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
76
+ RETURN NULL;
77
+ END;
78
+ $$ LANGUAGE plpgsql;
79
79
  SQL
80
80
 
81
81
  queries << <<-SQL
82
82
  CREATE TRIGGER #{trigger_name}
83
- BEFORE INSERT ON #{intermediate_table}
84
- FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
83
+ BEFORE INSERT ON #{intermediate_table}
84
+ FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
85
85
  SQL
86
86
 
87
87
  run_queries(queries)
@@ -129,15 +129,13 @@ FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
129
129
  partition_name = "#{original_table}_#{day.strftime(name_format)}"
130
130
  next if table_exists?(partition_name)
131
131
 
132
- date_format = "%Y-%m-%d"
133
-
134
132
  queries << <<-SQL
135
- CREATE TABLE #{partition_name} (
136
- CHECK (#{field} >= '#{day.strftime(date_format)}'::date AND #{field} < '#{(day + inc).strftime(date_format)}'::date)
137
- ) INHERITS (#{table});
133
+ CREATE TABLE #{partition_name}
134
+ (CHECK (#{field} >= #{sql_date(day)} AND #{field} < #{sql_date(day + inc)}))
135
+ INHERITS (#{table});
138
136
  SQL
139
137
 
140
- queries << "ALTER TABLE #{partition_name} ADD PRIMARY KEY (#{primary_key});"
138
+ queries << "ALTER TABLE #{partition_name} ADD PRIMARY KEY (#{primary_key});" if primary_key
141
139
 
142
140
  index_defs.each do |index_def|
143
141
  queries << index_def.sub(" ON #{original_table} USING ", " ON #{partition_name} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
@@ -165,34 +163,44 @@ CREATE TABLE #{partition_name} (
165
163
 
166
164
  period, field, name_format, inc, today = settings_from_table(table, dest_table)
167
165
 
168
- date_format = "%Y-%m-%d"
169
- existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }
166
+ existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
170
167
  starting_time = DateTime.strptime(existing_tables.first.last(8), name_format)
171
168
  ending_time = DateTime.strptime(existing_tables.last.last(8), name_format) + inc
172
169
 
173
170
  primary_key = self.primary_key(table)
174
171
  max_source_id = max_id(source_table, primary_key)
175
- max_dest_id = max_id(dest_table, primary_key)
172
+ max_dest_id =
173
+ if options[:swapped]
174
+ max_id(dest_table, primary_key, below: max_source_id)
175
+ else
176
+ max_id(dest_table, primary_key)
177
+ end
178
+
179
+ if max_dest_id == 0 && !options[:swapped]
180
+ min_source_id = min_id(source_table, primary_key, field, starting_time)
181
+ max_dest_id = min_source_id - 1 if min_source_id
182
+ end
176
183
 
177
- starting_id = max_dest_id + 1
184
+ starting_id = max_dest_id
178
185
  fields = columns(source_table).join(", ")
179
186
  batch_size = options[:batch_size]
180
187
 
181
- log "Overview"
182
- log "#{source_table} max #{primary_key}: #{max_source_id}"
183
- log "#{dest_table} max #{primary_key}: #{max_dest_id}"
184
- log "time period: #{starting_time.to_date} -> #{ending_time.to_date}"
185
- log
186
-
187
- log "Batches"
188
- while starting_id <= max_source_id
189
- log "#{starting_id}..#{[starting_id + batch_size - 1, max_source_id].min}"
188
+ i = 1
189
+ batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
190
+ while starting_id < max_source_id
191
+ query = <<-SQL
192
+ /* #{i} of #{batch_count} */
193
+ INSERT INTO #{dest_table} (#{fields})
194
+ SELECT #{fields} FROM #{source_table}
195
+ WHERE #{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size} AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}
196
+ SQL
190
197
 
191
- query = "INSERT INTO #{dest_table} (#{fields}) SELECT #{fields} FROM #{source_table} WHERE #{primary_key} >= #{starting_id} AND #{primary_key} < #{starting_id + batch_size} AND #{field} >= '#{starting_time.strftime(date_format)}'::date AND #{field} < '#{ending_time.strftime(date_format)}'::date"
192
- log query if options[:debug]
198
+ log_sql(query)
199
+ log_sql
193
200
  execute(query)
194
201
 
195
202
  starting_id += batch_size
203
+ i += 1
196
204
 
197
205
  if options[:sleep] && starting_id <= max_source_id
198
206
  sleep(options[:sleep])
@@ -214,6 +222,11 @@ CREATE TABLE #{partition_name} (
214
222
  "ALTER TABLE #{table} RENAME TO #{retired_table};",
215
223
  "ALTER TABLE #{intermediate_table} RENAME TO #{table};"
216
224
  ]
225
+
226
+ self.sequences(table).each do |sequence|
227
+ queries << "ALTER SEQUENCE #{sequence["sequence_name"]} OWNED BY #{table}.#{sequence["related_column"]};"
228
+ end
229
+
217
230
  run_queries(queries)
218
231
  end
219
232
 
@@ -231,6 +244,11 @@ CREATE TABLE #{partition_name} (
231
244
  "ALTER TABLE #{table} RENAME TO #{intermediate_table};",
232
245
  "ALTER TABLE #{retired_table} RENAME TO #{table};"
233
246
  ]
247
+
248
+ self.sequences(table).each do |sequence|
249
+ queries << "ALTER SEQUENCE #{sequence["sequence_name"]} OWNED BY #{table}.#{sequence["related_column"]};"
250
+ end
251
+
234
252
  run_queries(queries)
235
253
  end
236
254
 
@@ -242,8 +260,8 @@ CREATE TABLE #{partition_name} (
242
260
  o.boolean "--swapped"
243
261
  o.boolean "--debug"
244
262
  o.float "--sleep"
245
- o.integer "--future", default: 3
246
- o.integer "--past", default: 3
263
+ o.integer "--future", default: 0
264
+ o.integer "--past", default: 0
247
265
  o.integer "--batch-size", default: 10000
248
266
  o.boolean "--dry-run", default: false
249
267
  o.on "-v", "--version", "print the version" do
@@ -343,12 +361,37 @@ CREATE TABLE #{partition_name} (
343
361
  row && row["attname"]
344
362
  end
345
363
 
346
- def max_id(table, primary_key)
347
- execute("SELECT MAX(#{primary_key}) FROM #{table}")[0]["max"].to_i
364
+ def max_id(table, primary_key, below: nil)
365
+ query = "SELECT MAX(#{primary_key}) FROM #{table}"
366
+ query << " WHERE #{primary_key} <= #{below}" if below
367
+ execute(query)[0]["max"].to_i
368
+ end
369
+
370
+ def min_id(table, primary_key, column, starting_time)
371
+ query = "SELECT MIN(#{primary_key}) FROM #{table} WHERE #{column} >= #{sql_date(starting_time)}"
372
+ execute(query)[0]["min"].to_i
348
373
  end
349
374
 
350
375
  def has_trigger?(trigger_name, table)
351
- execute("SELECT 1 FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, "public.#{table}"]).any?
376
+ execute("SELECT 1 FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table]).any?
377
+ end
378
+
379
+ # http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL
380
+ def sequences(table)
381
+ query = <<-SQL
382
+ SELECT
383
+ a.attname as related_column,
384
+ s.relname as sequence_name
385
+ FROM pg_class s
386
+ JOIN pg_depend d ON d.objid = s.oid
387
+ JOIN pg_class t ON d.objid = s.oid AND d.refobjid = t.oid
388
+ JOIN pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
389
+ JOIN pg_namespace n ON n.oid = s.relnamespace
390
+ WHERE s.relkind = 'S'
391
+ AND n.nspname = 'public'
392
+ AND t.relname = $1
393
+ SQL
394
+ execute(query, [table])
352
395
  end
353
396
 
354
397
  # helpers
@@ -365,6 +408,10 @@ CREATE TABLE #{partition_name} (
365
408
  "#{table}_retired"
366
409
  end
367
410
 
411
+ def sql_date(time)
412
+ "'#{time.strftime("%Y-%m-%d")}'::date"
413
+ end
414
+
368
415
  def settings_from_table(original_table, table)
369
416
  trigger_name = self.trigger_name(original_table)
370
417
  function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]["pg_get_functiondef"]
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane