pgslice 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 27fdf533604b2635d75b2eb92043617f8a53ed7a
4
- data.tar.gz: 5f5579762fdfe759f38a4eb03cce91acb90a7397
3
+ metadata.gz: 63369b304364c5cbc5b6b96e9303d3f46293d0b3
4
+ data.tar.gz: 15e76add4582111650e46daad02bbe755b3e45e1
5
5
  SHA512:
6
- metadata.gz: ac1abb9261c93e8b0bf057b6b16fd1a864f3e2ddd9b85f196df095731fbbbdac0b49e3878b966632c15ce9d7c6774d64231dc03d2e4a42b74fd185ffc177a8b5
7
- data.tar.gz: 9524851474990255e54c008477db65de535fcea6412900ea0c68a801518a79b7124175cde7cddd5e932a6575c76d282047642d7c8f4016e6cce13fd703a90877
6
+ metadata.gz: 64d23629b73f8adf89ce6adc03c2cc951b9b67e7edc8e03eb596546e43b40f1415ea494ccdcab4efb460686ff967941ddf4ba66491525d3b4612490b017b4a0f
7
+ data.tar.gz: 864e600e79f944667c11a151a23341366085b39d1cf81b10f1ef03408c9d6101690ad3697d36369f0151353baa90d7f7d0429b0507c6d738c33396ffb3cefa47
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.1.4
2
+
3
+ - Added sequence ownership
4
+ - Default to 0 for `--past` and `--future` options
5
+ - Better `fill` with `--swapped`
6
+
1
7
  ## 0.1.3
2
8
 
3
9
  - Fixed table inheritance
data/README.md CHANGED
@@ -92,21 +92,19 @@ pgslice unswap <table>
92
92
  $ pgslice prep locations created_at day
93
93
  BEGIN;
94
94
 
95
- CREATE TABLE locations_intermediate (
96
- LIKE locations INCLUDING ALL
97
- );
95
+ CREATE TABLE locations_intermediate (LIKE locations INCLUDING ALL);
98
96
 
99
97
  CREATE FUNCTION locations_insert_trigger()
100
- RETURNS trigger AS $$
101
- BEGIN
102
- EXECUTE 'INSERT INTO public.locations_' || to_char(NEW.created_at, 'YYYYMMDD') || ' VALUES ($1.*)' USING NEW;
103
- RETURN NULL;
104
- END;
105
- $$ LANGUAGE plpgsql;
98
+ RETURNS trigger AS $$
99
+ BEGIN
100
+ EXECUTE 'INSERT INTO locations_' || to_char(NEW.created_at, 'YYYYMMDD') || ' VALUES ($1.*)' USING NEW;
101
+ RETURN NULL;
102
+ END;
103
+ $$ LANGUAGE plpgsql;
106
104
 
107
105
  CREATE TRIGGER locations_insert_trigger
108
- BEFORE INSERT ON locations_intermediate
109
- FOR EACH ROW EXECUTE PROCEDURE locations_insert_trigger();
106
+ BEFORE INSERT ON locations_intermediate
107
+ FOR EACH ROW EXECUTE PROCEDURE locations_insert_trigger();
110
108
 
111
109
  COMMIT;
112
110
  ```
@@ -115,25 +113,25 @@ COMMIT;
115
113
  $ pgslice add_partitions locations --intermediate --past 1 --future 1
116
114
  BEGIN;
117
115
 
118
- CREATE TABLE locations_20160423 (
119
- CHECK (created_at >= '2016-04-23'::date AND created_at < '2016-04-24'::date)
120
- ) INHERITS (locations_intermediate);
116
+ CREATE TABLE locations_20160423
117
+ (CHECK (created_at >= '2016-04-23'::date AND created_at < '2016-04-24'::date))
118
+ INHERITS (locations_intermediate);
121
119
 
122
120
  ALTER TABLE locations_20160423 ADD PRIMARY KEY (id);
123
121
 
124
122
  CREATE INDEX ON locations_20160423 USING btree (updated_at, shopper_id);
125
123
 
126
- CREATE TABLE locations_20160424 (
127
- CHECK (created_at >= '2016-04-24'::date AND created_at < '2016-04-25'::date)
128
- ) INHERITS (locations_intermediate);
124
+ CREATE TABLE locations_20160424
125
+ (CHECK (created_at >= '2016-04-24'::date AND created_at < '2016-04-25'::date))
126
+ INHERITS (locations_intermediate);
129
127
 
130
128
  ALTER TABLE locations_20160424 ADD PRIMARY KEY (id);
131
129
 
132
130
  CREATE INDEX ON locations_20160424 USING btree (updated_at, shopper_id);
133
131
 
134
- CREATE TABLE locations_20160425 (
135
- CHECK (created_at >= '2016-04-25'::date AND created_at < '2016-04-26'::date)
136
- ) INHERITS (locations_intermediate);
132
+ CREATE TABLE locations_20160425
133
+ (CHECK (created_at >= '2016-04-25'::date AND created_at < '2016-04-26'::date))
134
+ INHERITS (locations_intermediate);
137
135
 
138
136
  ALTER TABLE locations_20160425 ADD PRIMARY KEY (id);
139
137
 
@@ -142,6 +140,24 @@ CREATE INDEX ON locations_20160425 USING btree (updated_at, shopper_id);
142
140
  COMMIT;
143
141
  ```
144
142
 
143
+ ```console
144
+ $ pgslice fill locations
145
+ /* 1 of 3 */
146
+ INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
147
+ SELECT id, latitude, longitude, created_at FROM locations
148
+ WHERE id > 0 AND id <= 10000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
149
+
150
+ /* 2 of 3 */
151
+ INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
152
+ SELECT id, latitude, longitude, created_at FROM locations
153
+ WHERE id > 10000 AND id <= 20000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
154
+
155
+ /* 3 of 3 */
156
+ INSERT INTO locations_intermediate (id, latitude, longitude, created_at)
157
+ SELECT id, latitude, longitude, created_at FROM locations
158
+ WHERE id > 20000 AND id <= 30000 AND created_at >= '2016-04-23'::date AND created_at < '2016-04-26'::date
159
+ ```
160
+
145
161
  ```console
146
162
  $ pgslice swap locations
147
163
  BEGIN;
@@ -150,6 +166,23 @@ ALTER TABLE locations RENAME TO locations_retired;
150
166
 
151
167
  ALTER TABLE locations_intermediate RENAME TO locations;
152
168
 
169
+ ALTER SEQUENCE locations_id_seq OWNED BY locations.id;
170
+
171
+ COMMIT;
172
+ ```
173
+
174
+ ```console
175
+ $ pgslice add_partitions locations --future 2
176
+ BEGIN;
177
+
178
+ CREATE TABLE locations_20160426
179
+ (CHECK (created_at >= '2016-04-26'::date AND created_at < '2016-04-27'::date))
180
+ INHERITS (locations);
181
+
182
+ ALTER TABLE locations_20160426 ADD PRIMARY KEY (id);
183
+
184
+ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
185
+
153
186
  COMMIT;
154
187
  ```
155
188
 
data/lib/pgslice.rb CHANGED
@@ -15,6 +15,8 @@ module PgSlice
15
15
  }
16
16
 
17
17
  def initialize(args)
18
+ $stdout.sync = true
19
+ $stderr.sync = true
18
20
  parse_args(args)
19
21
  @command = @arguments.shift
20
22
  end
@@ -62,26 +64,24 @@ module PgSlice
62
64
  queries = []
63
65
 
64
66
  queries << <<-SQL
65
- CREATE TABLE #{intermediate_table} (
66
- LIKE #{table} INCLUDING ALL
67
- );
67
+ CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
68
68
  SQL
69
69
 
70
70
  sql_format = SQL_FORMAT[period.to_sym]
71
71
  queries << <<-SQL
72
72
  CREATE FUNCTION #{trigger_name}()
73
- RETURNS trigger AS $$
74
- BEGIN
75
- EXECUTE 'INSERT INTO public.#{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
76
- RETURN NULL;
77
- END;
78
- $$ LANGUAGE plpgsql;
73
+ RETURNS trigger AS $$
74
+ BEGIN
75
+ EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
76
+ RETURN NULL;
77
+ END;
78
+ $$ LANGUAGE plpgsql;
79
79
  SQL
80
80
 
81
81
  queries << <<-SQL
82
82
  CREATE TRIGGER #{trigger_name}
83
- BEFORE INSERT ON #{intermediate_table}
84
- FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
83
+ BEFORE INSERT ON #{intermediate_table}
84
+ FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
85
85
  SQL
86
86
 
87
87
  run_queries(queries)
@@ -129,15 +129,13 @@ FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
129
129
  partition_name = "#{original_table}_#{day.strftime(name_format)}"
130
130
  next if table_exists?(partition_name)
131
131
 
132
- date_format = "%Y-%m-%d"
133
-
134
132
  queries << <<-SQL
135
- CREATE TABLE #{partition_name} (
136
- CHECK (#{field} >= '#{day.strftime(date_format)}'::date AND #{field} < '#{(day + inc).strftime(date_format)}'::date)
137
- ) INHERITS (#{table});
133
+ CREATE TABLE #{partition_name}
134
+ (CHECK (#{field} >= #{sql_date(day)} AND #{field} < #{sql_date(day + inc)}))
135
+ INHERITS (#{table});
138
136
  SQL
139
137
 
140
- queries << "ALTER TABLE #{partition_name} ADD PRIMARY KEY (#{primary_key});"
138
+ queries << "ALTER TABLE #{partition_name} ADD PRIMARY KEY (#{primary_key});" if primary_key
141
139
 
142
140
  index_defs.each do |index_def|
143
141
  queries << index_def.sub(" ON #{original_table} USING ", " ON #{partition_name} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
@@ -165,34 +163,44 @@ CREATE TABLE #{partition_name} (
165
163
 
166
164
  period, field, name_format, inc, today = settings_from_table(table, dest_table)
167
165
 
168
- date_format = "%Y-%m-%d"
169
- existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }
166
+ existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
170
167
  starting_time = DateTime.strptime(existing_tables.first.last(8), name_format)
171
168
  ending_time = DateTime.strptime(existing_tables.last.last(8), name_format) + inc
172
169
 
173
170
  primary_key = self.primary_key(table)
174
171
  max_source_id = max_id(source_table, primary_key)
175
- max_dest_id = max_id(dest_table, primary_key)
172
+ max_dest_id =
173
+ if options[:swapped]
174
+ max_id(dest_table, primary_key, below: max_source_id)
175
+ else
176
+ max_id(dest_table, primary_key)
177
+ end
178
+
179
+ if max_dest_id == 0 && !options[:swapped]
180
+ min_source_id = min_id(source_table, primary_key, field, starting_time)
181
+ max_dest_id = min_source_id - 1 if min_source_id
182
+ end
176
183
 
177
- starting_id = max_dest_id + 1
184
+ starting_id = max_dest_id
178
185
  fields = columns(source_table).join(", ")
179
186
  batch_size = options[:batch_size]
180
187
 
181
- log "Overview"
182
- log "#{source_table} max #{primary_key}: #{max_source_id}"
183
- log "#{dest_table} max #{primary_key}: #{max_dest_id}"
184
- log "time period: #{starting_time.to_date} -> #{ending_time.to_date}"
185
- log
186
-
187
- log "Batches"
188
- while starting_id <= max_source_id
189
- log "#{starting_id}..#{[starting_id + batch_size - 1, max_source_id].min}"
188
+ i = 1
189
+ batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
190
+ while starting_id < max_source_id
191
+ query = <<-SQL
192
+ /* #{i} of #{batch_count} */
193
+ INSERT INTO #{dest_table} (#{fields})
194
+ SELECT #{fields} FROM #{source_table}
195
+ WHERE #{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size} AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}
196
+ SQL
190
197
 
191
- query = "INSERT INTO #{dest_table} (#{fields}) SELECT #{fields} FROM #{source_table} WHERE #{primary_key} >= #{starting_id} AND #{primary_key} < #{starting_id + batch_size} AND #{field} >= '#{starting_time.strftime(date_format)}'::date AND #{field} < '#{ending_time.strftime(date_format)}'::date"
192
- log query if options[:debug]
198
+ log_sql(query)
199
+ log_sql
193
200
  execute(query)
194
201
 
195
202
  starting_id += batch_size
203
+ i += 1
196
204
 
197
205
  if options[:sleep] && starting_id <= max_source_id
198
206
  sleep(options[:sleep])
@@ -214,6 +222,11 @@ CREATE TABLE #{partition_name} (
214
222
  "ALTER TABLE #{table} RENAME TO #{retired_table};",
215
223
  "ALTER TABLE #{intermediate_table} RENAME TO #{table};"
216
224
  ]
225
+
226
+ self.sequences(table).each do |sequence|
227
+ queries << "ALTER SEQUENCE #{sequence["sequence_name"]} OWNED BY #{table}.#{sequence["related_column"]};"
228
+ end
229
+
217
230
  run_queries(queries)
218
231
  end
219
232
 
@@ -231,6 +244,11 @@ CREATE TABLE #{partition_name} (
231
244
  "ALTER TABLE #{table} RENAME TO #{intermediate_table};",
232
245
  "ALTER TABLE #{retired_table} RENAME TO #{table};"
233
246
  ]
247
+
248
+ self.sequences(table).each do |sequence|
249
+ queries << "ALTER SEQUENCE #{sequence["sequence_name"]} OWNED BY #{table}.#{sequence["related_column"]};"
250
+ end
251
+
234
252
  run_queries(queries)
235
253
  end
236
254
 
@@ -242,8 +260,8 @@ CREATE TABLE #{partition_name} (
242
260
  o.boolean "--swapped"
243
261
  o.boolean "--debug"
244
262
  o.float "--sleep"
245
- o.integer "--future", default: 3
246
- o.integer "--past", default: 3
263
+ o.integer "--future", default: 0
264
+ o.integer "--past", default: 0
247
265
  o.integer "--batch-size", default: 10000
248
266
  o.boolean "--dry-run", default: false
249
267
  o.on "-v", "--version", "print the version" do
@@ -343,12 +361,37 @@ CREATE TABLE #{partition_name} (
343
361
  row && row["attname"]
344
362
  end
345
363
 
346
- def max_id(table, primary_key)
347
- execute("SELECT MAX(#{primary_key}) FROM #{table}")[0]["max"].to_i
364
+ def max_id(table, primary_key, below: nil)
365
+ query = "SELECT MAX(#{primary_key}) FROM #{table}"
366
+ query << " WHERE #{primary_key} <= #{below}" if below
367
+ execute(query)[0]["max"].to_i
368
+ end
369
+
370
+ def min_id(table, primary_key, column, starting_time)
371
+ query = "SELECT MIN(#{primary_key}) FROM #{table} WHERE #{column} >= #{sql_date(starting_time)}"
372
+ execute(query)[0]["min"].to_i
348
373
  end
349
374
 
350
375
  def has_trigger?(trigger_name, table)
351
- execute("SELECT 1 FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, "public.#{table}"]).any?
376
+ execute("SELECT 1 FROM pg_trigger WHERE tgname = $1 AND tgrelid = $2::regclass", [trigger_name, table]).any?
377
+ end
378
+
379
+ # http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL
380
+ def sequences(table)
381
+ query = <<-SQL
382
+ SELECT
383
+ a.attname as related_column,
384
+ s.relname as sequence_name
385
+ FROM pg_class s
386
+ JOIN pg_depend d ON d.objid = s.oid
387
+ JOIN pg_class t ON d.objid = s.oid AND d.refobjid = t.oid
388
+ JOIN pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
389
+ JOIN pg_namespace n ON n.oid = s.relnamespace
390
+ WHERE s.relkind = 'S'
391
+ AND n.nspname = 'public'
392
+ AND t.relname = $1
393
+ SQL
394
+ execute(query, [table])
352
395
  end
353
396
 
354
397
  # helpers
@@ -365,6 +408,10 @@ CREATE TABLE #{partition_name} (
365
408
  "#{table}_retired"
366
409
  end
367
410
 
411
+ def sql_date(time)
412
+ "'#{time.strftime("%Y-%m-%d")}'::date"
413
+ end
414
+
368
415
  def settings_from_table(original_table, table)
369
416
  trigger_name = self.trigger_name(original_table)
370
417
  function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]["pg_get_functiondef"]
data/lib/pgslice/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane