pgslice 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 02a6779afe17f04bc97713d360ac5bbdcdbd68e0
4
- data.tar.gz: e3aac8b61fed50af76c9bc7b4835b6f218a474d1
3
+ metadata.gz: 8b5f6975bd8ac6e9a10d0f9655a0b8a93f5ce1c1
4
+ data.tar.gz: 56f8ba1e389e723fa02aea9a2fdde9c39b64ea32
5
5
  SHA512:
6
- metadata.gz: dde7c9b7330f0edf78297d4a3a61e1d8d253c893f14fbe808083e3d55253fa4e4ffc3351f12ee34053c5fe4c999066b2eb7cdd8f801f594013ea7a477f1985db
7
- data.tar.gz: 6a578d1b2663fe77eedb21dfbf121284ed4a6ed026f4cfc4c4c60b911d6513168b4072bf30ab3fb3cb519227903e57c9bb11aa34400e1e9abb870e527d8a62d7
6
+ metadata.gz: 636dde577a3aaad53bccff51443f7ee82c0b72515af5db714c14c093bc8643a0715ae5fb3915535cc12a4eeed0baba6d988b47b5084b9ef5d3c69fe95d1ff5f6
7
+ data.tar.gz: 254f7d242c475accd162c2aa8143283cb9aae1f402b72ee9f168c4ab11b8538aabdf29fd2439d261f78ad69636b0304f682f575d321e2eecdb3497979f027e0d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.6
2
+
3
+ - Added `--no-partition` option to `prep`
4
+ - Added `--url` option
5
+
1
6
  ## 0.1.5
2
7
 
3
8
  - Removed `activesupport` dependency for speed
data/README.md CHANGED
@@ -46,6 +46,8 @@ gem install pgslice
46
46
 
47
47
  Use the `--batch-size` and `--sleep` options to control the speed.
48
48
 
49
+ To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
50
+
49
51
  5. Swap the intermediate table with the original table
50
52
 
51
53
  ```sh
@@ -70,6 +72,30 @@ To add partitions, use:
70
72
  pgslice add_partitions <table> --future 3
71
73
  ```
72
74
 
75
+ Add this as a cron job to create a new partition each day or month.
76
+
77
+ ```
78
+ # day
79
+ 0 0 * * * pgslice add_partitions <table> --future 3 --url ...
80
+
81
+ # month
82
+ 0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
83
+ ```
84
+
85
+ Add a monitor to ensure partitions are being created.
86
+
87
+ ```sql
88
+ SELECT 1 FROM
89
+ pg_catalog.pg_class c
90
+ INNER JOIN
91
+ pg_catalog.pg_namespace n ON n.oid = c.relnamespace
92
+ WHERE
93
+ c.relkind = 'r' AND
94
+ n.nspname = 'public' AND
95
+ c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
96
+ -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
97
+ ```
98
+
73
99
  ## Additional Commands
74
100
 
75
101
  To undo prep (which will delete partitions), use:
@@ -186,6 +212,16 @@ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
186
212
  COMMIT;
187
213
  ```
188
214
 
215
+ ## One Off Tasks
216
+
217
+ You can also reduce the size of a table without partitioning.
218
+
219
+ ```sh
220
+ pgslice prep <table> --no-partition
221
+ pgslice fill <table> --start 1000 # starting primary key
222
+ pgslice swap <table>
223
+ ```
224
+
189
225
  ## Upgrading
190
226
 
191
227
  Run:
data/lib/pgslice.rb CHANGED
@@ -54,11 +54,18 @@ module PgSlice
54
54
  intermediate_table = "#{table}_intermediate"
55
55
  trigger_name = self.trigger_name(table)
56
56
 
57
- abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
57
+ if options[:no_partition]
58
+ abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
59
+ else
60
+ abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
61
+ end
58
62
  abort "Table not found: #{table}" unless table_exists?(table)
59
63
  abort "Table already exists: #{intermediate_table}" if table_exists?(intermediate_table)
60
- abort "Column not found: #{column}" unless columns(table).include?(column)
61
- abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
64
+
65
+ unless options[:no_partition]
66
+ abort "Column not found: #{column}" unless columns(table).include?(column)
67
+ abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
68
+ end
62
69
 
63
70
  queries = []
64
71
 
@@ -66,22 +73,24 @@ module PgSlice
66
73
  CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
67
74
  SQL
68
75
 
69
- sql_format = SQL_FORMAT[period.to_sym]
70
- queries << <<-SQL
71
- CREATE FUNCTION #{trigger_name}()
72
- RETURNS trigger AS $$
73
- BEGIN
74
- EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
75
- RETURN NULL;
76
- END;
77
- $$ LANGUAGE plpgsql;
78
- SQL
76
+ unless options[:no_partition]
77
+ sql_format = SQL_FORMAT[period.to_sym]
78
+ queries << <<-SQL
79
+ CREATE FUNCTION #{trigger_name}()
80
+ RETURNS trigger AS $$
81
+ BEGIN
82
+ EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
83
+ RETURN NULL;
84
+ END;
85
+ $$ LANGUAGE plpgsql;
86
+ SQL
79
87
 
80
- queries << <<-SQL
81
- CREATE TRIGGER #{trigger_name}
82
- BEFORE INSERT ON #{intermediate_table}
83
- FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
84
- SQL
88
+ queries << <<-SQL
89
+ CREATE TRIGGER #{trigger_name}
90
+ BEFORE INSERT ON #{intermediate_table}
91
+ FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
92
+ SQL
93
+ end
85
94
 
86
95
  run_queries(queries)
87
96
  end
@@ -96,7 +105,7 @@ CREATE TRIGGER #{trigger_name}
96
105
 
97
106
  queries = [
98
107
  "DROP TABLE #{intermediate_table} CASCADE;",
99
- "DROP FUNCTION #{trigger_name}();"
108
+ "DROP FUNCTION IF EXISTS #{trigger_name}();"
100
109
  ]
101
110
  run_queries(queries)
102
111
  end
@@ -122,6 +131,7 @@ CREATE TRIGGER #{trigger_name}
122
131
  queries = []
123
132
 
124
133
  period, field = settings_from_table(original_table, table)
134
+ abort "Could not read settings" unless period
125
135
  today = round_date(Date.today, period)
126
136
  range.each do |n|
127
137
  day = advance_date(today, period, n)
@@ -162,11 +172,14 @@ CREATE TABLE #{partition_name}
162
172
  abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
163
173
 
164
174
  period, field = settings_from_table(table, dest_table)
165
- name_format = self.name_format(period)
166
175
 
167
- existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
168
- starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
169
- ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
176
+ if period
177
+ name_format = self.name_format(period)
178
+
179
+ existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
180
+ starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
181
+ ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
182
+ end
170
183
 
171
184
  primary_key = self.primary_key(table)
172
185
  max_source_id = max_id(source_table, primary_key)
@@ -178,8 +191,12 @@ CREATE TABLE #{partition_name}
178
191
  end
179
192
 
180
193
  if max_dest_id == 0 && !options[:swapped]
181
- min_source_id = min_id(source_table, primary_key, field, starting_time)
182
- max_dest_id = min_source_id - 1 if min_source_id
194
+ if options[:start]
195
+ max_dest_id = options[:start]
196
+ else
197
+ min_source_id = min_id(source_table, primary_key, field, starting_time)
198
+ max_dest_id = min_source_id - 1 if min_source_id
199
+ end
183
200
  end
184
201
 
185
202
  starting_id = max_dest_id
@@ -189,11 +206,16 @@ CREATE TABLE #{partition_name}
189
206
  i = 1
190
207
  batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
191
208
  while starting_id < max_source_id
209
+ where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
210
+ if period
211
+ where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
212
+ end
213
+
192
214
  query = <<-SQL
193
215
  /* #{i} of #{batch_count} */
194
216
  INSERT INTO #{dest_table} (#{fields})
195
217
  SELECT #{fields} FROM #{source_table}
196
- WHERE #{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size} AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}
218
+ WHERE #{where}
197
219
  SQL
198
220
 
199
221
  log_sql(query)
@@ -265,6 +287,9 @@ INSERT INTO #{dest_table} (#{fields})
265
287
  o.integer "--past", default: 0
266
288
  o.integer "--batch-size", default: 10000
267
289
  o.boolean "--dry-run", default: false
290
+ o.boolean "--no-partition", default: false
291
+ o.integer "--start"
292
+ o.string "--url"
268
293
  o.on "-v", "--version", "print the version" do
269
294
  log PgSlice::VERSION
270
295
  @exit = true
@@ -294,8 +319,9 @@ INSERT INTO #{dest_table} (#{fields})
294
319
 
295
320
  def connection
296
321
  @connection ||= begin
297
- abort "Set PGSLICE_URL" unless ENV["PGSLICE_URL"]
298
- uri = URI.parse(ENV["PGSLICE_URL"])
322
+ url = options[:url] || ENV["PGSLICE_URL"]
323
+ abort "Set PGSLICE_URL or use the --url option" unless url
324
+ uri = URI.parse(url)
299
325
  uri_parser = URI::Parser.new
300
326
  config = {
301
327
  host: uri.host,
@@ -369,8 +395,9 @@ INSERT INTO #{dest_table} (#{fields})
369
395
  end
370
396
 
371
397
  def min_id(table, primary_key, column, starting_time)
372
- query = "SELECT MIN(#{primary_key}) FROM #{table} WHERE #{column} >= #{sql_date(starting_time)}"
373
- execute(query)[0]["min"].to_i
398
+ query = "SELECT MIN(#{primary_key}) FROM #{table}"
399
+ query << " WHERE #{column} >= #{sql_date(starting_time)}" if starting_time
400
+ (execute(query)[0]["min"] || 1).to_i
374
401
  end
375
402
 
376
403
  def has_trigger?(trigger_name, table)
@@ -444,9 +471,11 @@ INSERT INTO #{dest_table} (#{fields})
444
471
 
445
472
  def settings_from_table(original_table, table)
446
473
  trigger_name = self.trigger_name(original_table)
447
- function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]["pg_get_functiondef"]
474
+ function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
475
+ return [nil, nil] unless function_def
476
+ function_def = function_def["pg_get_functiondef"]
448
477
  sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
449
- abort "Could not read settings" unless sql_format
478
+ return [nil, nil] unless sql_format
450
479
  period = sql_format[0]
451
480
  field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
452
481
  [period, field]
data/lib/pgslice/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-04-26 00:00:00.000000000 Z
11
+ date: 2016-08-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop