pgslice 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 02a6779afe17f04bc97713d360ac5bbdcdbd68e0
4
- data.tar.gz: e3aac8b61fed50af76c9bc7b4835b6f218a474d1
3
+ metadata.gz: 8b5f6975bd8ac6e9a10d0f9655a0b8a93f5ce1c1
4
+ data.tar.gz: 56f8ba1e389e723fa02aea9a2fdde9c39b64ea32
5
5
  SHA512:
6
- metadata.gz: dde7c9b7330f0edf78297d4a3a61e1d8d253c893f14fbe808083e3d55253fa4e4ffc3351f12ee34053c5fe4c999066b2eb7cdd8f801f594013ea7a477f1985db
7
- data.tar.gz: 6a578d1b2663fe77eedb21dfbf121284ed4a6ed026f4cfc4c4c60b911d6513168b4072bf30ab3fb3cb519227903e57c9bb11aa34400e1e9abb870e527d8a62d7
6
+ metadata.gz: 636dde577a3aaad53bccff51443f7ee82c0b72515af5db714c14c093bc8643a0715ae5fb3915535cc12a4eeed0baba6d988b47b5084b9ef5d3c69fe95d1ff5f6
7
+ data.tar.gz: 254f7d242c475accd162c2aa8143283cb9aae1f402b72ee9f168c4ab11b8538aabdf29fd2439d261f78ad69636b0304f682f575d321e2eecdb3497979f027e0d
@@ -1,3 +1,8 @@
1
+ ## 0.1.6
2
+
3
+ - Added `--no-partition` option to `prep`
4
+ - Added `--url` option
5
+
1
6
  ## 0.1.5
2
7
 
3
8
  - Removed `activesupport` dependency for speed
data/README.md CHANGED
@@ -46,6 +46,8 @@ gem install pgslice
46
46
 
47
47
  Use the `--batch-size` and `--sleep` options to control the speed.
48
48
 
49
+ To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
50
+
49
51
  5. Swap the intermediate table with the original table
50
52
 
51
53
  ```sh
@@ -70,6 +72,30 @@ To add partitions, use:
70
72
  pgslice add_partitions <table> --future 3
71
73
  ```
72
74
 
75
+ Add this as a cron job to create a new partition each day or month.
76
+
77
+ ```
78
+ # day
79
+ 0 0 * * * pgslice add_partitions <table> --future 3 --url ...
80
+
81
+ # month
82
+ 0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
83
+ ```
84
+
85
+ Add a monitor to ensure partitions are being created.
86
+
87
+ ```sql
88
+ SELECT 1 FROM
89
+ pg_catalog.pg_class c
90
+ INNER JOIN
91
+ pg_catalog.pg_namespace n ON n.oid = c.relnamespace
92
+ WHERE
93
+ c.relkind = 'r' AND
94
+ n.nspname = 'public' AND
95
+ c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
96
+ -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
97
+ ```
98
+
73
99
  ## Additional Commands
74
100
 
75
101
  To undo prep (which will delete partitions), use:
@@ -186,6 +212,16 @@ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
186
212
  COMMIT;
187
213
  ```
188
214
 
215
+ ## One-Off Tasks
216
+
217
+ You can also reduce the size of a table without partitioning.
218
+
219
+ ```sh
220
+ pgslice prep <table> --no-partition
221
+ pgslice fill <table> --start 1000 # copies rows with primary key greater than 1000
222
+ pgslice swap <table>
223
+ ```
224
+
189
225
  ## Upgrading
190
226
 
191
227
  Run:
@@ -54,11 +54,18 @@ module PgSlice
54
54
  intermediate_table = "#{table}_intermediate"
55
55
  trigger_name = self.trigger_name(table)
56
56
 
57
- abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
57
+ if options[:no_partition]
58
+ abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
59
+ else
60
+ abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
61
+ end
58
62
  abort "Table not found: #{table}" unless table_exists?(table)
59
63
  abort "Table already exists: #{intermediate_table}" if table_exists?(intermediate_table)
60
- abort "Column not found: #{column}" unless columns(table).include?(column)
61
- abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
64
+
65
+ unless options[:no_partition]
66
+ abort "Column not found: #{column}" unless columns(table).include?(column)
67
+ abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
68
+ end
62
69
 
63
70
  queries = []
64
71
 
@@ -66,22 +73,24 @@ module PgSlice
66
73
  CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
67
74
  SQL
68
75
 
69
- sql_format = SQL_FORMAT[period.to_sym]
70
- queries << <<-SQL
71
- CREATE FUNCTION #{trigger_name}()
72
- RETURNS trigger AS $$
73
- BEGIN
74
- EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
75
- RETURN NULL;
76
- END;
77
- $$ LANGUAGE plpgsql;
78
- SQL
76
+ unless options[:no_partition]
77
+ sql_format = SQL_FORMAT[period.to_sym]
78
+ queries << <<-SQL
79
+ CREATE FUNCTION #{trigger_name}()
80
+ RETURNS trigger AS $$
81
+ BEGIN
82
+ EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
83
+ RETURN NULL;
84
+ END;
85
+ $$ LANGUAGE plpgsql;
86
+ SQL
79
87
 
80
- queries << <<-SQL
81
- CREATE TRIGGER #{trigger_name}
82
- BEFORE INSERT ON #{intermediate_table}
83
- FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
84
- SQL
88
+ queries << <<-SQL
89
+ CREATE TRIGGER #{trigger_name}
90
+ BEFORE INSERT ON #{intermediate_table}
91
+ FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
92
+ SQL
93
+ end
85
94
 
86
95
  run_queries(queries)
87
96
  end
@@ -96,7 +105,7 @@ CREATE TRIGGER #{trigger_name}
96
105
 
97
106
  queries = [
98
107
  "DROP TABLE #{intermediate_table} CASCADE;",
99
- "DROP FUNCTION #{trigger_name}();"
108
+ "DROP FUNCTION IF EXISTS #{trigger_name}();"
100
109
  ]
101
110
  run_queries(queries)
102
111
  end
@@ -122,6 +131,7 @@ CREATE TRIGGER #{trigger_name}
122
131
  queries = []
123
132
 
124
133
  period, field = settings_from_table(original_table, table)
134
+ abort "Could not read settings" unless period
125
135
  today = round_date(Date.today, period)
126
136
  range.each do |n|
127
137
  day = advance_date(today, period, n)
@@ -162,11 +172,14 @@ CREATE TABLE #{partition_name}
162
172
  abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
163
173
 
164
174
  period, field = settings_from_table(table, dest_table)
165
- name_format = self.name_format(period)
166
175
 
167
- existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
168
- starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
169
- ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
176
+ if period
177
+ name_format = self.name_format(period)
178
+
179
+ existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
180
+ starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
181
+ ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
182
+ end
170
183
 
171
184
  primary_key = self.primary_key(table)
172
185
  max_source_id = max_id(source_table, primary_key)
@@ -178,8 +191,12 @@ CREATE TABLE #{partition_name}
178
191
  end
179
192
 
180
193
  if max_dest_id == 0 && !options[:swapped]
181
- min_source_id = min_id(source_table, primary_key, field, starting_time)
182
- max_dest_id = min_source_id - 1 if min_source_id
194
+ if options[:start]
195
+ max_dest_id = options[:start]
196
+ else
197
+ min_source_id = min_id(source_table, primary_key, field, starting_time)
198
+ max_dest_id = min_source_id - 1 if min_source_id
199
+ end
183
200
  end
184
201
 
185
202
  starting_id = max_dest_id
@@ -189,11 +206,16 @@ CREATE TABLE #{partition_name}
189
206
  i = 1
190
207
  batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
191
208
  while starting_id < max_source_id
209
+ where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
210
+ if period
211
+ where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
212
+ end
213
+
192
214
  query = <<-SQL
193
215
  /* #{i} of #{batch_count} */
194
216
  INSERT INTO #{dest_table} (#{fields})
195
217
  SELECT #{fields} FROM #{source_table}
196
- WHERE #{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size} AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}
218
+ WHERE #{where}
197
219
  SQL
198
220
 
199
221
  log_sql(query)
@@ -265,6 +287,9 @@ INSERT INTO #{dest_table} (#{fields})
265
287
  o.integer "--past", default: 0
266
288
  o.integer "--batch-size", default: 10000
267
289
  o.boolean "--dry-run", default: false
290
+ o.boolean "--no-partition", default: false
291
+ o.integer "--start"
292
+ o.string "--url"
268
293
  o.on "-v", "--version", "print the version" do
269
294
  log PgSlice::VERSION
270
295
  @exit = true
@@ -294,8 +319,9 @@ INSERT INTO #{dest_table} (#{fields})
294
319
 
295
320
  def connection
296
321
  @connection ||= begin
297
- abort "Set PGSLICE_URL" unless ENV["PGSLICE_URL"]
298
- uri = URI.parse(ENV["PGSLICE_URL"])
322
+ url = options[:url] || ENV["PGSLICE_URL"]
323
+ abort "Set PGSLICE_URL or use the --url option" unless url
324
+ uri = URI.parse(url)
299
325
  uri_parser = URI::Parser.new
300
326
  config = {
301
327
  host: uri.host,
@@ -369,8 +395,9 @@ INSERT INTO #{dest_table} (#{fields})
369
395
  end
370
396
 
371
397
  def min_id(table, primary_key, column, starting_time)
372
- query = "SELECT MIN(#{primary_key}) FROM #{table} WHERE #{column} >= #{sql_date(starting_time)}"
373
- execute(query)[0]["min"].to_i
398
+ query = "SELECT MIN(#{primary_key}) FROM #{table}"
399
+ query << " WHERE #{column} >= #{sql_date(starting_time)}" if starting_time
400
+ (execute(query)[0]["min"] || 1).to_i
374
401
  end
375
402
 
376
403
  def has_trigger?(trigger_name, table)
@@ -444,9 +471,11 @@ INSERT INTO #{dest_table} (#{fields})
444
471
 
445
472
  def settings_from_table(original_table, table)
446
473
  trigger_name = self.trigger_name(original_table)
447
- function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]["pg_get_functiondef"]
474
+ function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
475
+ return [nil, nil] unless function_def
476
+ function_def = function_def["pg_get_functiondef"]
448
477
  sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
449
- abort "Could not read settings" unless sql_format
478
+ return [nil, nil] unless sql_format
450
479
  period = sql_format[0]
451
480
  field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
452
481
  [period, field]
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-04-26 00:00:00.000000000 Z
11
+ date: 2016-08-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: slop