pgslice 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +36 -0
- data/lib/pgslice.rb +61 -32
- data/lib/pgslice/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b5f6975bd8ac6e9a10d0f9655a0b8a93f5ce1c1
|
4
|
+
data.tar.gz: 56f8ba1e389e723fa02aea9a2fdde9c39b64ea32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 636dde577a3aaad53bccff51443f7ee82c0b72515af5db714c14c093bc8643a0715ae5fb3915535cc12a4eeed0baba6d988b47b5084b9ef5d3c69fe95d1ff5f6
|
7
|
+
data.tar.gz: 254f7d242c475accd162c2aa8143283cb9aae1f402b72ee9f168c4ab11b8538aabdf29fd2439d261f78ad69636b0304f682f575d321e2eecdb3497979f027e0d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -46,6 +46,8 @@ gem install pgslice
|
|
46
46
|
|
47
47
|
Use the `--batch-size` and `--sleep` options to control the speed.
|
48
48
|
|
49
|
+
To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
|
50
|
+
|
49
51
|
5. Swap the intermediate table with the original table
|
50
52
|
|
51
53
|
```sh
|
@@ -70,6 +72,30 @@ To add partitions, use:
|
|
70
72
|
pgslice add_partitions <table> --future 3
|
71
73
|
```
|
72
74
|
|
75
|
+
Add this as a cron job to create a new partition each day or month.
|
76
|
+
|
77
|
+
```
|
78
|
+
# day
|
79
|
+
0 0 * * * pgslice add_partitions <table> --future 3 --url ...
|
80
|
+
|
81
|
+
# month
|
82
|
+
0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
|
83
|
+
```
|
84
|
+
|
85
|
+
Add a monitor to ensure partitions are being created.
|
86
|
+
|
87
|
+
```sql
|
88
|
+
SELECT 1 FROM
|
89
|
+
pg_catalog.pg_class c
|
90
|
+
INNER JOIN
|
91
|
+
pg_catalog.pg_namespace n ON n.oid = c.relnamespace
|
92
|
+
WHERE
|
93
|
+
c.relkind = 'r' AND
|
94
|
+
n.nspname = 'public' AND
|
95
|
+
c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
|
96
|
+
-- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
|
97
|
+
```
|
98
|
+
|
73
99
|
## Additional Commands
|
74
100
|
|
75
101
|
To undo prep (which will delete partitions), use:
|
@@ -186,6 +212,16 @@ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
|
|
186
212
|
COMMIT;
|
187
213
|
```
|
188
214
|
|
215
|
+
## One Off Tasks
|
216
|
+
|
217
|
+
You can also reduce the size of a table without partitioning.
|
218
|
+
|
219
|
+
```sh
|
220
|
+
pgslice prep <table> --no-partition
|
221
|
+
pgslice fill <table> --start 1000 # starting primary key
|
222
|
+
pgslice swap <table>
|
223
|
+
```
|
224
|
+
|
189
225
|
## Upgrading
|
190
226
|
|
191
227
|
Run:
|
data/lib/pgslice.rb
CHANGED
@@ -54,11 +54,18 @@ module PgSlice
|
|
54
54
|
intermediate_table = "#{table}_intermediate"
|
55
55
|
trigger_name = self.trigger_name(table)
|
56
56
|
|
57
|
-
|
57
|
+
if options[:no_partition]
|
58
|
+
abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
|
59
|
+
else
|
60
|
+
abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
|
61
|
+
end
|
58
62
|
abort "Table not found: #{table}" unless table_exists?(table)
|
59
63
|
abort "Table already exists: #{intermediate_table}" if table_exists?(intermediate_table)
|
60
|
-
|
61
|
-
|
64
|
+
|
65
|
+
unless options[:no_partition]
|
66
|
+
abort "Column not found: #{column}" unless columns(table).include?(column)
|
67
|
+
abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
|
68
|
+
end
|
62
69
|
|
63
70
|
queries = []
|
64
71
|
|
@@ -66,22 +73,24 @@ module PgSlice
|
|
66
73
|
CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
|
67
74
|
SQL
|
68
75
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
76
|
+
unless options[:no_partition]
|
77
|
+
sql_format = SQL_FORMAT[period.to_sym]
|
78
|
+
queries << <<-SQL
|
79
|
+
CREATE FUNCTION #{trigger_name}()
|
80
|
+
RETURNS trigger AS $$
|
81
|
+
BEGIN
|
82
|
+
EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
|
83
|
+
RETURN NULL;
|
84
|
+
END;
|
85
|
+
$$ LANGUAGE plpgsql;
|
86
|
+
SQL
|
79
87
|
|
80
|
-
|
81
|
-
CREATE TRIGGER #{trigger_name}
|
82
|
-
|
83
|
-
|
84
|
-
|
88
|
+
queries << <<-SQL
|
89
|
+
CREATE TRIGGER #{trigger_name}
|
90
|
+
BEFORE INSERT ON #{intermediate_table}
|
91
|
+
FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
|
92
|
+
SQL
|
93
|
+
end
|
85
94
|
|
86
95
|
run_queries(queries)
|
87
96
|
end
|
@@ -96,7 +105,7 @@ CREATE TRIGGER #{trigger_name}
|
|
96
105
|
|
97
106
|
queries = [
|
98
107
|
"DROP TABLE #{intermediate_table} CASCADE;",
|
99
|
-
"DROP FUNCTION #{trigger_name}();"
|
108
|
+
"DROP FUNCTION IF EXISTS #{trigger_name}();"
|
100
109
|
]
|
101
110
|
run_queries(queries)
|
102
111
|
end
|
@@ -122,6 +131,7 @@ CREATE TRIGGER #{trigger_name}
|
|
122
131
|
queries = []
|
123
132
|
|
124
133
|
period, field = settings_from_table(original_table, table)
|
134
|
+
abort "Could not read settings" unless period
|
125
135
|
today = round_date(Date.today, period)
|
126
136
|
range.each do |n|
|
127
137
|
day = advance_date(today, period, n)
|
@@ -162,11 +172,14 @@ CREATE TABLE #{partition_name}
|
|
162
172
|
abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
|
163
173
|
|
164
174
|
period, field = settings_from_table(table, dest_table)
|
165
|
-
name_format = self.name_format(period)
|
166
175
|
|
167
|
-
|
168
|
-
|
169
|
-
|
176
|
+
if period
|
177
|
+
name_format = self.name_format(period)
|
178
|
+
|
179
|
+
existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
|
180
|
+
starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
|
181
|
+
ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
|
182
|
+
end
|
170
183
|
|
171
184
|
primary_key = self.primary_key(table)
|
172
185
|
max_source_id = max_id(source_table, primary_key)
|
@@ -178,8 +191,12 @@ CREATE TABLE #{partition_name}
|
|
178
191
|
end
|
179
192
|
|
180
193
|
if max_dest_id == 0 && !options[:swapped]
|
181
|
-
|
182
|
-
|
194
|
+
if options[:start]
|
195
|
+
max_dest_id = options[:start]
|
196
|
+
else
|
197
|
+
min_source_id = min_id(source_table, primary_key, field, starting_time)
|
198
|
+
max_dest_id = min_source_id - 1 if min_source_id
|
199
|
+
end
|
183
200
|
end
|
184
201
|
|
185
202
|
starting_id = max_dest_id
|
@@ -189,11 +206,16 @@ CREATE TABLE #{partition_name}
|
|
189
206
|
i = 1
|
190
207
|
batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
|
191
208
|
while starting_id < max_source_id
|
209
|
+
where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
|
210
|
+
if period
|
211
|
+
where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
|
212
|
+
end
|
213
|
+
|
192
214
|
query = <<-SQL
|
193
215
|
/* #{i} of #{batch_count} */
|
194
216
|
INSERT INTO #{dest_table} (#{fields})
|
195
217
|
SELECT #{fields} FROM #{source_table}
|
196
|
-
WHERE #{
|
218
|
+
WHERE #{where}
|
197
219
|
SQL
|
198
220
|
|
199
221
|
log_sql(query)
|
@@ -265,6 +287,9 @@ INSERT INTO #{dest_table} (#{fields})
|
|
265
287
|
o.integer "--past", default: 0
|
266
288
|
o.integer "--batch-size", default: 10000
|
267
289
|
o.boolean "--dry-run", default: false
|
290
|
+
o.boolean "--no-partition", default: false
|
291
|
+
o.integer "--start"
|
292
|
+
o.string "--url"
|
268
293
|
o.on "-v", "--version", "print the version" do
|
269
294
|
log PgSlice::VERSION
|
270
295
|
@exit = true
|
@@ -294,8 +319,9 @@ INSERT INTO #{dest_table} (#{fields})
|
|
294
319
|
|
295
320
|
def connection
|
296
321
|
@connection ||= begin
|
297
|
-
|
298
|
-
|
322
|
+
url = options[:url] || ENV["PGSLICE_URL"]
|
323
|
+
abort "Set PGSLICE_URL or use the --url option" unless url
|
324
|
+
uri = URI.parse(url)
|
299
325
|
uri_parser = URI::Parser.new
|
300
326
|
config = {
|
301
327
|
host: uri.host,
|
@@ -369,8 +395,9 @@ INSERT INTO #{dest_table} (#{fields})
|
|
369
395
|
end
|
370
396
|
|
371
397
|
def min_id(table, primary_key, column, starting_time)
|
372
|
-
query = "SELECT MIN(#{primary_key}) FROM #{table}
|
373
|
-
|
398
|
+
query = "SELECT MIN(#{primary_key}) FROM #{table}"
|
399
|
+
query << " WHERE #{column} >= #{sql_date(starting_time)}" if starting_time
|
400
|
+
(execute(query)[0]["min"] || 1).to_i
|
374
401
|
end
|
375
402
|
|
376
403
|
def has_trigger?(trigger_name, table)
|
@@ -444,9 +471,11 @@ INSERT INTO #{dest_table} (#{fields})
|
|
444
471
|
|
445
472
|
def settings_from_table(original_table, table)
|
446
473
|
trigger_name = self.trigger_name(original_table)
|
447
|
-
function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
|
474
|
+
function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
|
475
|
+
return [nil, nil] unless function_def
|
476
|
+
function_def = function_def["pg_get_functiondef"]
|
448
477
|
sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
|
449
|
-
|
478
|
+
return [nil, nil] unless sql_format
|
450
479
|
period = sql_format[0]
|
451
480
|
field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
|
452
481
|
[period, field]
|
data/lib/pgslice/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgslice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|