pgslice 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +36 -0
- data/lib/pgslice.rb +61 -32
- data/lib/pgslice/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b5f6975bd8ac6e9a10d0f9655a0b8a93f5ce1c1
|
4
|
+
data.tar.gz: 56f8ba1e389e723fa02aea9a2fdde9c39b64ea32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 636dde577a3aaad53bccff51443f7ee82c0b72515af5db714c14c093bc8643a0715ae5fb3915535cc12a4eeed0baba6d988b47b5084b9ef5d3c69fe95d1ff5f6
|
7
|
+
data.tar.gz: 254f7d242c475accd162c2aa8143283cb9aae1f402b72ee9f168c4ab11b8538aabdf29fd2439d261f78ad69636b0304f682f575d321e2eecdb3497979f027e0d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -46,6 +46,8 @@ gem install pgslice
|
|
46
46
|
|
47
47
|
Use the `--batch-size` and `--sleep` options to control the speed.
|
48
48
|
|
49
|
+
To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
|
50
|
+
|
49
51
|
5. Swap the intermediate table with the original table
|
50
52
|
|
51
53
|
```sh
|
@@ -70,6 +72,30 @@ To add partitions, use:
|
|
70
72
|
pgslice add_partitions <table> --future 3
|
71
73
|
```
|
72
74
|
|
75
|
+
Add this as a cron job to create a new partition each day or month.
|
76
|
+
|
77
|
+
```
|
78
|
+
# day
|
79
|
+
0 0 * * * pgslice add_partitions <table> --future 3 --url ...
|
80
|
+
|
81
|
+
# month
|
82
|
+
0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
|
83
|
+
```
|
84
|
+
|
85
|
+
Add a monitor to ensure partitions are being created.
|
86
|
+
|
87
|
+
```sql
|
88
|
+
SELECT 1 FROM
|
89
|
+
pg_catalog.pg_class c
|
90
|
+
INNER JOIN
|
91
|
+
pg_catalog.pg_namespace n ON n.oid = c.relnamespace
|
92
|
+
WHERE
|
93
|
+
c.relkind = 'r' AND
|
94
|
+
n.nspname = 'public' AND
|
95
|
+
c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
|
96
|
+
-- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
|
97
|
+
```
|
98
|
+
|
73
99
|
## Additional Commands
|
74
100
|
|
75
101
|
To undo prep (which will delete partitions), use:
|
@@ -186,6 +212,16 @@ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
|
|
186
212
|
COMMIT;
|
187
213
|
```
|
188
214
|
|
215
|
+
## One Off Tasks
|
216
|
+
|
217
|
+
You can also reduce the size of a table without partitioning.
|
218
|
+
|
219
|
+
```sh
|
220
|
+
pgslice prep <table> --no-partition
|
221
|
+
pgslice fill <table> --start 1000 # starting primary key
|
222
|
+
pgslice swap <table>
|
223
|
+
```
|
224
|
+
|
189
225
|
## Upgrading
|
190
226
|
|
191
227
|
Run:
|
data/lib/pgslice.rb
CHANGED
@@ -54,11 +54,18 @@ module PgSlice
|
|
54
54
|
intermediate_table = "#{table}_intermediate"
|
55
55
|
trigger_name = self.trigger_name(table)
|
56
56
|
|
57
|
-
|
57
|
+
if options[:no_partition]
|
58
|
+
abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
|
59
|
+
else
|
60
|
+
abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
|
61
|
+
end
|
58
62
|
abort "Table not found: #{table}" unless table_exists?(table)
|
59
63
|
abort "Table already exists: #{intermediate_table}" if table_exists?(intermediate_table)
|
60
|
-
|
61
|
-
|
64
|
+
|
65
|
+
unless options[:no_partition]
|
66
|
+
abort "Column not found: #{column}" unless columns(table).include?(column)
|
67
|
+
abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
|
68
|
+
end
|
62
69
|
|
63
70
|
queries = []
|
64
71
|
|
@@ -66,22 +73,24 @@ module PgSlice
|
|
66
73
|
CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
|
67
74
|
SQL
|
68
75
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
76
|
+
unless options[:no_partition]
|
77
|
+
sql_format = SQL_FORMAT[period.to_sym]
|
78
|
+
queries << <<-SQL
|
79
|
+
CREATE FUNCTION #{trigger_name}()
|
80
|
+
RETURNS trigger AS $$
|
81
|
+
BEGIN
|
82
|
+
EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
|
83
|
+
RETURN NULL;
|
84
|
+
END;
|
85
|
+
$$ LANGUAGE plpgsql;
|
86
|
+
SQL
|
79
87
|
|
80
|
-
|
81
|
-
CREATE TRIGGER #{trigger_name}
|
82
|
-
|
83
|
-
|
84
|
-
|
88
|
+
queries << <<-SQL
|
89
|
+
CREATE TRIGGER #{trigger_name}
|
90
|
+
BEFORE INSERT ON #{intermediate_table}
|
91
|
+
FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
|
92
|
+
SQL
|
93
|
+
end
|
85
94
|
|
86
95
|
run_queries(queries)
|
87
96
|
end
|
@@ -96,7 +105,7 @@ CREATE TRIGGER #{trigger_name}
|
|
96
105
|
|
97
106
|
queries = [
|
98
107
|
"DROP TABLE #{intermediate_table} CASCADE;",
|
99
|
-
"DROP FUNCTION #{trigger_name}();"
|
108
|
+
"DROP FUNCTION IF EXISTS #{trigger_name}();"
|
100
109
|
]
|
101
110
|
run_queries(queries)
|
102
111
|
end
|
@@ -122,6 +131,7 @@ CREATE TRIGGER #{trigger_name}
|
|
122
131
|
queries = []
|
123
132
|
|
124
133
|
period, field = settings_from_table(original_table, table)
|
134
|
+
abort "Could not read settings" unless period
|
125
135
|
today = round_date(Date.today, period)
|
126
136
|
range.each do |n|
|
127
137
|
day = advance_date(today, period, n)
|
@@ -162,11 +172,14 @@ CREATE TABLE #{partition_name}
|
|
162
172
|
abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
|
163
173
|
|
164
174
|
period, field = settings_from_table(table, dest_table)
|
165
|
-
name_format = self.name_format(period)
|
166
175
|
|
167
|
-
|
168
|
-
|
169
|
-
|
176
|
+
if period
|
177
|
+
name_format = self.name_format(period)
|
178
|
+
|
179
|
+
existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
|
180
|
+
starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
|
181
|
+
ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
|
182
|
+
end
|
170
183
|
|
171
184
|
primary_key = self.primary_key(table)
|
172
185
|
max_source_id = max_id(source_table, primary_key)
|
@@ -178,8 +191,12 @@ CREATE TABLE #{partition_name}
|
|
178
191
|
end
|
179
192
|
|
180
193
|
if max_dest_id == 0 && !options[:swapped]
|
181
|
-
|
182
|
-
|
194
|
+
if options[:start]
|
195
|
+
max_dest_id = options[:start]
|
196
|
+
else
|
197
|
+
min_source_id = min_id(source_table, primary_key, field, starting_time)
|
198
|
+
max_dest_id = min_source_id - 1 if min_source_id
|
199
|
+
end
|
183
200
|
end
|
184
201
|
|
185
202
|
starting_id = max_dest_id
|
@@ -189,11 +206,16 @@ CREATE TABLE #{partition_name}
|
|
189
206
|
i = 1
|
190
207
|
batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
|
191
208
|
while starting_id < max_source_id
|
209
|
+
where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
|
210
|
+
if period
|
211
|
+
where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
|
212
|
+
end
|
213
|
+
|
192
214
|
query = <<-SQL
|
193
215
|
/* #{i} of #{batch_count} */
|
194
216
|
INSERT INTO #{dest_table} (#{fields})
|
195
217
|
SELECT #{fields} FROM #{source_table}
|
196
|
-
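WHERE #{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size} AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}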
|
218
|
+
WHERE #{where}
|
197
219
|
SQL
|
198
220
|
|
199
221
|
log_sql(query)
|
@@ -265,6 +287,9 @@ INSERT INTO #{dest_table} (#{fields})
|
|
265
287
|
o.integer "--past", default: 0
|
266
288
|
o.integer "--batch-size", default: 10000
|
267
289
|
o.boolean "--dry-run", default: false
|
290
|
+
o.boolean "--no-partition", default: false
|
291
|
+
o.integer "--start"
|
292
|
+
o.string "--url"
|
268
293
|
o.on "-v", "--version", "print the version" do
|
269
294
|
log PgSlice::VERSION
|
270
295
|
@exit = true
|
@@ -294,8 +319,9 @@ INSERT INTO #{dest_table} (#{fields})
|
|
294
319
|
|
295
320
|
def connection
|
296
321
|
@connection ||= begin
|
297
|
-
|
298
|
-
|
322
|
+
url = options[:url] || ENV["PGSLICE_URL"]
|
323
|
+
abort "Set PGSLICE_URL or use the --url option" unless url
|
324
|
+
uri = URI.parse(url)
|
299
325
|
uri_parser = URI::Parser.new
|
300
326
|
config = {
|
301
327
|
host: uri.host,
|
@@ -369,8 +395,9 @@ INSERT INTO #{dest_table} (#{fields})
|
|
369
395
|
end
|
370
396
|
|
371
397
|
def min_id(table, primary_key, column, starting_time)
|
372
|
-
query = "SELECT MIN(#{primary_key}) FROM #{table}
|
373
|
-
|
398
|
+
query = "SELECT MIN(#{primary_key}) FROM #{table}"
|
399
|
+
query << " WHERE #{column} >= #{sql_date(starting_time)}" if starting_time
|
400
|
+
(execute(query)[0]["min"] || 1).to_i
|
374
401
|
end
|
375
402
|
|
376
403
|
def has_trigger?(trigger_name, table)
|
@@ -444,9 +471,11 @@ INSERT INTO #{dest_table} (#{fields})
|
|
444
471
|
|
445
472
|
def settings_from_table(original_table, table)
|
446
473
|
trigger_name = self.trigger_name(original_table)
|
447
|
-
function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]["pg_get_functiondef"]
|
474
|
+
function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
|
475
|
+
return [nil, nil] unless function_def
|
476
|
+
function_def = function_def["pg_get_functiondef"]
|
448
477
|
sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
|
449
|
-
|
478
|
+
return [nil, nil] unless sql_format
|
450
479
|
period = sql_format[0]
|
451
480
|
field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
|
452
481
|
[period, field]
|
data/lib/pgslice/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgslice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|