pgsync 0.6.3 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +8 -0
- data/lib/pgsync.rb +1 -0
- data/lib/pgsync/data_source.rb +21 -34
- data/lib/pgsync/sequence.rb +29 -0
- data/lib/pgsync/sync.rb +7 -7
- data/lib/pgsync/table_sync.rb +108 -20
- data/lib/pgsync/task.rb +13 -17
- data/lib/pgsync/utils.rb +3 -2
- data/lib/pgsync/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ecb467f4d3112edfcfebe19913e29daeddfe1e4cc10e18bd9e8850fbceced57
|
4
|
+
data.tar.gz: 4d8309dec9e8238074267baf583fe333c7b1d3300ad80398748e15905eb1e539
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 145eb31e2565257a5adedc05c1308692dffb1e0785728ecd38230bb23c34c4ba6ecf08512201f707f457b136e0b7a22ca511b46fb7834f2045c041ff43dd6ccc
|
7
|
+
data.tar.gz: cde51dee36149f9f8c21e26eecd504de68d8a47c2524f084b74962d7ceae1a01b59841261fc6c112cf94b31a89f55b9a991c05d1af0db41c46ed615fc56e888f
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/lib/pgsync.rb
CHANGED
data/lib/pgsync/data_source.rb
CHANGED
@@ -4,8 +4,10 @@ module PgSync
|
|
4
4
|
|
5
5
|
attr_reader :url
|
6
6
|
|
7
|
-
def initialize(url)
|
7
|
+
def initialize(url, name:, debug:)
|
8
8
|
@url = url
|
9
|
+
@name = name
|
10
|
+
@debug = debug
|
9
11
|
end
|
10
12
|
|
11
13
|
def exists?
|
@@ -50,10 +52,6 @@ module PgSync
|
|
50
52
|
table_set.include?(table)
|
51
53
|
end
|
52
54
|
|
53
|
-
def sequences(table, columns)
|
54
|
-
execute("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape("#{quote_ident_full(table)}")}, #{escape(f)}) AS #{quote_ident(f)}" }.join(", ")}").first.values.compact
|
55
|
-
end
|
56
|
-
|
57
55
|
def max_id(table, primary_key, sql_clause = nil)
|
58
56
|
execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["max"].to_i
|
59
57
|
end
|
@@ -62,39 +60,14 @@ module PgSync
|
|
62
60
|
execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["min"].to_i
|
63
61
|
end
|
64
62
|
|
65
|
-
# this value comes from pg_get_serial_sequence which is already quoted
|
66
63
|
def last_value(seq)
|
67
|
-
execute("SELECT last_value FROM #{seq}").first["last_value"]
|
64
|
+
execute("SELECT last_value FROM #{quote_ident_full(seq)}").first["last_value"]
|
68
65
|
end
|
69
66
|
|
70
67
|
def truncate(table)
|
71
68
|
execute("TRUNCATE #{quote_ident_full(table)} CASCADE")
|
72
69
|
end
|
73
70
|
|
74
|
-
# https://stackoverflow.com/a/20537829
|
75
|
-
# TODO can simplify with array_position in Postgres 9.5+
|
76
|
-
def primary_key(table)
|
77
|
-
query = <<~SQL
|
78
|
-
SELECT
|
79
|
-
pg_attribute.attname,
|
80
|
-
format_type(pg_attribute.atttypid, pg_attribute.atttypmod),
|
81
|
-
pg_attribute.attnum,
|
82
|
-
pg_index.indkey
|
83
|
-
FROM
|
84
|
-
pg_index, pg_class, pg_attribute, pg_namespace
|
85
|
-
WHERE
|
86
|
-
nspname = $1 AND
|
87
|
-
relname = $2 AND
|
88
|
-
indrelid = pg_class.oid AND
|
89
|
-
pg_class.relnamespace = pg_namespace.oid AND
|
90
|
-
pg_attribute.attrelid = pg_class.oid AND
|
91
|
-
pg_attribute.attnum = any(pg_index.indkey) AND
|
92
|
-
indisprimary
|
93
|
-
SQL
|
94
|
-
rows = execute(query, [table.schema, table.name])
|
95
|
-
rows.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["attname"] }
|
96
|
-
end
|
97
|
-
|
98
71
|
def triggers(table)
|
99
72
|
query = <<~SQL
|
100
73
|
SELECT
|
@@ -148,20 +121,34 @@ module PgSync
|
|
148
121
|
end
|
149
122
|
|
150
123
|
def execute(query, params = [])
|
124
|
+
log_sql query, params
|
151
125
|
conn.exec_params(query, params).to_a
|
152
126
|
end
|
153
127
|
|
154
128
|
def transaction
|
155
129
|
if conn.transaction_status == 0
|
156
130
|
# not currently in transaction
|
157
|
-
|
158
|
-
|
159
|
-
|
131
|
+
log_sql "BEGIN"
|
132
|
+
result =
|
133
|
+
conn.transaction do
|
134
|
+
yield
|
135
|
+
end
|
136
|
+
log_sql "COMMIT"
|
137
|
+
result
|
160
138
|
else
|
161
139
|
yield
|
162
140
|
end
|
163
141
|
end
|
164
142
|
|
143
|
+
# TODO log time for each statement
|
144
|
+
def log_sql(query, params = {})
|
145
|
+
if @debug
|
146
|
+
message = "#{colorize("[#{@name}]", :cyan)} #{query.gsub(/\s+/, " ").strip}"
|
147
|
+
message = "#{message} #{params.inspect}" if params.any?
|
148
|
+
log message
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
165
152
|
private
|
166
153
|
|
167
154
|
def concurrent_id
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# minimal class to keep schema and sequence name separate
|
2
|
+
module PgSync
|
3
|
+
class Sequence
|
4
|
+
attr_reader :schema, :name, :column
|
5
|
+
|
6
|
+
def initialize(schema, name, column:)
|
7
|
+
@schema = schema
|
8
|
+
@name = name
|
9
|
+
@column = column
|
10
|
+
end
|
11
|
+
|
12
|
+
def full_name
|
13
|
+
"#{schema}.#{name}"
|
14
|
+
end
|
15
|
+
|
16
|
+
def eql?(other)
|
17
|
+
other.schema == schema && other.name == name
|
18
|
+
end
|
19
|
+
|
20
|
+
# override hash when overriding eql?
|
21
|
+
def hash
|
22
|
+
[schema, name].hash
|
23
|
+
end
|
24
|
+
|
25
|
+
def to_s
|
26
|
+
full_name
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/pgsync/sync.rb
CHANGED
@@ -34,13 +34,13 @@ module PgSync
|
|
34
34
|
raise Error, "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1"
|
35
35
|
end
|
36
36
|
|
37
|
+
print_description("From", source)
|
38
|
+
print_description("To", destination)
|
39
|
+
|
37
40
|
if (opts[:preserve] || opts[:overwrite]) && destination.server_version_num < 90500
|
38
41
|
raise Error, "Postgres 9.5+ is required for --preserve and --overwrite"
|
39
42
|
end
|
40
43
|
|
41
|
-
print_description("From", source)
|
42
|
-
print_description("To", destination)
|
43
|
-
|
44
44
|
resolver = TaskResolver.new(args: args, opts: opts, source: source, destination: destination, config: config, first_schema: first_schema)
|
45
45
|
tasks =
|
46
46
|
resolver.tasks.map do |task|
|
@@ -126,15 +126,15 @@ module PgSync
|
|
126
126
|
end
|
127
127
|
|
128
128
|
def source
|
129
|
-
@source ||= data_source(@options[:from])
|
129
|
+
@source ||= data_source(@options[:from], "from")
|
130
130
|
end
|
131
131
|
|
132
132
|
def destination
|
133
|
-
@destination ||= data_source(@options[:to])
|
133
|
+
@destination ||= data_source(@options[:to], "to")
|
134
134
|
end
|
135
135
|
|
136
|
-
def data_source(url)
|
137
|
-
ds = DataSource.new(url)
|
136
|
+
def data_source(url, name)
|
137
|
+
ds = DataSource.new(url, name: name, debug: @options[:debug])
|
138
138
|
ObjectSpace.define_finalizer(self, self.class.finalize(ds))
|
139
139
|
ds
|
140
140
|
end
|
data/lib/pgsync/table_sync.rb
CHANGED
@@ -17,6 +17,10 @@ module PgSync
|
|
17
17
|
|
18
18
|
add_columns
|
19
19
|
|
20
|
+
add_primary_keys
|
21
|
+
|
22
|
+
add_sequences unless opts[:no_sequences]
|
23
|
+
|
20
24
|
show_notes
|
21
25
|
|
22
26
|
# don't sync tables with no shared fields
|
@@ -24,8 +28,6 @@ module PgSync
|
|
24
28
|
run_tasks(tasks.reject { |task| task.shared_fields.empty? })
|
25
29
|
end
|
26
30
|
|
27
|
-
# TODO only query specific tables
|
28
|
-
# TODO add sequences, primary keys, etc
|
29
31
|
def add_columns
|
30
32
|
source_columns = columns(source)
|
31
33
|
destination_columns = columns(destination)
|
@@ -36,6 +38,79 @@ module PgSync
|
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
41
|
+
def add_primary_keys
|
42
|
+
destination_primary_keys = primary_keys(destination)
|
43
|
+
|
44
|
+
tasks.each do |task|
|
45
|
+
task.to_primary_key = destination_primary_keys[task.table] || []
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def add_sequences
|
50
|
+
source_sequences = sequences(source)
|
51
|
+
destination_sequences = sequences(destination)
|
52
|
+
|
53
|
+
tasks.each do |task|
|
54
|
+
shared_columns = Set.new(task.shared_fields)
|
55
|
+
|
56
|
+
task.from_sequences = (source_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) }
|
57
|
+
task.to_sequences = (destination_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def sequences(data_source)
|
62
|
+
query = <<~SQL
|
63
|
+
SELECT
|
64
|
+
nt.nspname as schema,
|
65
|
+
t.relname as table,
|
66
|
+
a.attname as column,
|
67
|
+
n.nspname as sequence_schema,
|
68
|
+
s.relname as sequence
|
69
|
+
FROM
|
70
|
+
pg_class s
|
71
|
+
INNER JOIN
|
72
|
+
pg_depend d ON d.objid = s.oid
|
73
|
+
INNER JOIN
|
74
|
+
pg_class t ON d.objid = s.oid AND d.refobjid = t.oid
|
75
|
+
INNER JOIN
|
76
|
+
pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
|
77
|
+
INNER JOIN
|
78
|
+
pg_namespace n ON n.oid = s.relnamespace
|
79
|
+
INNER JOIN
|
80
|
+
pg_namespace nt ON nt.oid = t.relnamespace
|
81
|
+
WHERE
|
82
|
+
s.relkind = 'S'
|
83
|
+
SQL
|
84
|
+
data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
|
85
|
+
[k, v.map { |r| Sequence.new(r["sequence_schema"], r["sequence"], column: r["column"]) }]
|
86
|
+
end.to_h
|
87
|
+
end
|
88
|
+
|
89
|
+
def primary_keys(data_source)
|
90
|
+
# https://stackoverflow.com/a/20537829
|
91
|
+
# TODO can simplify with array_position in Postgres 9.5+
|
92
|
+
query = <<~SQL
|
93
|
+
SELECT
|
94
|
+
nspname AS schema,
|
95
|
+
relname AS table,
|
96
|
+
pg_attribute.attname AS column,
|
97
|
+
format_type(pg_attribute.atttypid, pg_attribute.atttypmod),
|
98
|
+
pg_attribute.attnum,
|
99
|
+
pg_index.indkey
|
100
|
+
FROM
|
101
|
+
pg_index, pg_class, pg_attribute, pg_namespace
|
102
|
+
WHERE
|
103
|
+
indrelid = pg_class.oid AND
|
104
|
+
pg_class.relnamespace = pg_namespace.oid AND
|
105
|
+
pg_attribute.attrelid = pg_class.oid AND
|
106
|
+
pg_attribute.attnum = any(pg_index.indkey) AND
|
107
|
+
indisprimary
|
108
|
+
SQL
|
109
|
+
data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
|
110
|
+
[k, v.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["column"] }]
|
111
|
+
end.to_h
|
112
|
+
end
|
113
|
+
|
39
114
|
def show_notes
|
40
115
|
# for tables
|
41
116
|
resolver.notes.each do |note|
|
@@ -93,28 +168,33 @@ module PgSync
|
|
93
168
|
def run_tasks(tasks, &block)
|
94
169
|
notices = []
|
95
170
|
failed_tables = []
|
96
|
-
|
97
|
-
spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
|
98
|
-
task_spinners = {}
|
99
171
|
started_at = {}
|
100
172
|
|
173
|
+
show_spinners = output.tty? && !opts[:in_batches] && !opts[:debug]
|
174
|
+
if show_spinners
|
175
|
+
spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
|
176
|
+
task_spinners = {}
|
177
|
+
end
|
178
|
+
|
101
179
|
start = lambda do |task, i|
|
102
180
|
message = ":spinner #{display_item(task)}"
|
103
|
-
|
104
|
-
if
|
105
|
-
|
106
|
-
log message.sub(":spinner", "⠋")
|
107
|
-
else
|
181
|
+
|
182
|
+
if show_spinners
|
183
|
+
spinner = spinners.register(message)
|
108
184
|
spinner.auto_spin
|
185
|
+
task_spinners[task] = spinner
|
186
|
+
elsif opts[:in_batches]
|
187
|
+
log message.sub(":spinner", "⠋")
|
109
188
|
end
|
110
|
-
|
189
|
+
|
111
190
|
started_at[task] = Time.now
|
112
191
|
end
|
113
192
|
|
114
193
|
finish = lambda do |task, i, result|
|
115
|
-
spinner = task_spinners[task]
|
116
194
|
time = (Time.now - started_at[task]).round(1)
|
117
195
|
|
196
|
+
success = result[:status] == "success"
|
197
|
+
|
118
198
|
message =
|
119
199
|
if result[:message]
|
120
200
|
"(#{result[:message].lines.first.to_s.strip})"
|
@@ -124,23 +204,30 @@ module PgSync
|
|
124
204
|
|
125
205
|
notices.concat(result[:notices])
|
126
206
|
|
127
|
-
if
|
128
|
-
spinner
|
207
|
+
if show_spinners
|
208
|
+
spinner = task_spinners[task]
|
209
|
+
if success
|
210
|
+
spinner.success(message)
|
211
|
+
else
|
212
|
+
spinner.error(message)
|
213
|
+
end
|
129
214
|
else
|
130
|
-
|
131
|
-
|
132
|
-
fail_sync(failed_tables) if opts[:fail_fast]
|
215
|
+
status = success ? "✔" : "✖"
|
216
|
+
log [status, display_item(task), message].join(" ")
|
133
217
|
end
|
134
218
|
|
135
|
-
unless
|
136
|
-
|
137
|
-
|
219
|
+
unless success
|
220
|
+
failed_tables << task_name(task)
|
221
|
+
fail_sync(failed_tables) if opts[:fail_fast]
|
138
222
|
end
|
139
223
|
end
|
140
224
|
|
141
225
|
options = {start: start, finish: finish}
|
142
226
|
|
143
227
|
jobs = opts[:jobs]
|
228
|
+
|
229
|
+
# disable multiple jobs for defer constraints and disable integrity
|
230
|
+
# so we can use a transaction to ensure a consistent snapshot
|
144
231
|
if opts[:debug] || opts[:in_batches] || opts[:defer_constraints] || opts[:defer_constraints_v2] || opts[:disable_integrity] || opts[:disable_integrity_v2]
|
145
232
|
warning "--jobs ignored" if jobs
|
146
233
|
jobs = 0
|
@@ -171,6 +258,7 @@ module PgSync
|
|
171
258
|
fail_sync(failed_tables) if failed_tables.any?
|
172
259
|
end
|
173
260
|
|
261
|
+
# TODO add option to open transaction on source when manually specifying order of tables
|
174
262
|
def maybe_defer_constraints
|
175
263
|
if opts[:disable_integrity] || opts[:disable_integrity_v2]
|
176
264
|
# create a transaction on the source
|
data/lib/pgsync/task.rb
CHANGED
@@ -3,7 +3,7 @@ module PgSync
|
|
3
3
|
include Utils
|
4
4
|
|
5
5
|
attr_reader :source, :destination, :config, :table, :opts
|
6
|
-
attr_accessor :from_columns, :to_columns
|
6
|
+
attr_accessor :from_columns, :to_columns, :from_sequences, :to_sequences, :to_primary_key
|
7
7
|
|
8
8
|
def initialize(source:, destination:, config:, table:, opts:)
|
9
9
|
@source = source
|
@@ -11,6 +11,8 @@ module PgSync
|
|
11
11
|
@config = config
|
12
12
|
@table = table
|
13
13
|
@opts = opts
|
14
|
+
@from_sequences = []
|
15
|
+
@to_sequences = []
|
14
16
|
end
|
15
17
|
|
16
18
|
def quoted_table
|
@@ -39,14 +41,6 @@ module PgSync
|
|
39
41
|
@shared_fields ||= to_fields & from_fields
|
40
42
|
end
|
41
43
|
|
42
|
-
def from_sequences
|
43
|
-
@from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
|
44
|
-
end
|
45
|
-
|
46
|
-
def to_sequences
|
47
|
-
@to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
|
48
|
-
end
|
49
|
-
|
50
44
|
def shared_sequences
|
51
45
|
@shared_sequences ||= to_sequences & from_sequences
|
52
46
|
end
|
@@ -88,15 +82,10 @@ module PgSync
|
|
88
82
|
sql_clause << " #{opts[:sql]}" if opts[:sql]
|
89
83
|
|
90
84
|
bad_fields = opts[:no_rules] ? [] : config["data_rules"]
|
91
|
-
primary_key =
|
85
|
+
primary_key = to_primary_key
|
92
86
|
copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
|
93
87
|
fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
|
94
88
|
|
95
|
-
seq_values = {}
|
96
|
-
shared_sequences.each do |seq|
|
97
|
-
seq_values[seq] = source.last_value(seq)
|
98
|
-
end
|
99
|
-
|
100
89
|
copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
|
101
90
|
if opts[:in_batches]
|
102
91
|
raise Error, "No primary key" if primary_key.empty?
|
@@ -163,8 +152,11 @@ module PgSync
|
|
163
152
|
end
|
164
153
|
copy(copy_to_command, dest_table: table, dest_fields: fields)
|
165
154
|
end
|
166
|
-
|
167
|
-
|
155
|
+
|
156
|
+
# update sequences
|
157
|
+
shared_sequences.each do |seq|
|
158
|
+
value = source.last_value(seq)
|
159
|
+
destination.execute("SELECT setval(#{escape(quote_ident_full(seq))}, #{escape(value)})")
|
168
160
|
end
|
169
161
|
|
170
162
|
{status: "success"}
|
@@ -214,6 +206,10 @@ module PgSync
|
|
214
206
|
|
215
207
|
def copy(source_command, dest_table:, dest_fields:)
|
216
208
|
destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
|
209
|
+
|
210
|
+
source.log_sql(source_command)
|
211
|
+
destination.log_sql(destination_command)
|
212
|
+
|
217
213
|
destination.conn.copy_data(destination_command) do
|
218
214
|
source.conn.copy_data(source_command) do
|
219
215
|
while (row = source.conn.get_copy_data)
|
data/lib/pgsync/utils.rb
CHANGED
@@ -3,7 +3,8 @@ module PgSync
|
|
3
3
|
COLOR_CODES = {
|
4
4
|
red: 31,
|
5
5
|
green: 32,
|
6
|
-
yellow: 33
|
6
|
+
yellow: 33,
|
7
|
+
cyan: 36
|
7
8
|
}
|
8
9
|
|
9
10
|
def log(message = nil)
|
@@ -59,7 +60,7 @@ module PgSync
|
|
59
60
|
end
|
60
61
|
|
61
62
|
def quote_ident_full(ident)
|
62
|
-
if ident.is_a?(Table)
|
63
|
+
if ident.is_a?(Table) || ident.is_a?(Sequence)
|
63
64
|
[quote_ident(ident.schema), quote_ident(ident.name)].join(".")
|
64
65
|
else # temp table names are strings
|
65
66
|
quote_ident(ident)
|
data/lib/pgsync/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.3
|
4
|
+
version: 0.6.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: parallel
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- lib/pgsync/data_source.rb
|
126
126
|
- lib/pgsync/init.rb
|
127
127
|
- lib/pgsync/schema_sync.rb
|
128
|
+
- lib/pgsync/sequence.rb
|
128
129
|
- lib/pgsync/sync.rb
|
129
130
|
- lib/pgsync/table.rb
|
130
131
|
- lib/pgsync/table_sync.rb
|