pgsync 0.5.5 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -3
- data/LICENSE.txt +1 -1
- data/README.md +28 -26
- data/config.yml +4 -0
- data/lib/pgsync.rb +5 -1
- data/lib/pgsync/client.rb +52 -57
- data/lib/pgsync/data_source.rb +47 -83
- data/lib/pgsync/init.rb +19 -4
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +77 -195
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +150 -220
- data/lib/pgsync/task.rb +315 -0
- data/lib/pgsync/task_resolver.rb +235 -0
- data/lib/pgsync/utils.rb +53 -13
- data/lib/pgsync/version.rb +1 -1
- metadata +6 -3
- data/lib/pgsync/table_list.rb +0 -141
data/lib/pgsync/table.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# minimal class to keep schema and table name separate
module PgSync
  # Value object identifying a table by its schema and its name.
  #
  # Two instances with the same schema and name are interchangeable: they are
  # equal under both == and eql?, and they produce the same hash, so they can
  # be used as Hash keys and with Array#uniq, #-, #&, #include?, etc.
  class Table
    attr_reader :schema, :name

    # schema - schema part of the identifier
    # name   - table part of the identifier
    def initialize(schema, name)
      @schema = schema
      @name = name
    end

    # Returns the identifier as "schema.name".
    def full_name
      "#{schema}.#{name}"
    end

    # Value equality on (schema, name).
    #
    # Returns false (rather than raising NoMethodError) when other is not a
    # Table, e.g. nil or a String.
    def eql?(other)
      other.is_a?(Table) && other.schema == schema && other.name == name
    end

    # keep == consistent with eql? so include?/index agree with uniq/Hash lookup
    alias_method :==, :eql?

    # override hash when overriding eql?
    def hash
      [schema, name].hash
    end

    # Same as full_name.
    def to_s
      full_name
    end
  end
end
|
data/lib/pgsync/table_sync.rb
CHANGED
@@ -2,274 +2,204 @@ module PgSync
|
|
2
2
|
class TableSync
|
3
3
|
include Utils
|
4
4
|
|
5
|
-
attr_reader :source, :destination
|
5
|
+
attr_reader :source, :destination, :tasks, :opts, :resolver
|
6
6
|
|
7
|
-
def initialize(source:, destination:)
|
7
|
+
def initialize(source:, destination:, tasks:, opts:, resolver:)
|
8
8
|
@source = source
|
9
9
|
@destination = destination
|
10
|
+
@tasks = tasks
|
11
|
+
@opts = opts
|
12
|
+
@resolver = resolver
|
10
13
|
end
|
11
14
|
|
12
|
-
def
|
13
|
-
|
14
|
-
sync_data(config, table, opts)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def sync_data(config, table, opts)
|
19
|
-
start_time = Time.now
|
20
|
-
|
21
|
-
from_fields = source.columns(table)
|
22
|
-
to_fields = destination.columns(table)
|
23
|
-
shared_fields = to_fields & from_fields
|
24
|
-
extra_fields = to_fields - from_fields
|
25
|
-
missing_fields = from_fields - to_fields
|
26
|
-
|
27
|
-
if opts[:no_sequences]
|
28
|
-
from_sequences = []
|
29
|
-
to_sequences = []
|
30
|
-
else
|
31
|
-
from_sequences = source.sequences(table, shared_fields)
|
32
|
-
to_sequences = destination.sequences(table, shared_fields)
|
33
|
-
end
|
34
|
-
|
35
|
-
shared_sequences = to_sequences & from_sequences
|
36
|
-
extra_sequences = to_sequences - from_sequences
|
37
|
-
missing_sequences = from_sequences - to_sequences
|
15
|
+
def perform
|
16
|
+
confirm_tables_exist(destination, tasks, "destination")
|
38
17
|
|
39
|
-
|
40
|
-
sql_clause << " #{opts[:sql]}" if opts[:sql]
|
18
|
+
add_columns
|
41
19
|
|
42
|
-
|
43
|
-
notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
|
44
|
-
notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
|
45
|
-
notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
|
46
|
-
notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
|
20
|
+
show_notes
|
47
21
|
|
48
|
-
|
22
|
+
# don't sync tables with no shared fields
|
23
|
+
# we show a warning message above
|
24
|
+
run_tasks(tasks.reject { |task| task.shared_fields.empty? })
|
25
|
+
end
|
49
26
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
27
|
+
# TODO only query specific tables
|
28
|
+
# TODO add sequences, primary keys, etc
|
29
|
+
def add_columns
|
30
|
+
source_columns = columns(source)
|
31
|
+
destination_columns = columns(destination)
|
54
32
|
|
55
|
-
|
56
|
-
|
57
|
-
|
33
|
+
tasks.each do |task|
|
34
|
+
task.from_columns = source_columns[task.table] || []
|
35
|
+
task.to_columns = destination_columns[task.table] || []
|
58
36
|
end
|
37
|
+
end
|
59
38
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
destination.truncate(table) if opts[:truncate]
|
66
|
-
|
67
|
-
from_max_id = source.max_id(table, primary_key)
|
68
|
-
to_max_id = destination.max_id(table, primary_key) + 1
|
39
|
+
def show_notes
|
40
|
+
# for tables
|
41
|
+
resolver.notes.each do |note|
|
42
|
+
warning note
|
43
|
+
end
|
69
44
|
|
70
|
-
|
71
|
-
|
72
|
-
|
45
|
+
# for columns and sequences
|
46
|
+
tasks.each do |task|
|
47
|
+
task.notes.each do |note|
|
48
|
+
warning "#{task_name(task)}: #{note}"
|
73
49
|
end
|
50
|
+
end
|
74
51
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
52
|
+
# for non-deferrable constraints
|
53
|
+
if opts[:defer_constraints]
|
54
|
+
constraints = non_deferrable_constraints(destination)
|
55
|
+
constraints = tasks.flat_map { |t| constraints[t.table] || [] }
|
56
|
+
warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
|
57
|
+
end
|
58
|
+
end
|
80
59
|
|
81
|
-
|
82
|
-
|
83
|
-
|
60
|
+
# Reads column metadata from information_schema.columns on the given data
# source (no table filter is applied by the query).
#
# data_source - object responding to execute(sql); each returned row is read
#               through the string keys "schema", "table", "column", "type"
#
# Returns a Hash of Table => Array of {name:, type:} hashes, with rows kept
# in the order the query returns them.
def columns(data_source)
  sql = <<~SQL
    SELECT
    table_schema AS schema,
    table_name AS table,
    column_name AS column,
    data_type AS type
    FROM
    information_schema.columns
    ORDER BY 1, 2, 3
  SQL

  data_source.execute(sql).each_with_object({}) do |row, columns_by_table|
    table = Table.new(row["schema"], row["table"])
    (columns_by_table[table] ||= []) << {name: row["column"], type: row["type"]}
  end
end
|
84
75
|
|
85
|
-
|
86
|
-
|
76
|
+
# Looks up foreign key constraints that are declared as not deferrable
# (is_deferrable = 'NO') in information_schema.table_constraints.
#
# data_source - object responding to execute(sql); each returned row is read
#               through the string keys "schema", "table", "constraint_name"
#
# Returns a Hash of Table => Array of constraint names.
def non_deferrable_constraints(data_source)
  sql = <<~SQL
    SELECT
    table_schema AS schema,
    table_name AS table,
    constraint_name
    FROM
    information_schema.table_constraints
    WHERE
    constraint_type = 'FOREIGN KEY' AND
    is_deferrable = 'NO'
  SQL

  names_by_table = {}
  data_source.execute(sql).each do |row|
    table = Table.new(row["schema"], row["table"])
    names_by_table[table] ||= []
    names_by_table[table] << row["constraint_name"]
  end
  names_by_table
end
|
87
92
|
|
88
|
-
|
89
|
-
|
93
|
+
def run_tasks(tasks, &block)
|
94
|
+
notices = []
|
95
|
+
failed_tables = []
|
90
96
|
|
91
|
-
|
92
|
-
|
97
|
+
spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
|
98
|
+
task_spinners = {}
|
99
|
+
started_at = {}
|
93
100
|
|
94
|
-
|
95
|
-
|
96
|
-
|
101
|
+
start = lambda do |task, i|
|
102
|
+
message = ":spinner #{display_item(task)}"
|
103
|
+
spinner = spinners.register(message)
|
104
|
+
if opts[:in_batches]
|
105
|
+
# log instead of spin for non-tty
|
106
|
+
log message.sub(":spinner", "⠋")
|
107
|
+
else
|
108
|
+
spinner.auto_spin
|
97
109
|
end
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
# create a temp table
|
102
|
-
temp_table = "pgsync_#{rand(1_000_000_000)}"
|
103
|
-
destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quote_ident_full(table)} WITH NO DATA")
|
110
|
+
task_spinners[task] = spinner
|
111
|
+
started_at[task] = Time.now
|
112
|
+
end
|
104
113
|
|
105
|
-
|
106
|
-
|
114
|
+
finish = lambda do |task, i, result|
|
115
|
+
spinner = task_spinners[task]
|
116
|
+
time = (Time.now - started_at[task]).round(1)
|
107
117
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
destination.execute("DELETE FROM #{quote_ident_full(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident_full(temp_table)})")
|
114
|
-
destination.execute("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident(temp_table)})")
|
118
|
+
message =
|
119
|
+
if result[:message]
|
120
|
+
"(#{result[:message].lines.first.to_s.strip})"
|
121
|
+
else
|
122
|
+
"- #{time}s"
|
115
123
|
end
|
116
|
-
end
|
117
|
-
else
|
118
|
-
# use delete instead of truncate for foreign keys
|
119
|
-
if opts[:defer_constraints]
|
120
|
-
destination.execute("DELETE FROM #{quote_ident_full(table)}")
|
121
|
-
else
|
122
|
-
destination.truncate(table)
|
123
|
-
end
|
124
|
-
copy(copy_to_command, dest_table: table, dest_fields: fields)
|
125
|
-
end
|
126
|
-
seq_values.each do |seq, value|
|
127
|
-
destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
|
128
|
-
end
|
129
124
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
rescue => e
|
135
|
-
message =
|
136
|
-
case e
|
137
|
-
when PG::ConnectionBad
|
138
|
-
# likely fine to show simplified message here
|
139
|
-
# the full message will be shown when first trying to connect
|
140
|
-
"Connection failed"
|
141
|
-
when PG::Error
|
142
|
-
e.message.sub("ERROR: ", "")
|
143
|
-
when Error
|
144
|
-
e.message
|
125
|
+
notices.concat(result[:notices])
|
126
|
+
|
127
|
+
if result[:status] == "success"
|
128
|
+
spinner.success(message)
|
145
129
|
else
|
146
|
-
|
130
|
+
spinner.error(message)
|
131
|
+
failed_tables << task_name(task)
|
132
|
+
fail_sync(failed_tables) if opts[:fail_fast]
|
147
133
|
end
|
148
134
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
private
|
153
|
-
|
154
|
-
def copy(source_command, dest_table:, dest_fields:)
|
155
|
-
destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
|
156
|
-
destination.conn.copy_data(destination_command) do
|
157
|
-
source.conn.copy_data(source_command) do
|
158
|
-
while (row = source.conn.get_copy_data)
|
159
|
-
destination.conn.put_copy_data(row)
|
160
|
-
end
|
135
|
+
unless spinner.send(:tty?)
|
136
|
+
status = result[:status] == "success" ? "✔" : "✖"
|
137
|
+
log [status, display_item(task), message].join(" ")
|
161
138
|
end
|
162
139
|
end
|
163
|
-
end
|
164
140
|
|
165
|
-
|
166
|
-
def rule_match?(table, column, rule)
|
167
|
-
regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
|
168
|
-
regex.match(column) || regex.match("#{table.split(".", 2)[-1]}.#{column}") || regex.match("#{table}.#{column}")
|
169
|
-
end
|
141
|
+
options = {start: start, finish: finish}
|
170
142
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
escape(rule["value"])
|
176
|
-
elsif rule.key?("statement")
|
177
|
-
rule["statement"]
|
178
|
-
else
|
179
|
-
raise Error, "Unknown rule #{rule.inspect} for column #{column}"
|
180
|
-
end
|
181
|
-
else
|
182
|
-
case rule
|
183
|
-
when "untouched"
|
184
|
-
quote_ident(column)
|
185
|
-
when "unique_email"
|
186
|
-
"'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
|
187
|
-
when "unique_phone"
|
188
|
-
"(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
|
189
|
-
when "unique_secret"
|
190
|
-
"'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
|
191
|
-
when "random_int", "random_number"
|
192
|
-
"(RANDOM() * 100)::int"
|
193
|
-
when "random_date"
|
194
|
-
"date '1970-01-01' + (RANDOM() * 10000)::int"
|
195
|
-
when "random_time"
|
196
|
-
"NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
|
197
|
-
when "random_ip"
|
198
|
-
"(1 + RANDOM() * 254)::int::text || '.0.0.1'"
|
199
|
-
when "random_letter"
|
200
|
-
"chr(65 + (RANDOM() * 26)::int)"
|
201
|
-
when "random_string"
|
202
|
-
"RIGHT(MD5(RANDOM()::text), 10)"
|
203
|
-
when "null", nil
|
204
|
-
"NULL"
|
205
|
-
else
|
206
|
-
raise Error, "Unknown rule #{rule} for column #{column}"
|
207
|
-
end
|
143
|
+
jobs = opts[:jobs]
|
144
|
+
if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
|
145
|
+
warning "--jobs ignored" if jobs
|
146
|
+
jobs = 0
|
208
147
|
end
|
209
|
-
end
|
210
148
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
149
|
+
if windows?
|
150
|
+
options[:in_threads] = jobs || 4
|
151
|
+
else
|
152
|
+
options[:in_processes] = jobs if jobs
|
153
|
+
end
|
215
154
|
|
216
|
-
|
217
|
-
|
218
|
-
|
155
|
+
maybe_defer_constraints do
|
156
|
+
# could try to use `raise Parallel::Kill` to fail faster with --fail-fast
|
157
|
+
# see `fast_faster` branch
|
158
|
+
# however, need to make sure connections are cleaned up properly
|
159
|
+
Parallel.each(tasks, **options) do |task|
|
160
|
+
source.reconnect_if_needed
|
161
|
+
destination.reconnect_if_needed
|
219
162
|
|
220
|
-
|
221
|
-
|
222
|
-
|
163
|
+
task.perform
|
164
|
+
end
|
165
|
+
end
|
223
166
|
|
224
|
-
|
225
|
-
|
226
|
-
"'#{quote_string(value)}'"
|
227
|
-
else
|
228
|
-
value
|
167
|
+
notices.each do |notice|
|
168
|
+
warning notice
|
229
169
|
end
|
230
|
-
end
|
231
170
|
|
232
|
-
|
233
|
-
def quote_string(s)
|
234
|
-
s.gsub(/\\/, '\&\&').gsub(/'/, "''")
|
171
|
+
fail_sync(failed_tables) if failed_tables.any?
|
235
172
|
end
|
236
173
|
|
237
|
-
def
|
238
|
-
if opts[:
|
174
|
+
def maybe_defer_constraints
|
175
|
+
if opts[:defer_constraints]
|
239
176
|
destination.transaction do
|
240
|
-
|
241
|
-
triggers.select! { |t| t["enabled"] == "t" }
|
242
|
-
internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
|
243
|
-
integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
|
244
|
-
restore_triggers = []
|
245
|
-
|
246
|
-
if opts[:disable_integrity]
|
247
|
-
integrity_triggers.each do |trigger|
|
248
|
-
destination.execute("ALTER TABLE #{quote_ident_full(table)} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
|
249
|
-
end
|
250
|
-
restore_triggers.concat(integrity_triggers)
|
251
|
-
end
|
252
|
-
|
253
|
-
if opts[:disable_user_triggers]
|
254
|
-
# important!
|
255
|
-
# rely on Postgres to disable user triggers
|
256
|
-
# we don't want to accidentally disable non-user triggers if logic above is off
|
257
|
-
destination.execute("ALTER TABLE #{quote_ident_full(table)} DISABLE TRIGGER USER")
|
258
|
-
restore_triggers.concat(user_triggers)
|
259
|
-
end
|
260
|
-
|
261
|
-
result = yield
|
177
|
+
destination.execute("SET CONSTRAINTS ALL DEFERRED")
|
262
178
|
|
263
|
-
#
|
264
|
-
|
265
|
-
|
179
|
+
# create a transaction on the source
|
180
|
+
# to ensure we get a consistent snapshot
|
181
|
+
source.transaction do
|
182
|
+
yield
|
266
183
|
end
|
267
|
-
|
268
|
-
result
|
269
184
|
end
|
270
185
|
else
|
271
186
|
yield
|
272
187
|
end
|
273
188
|
end
|
189
|
+
|
190
|
+
# Aborts the sync by raising Error with a summary of the tables that failed.
#
# failed_tables - Array of table names to report; the message pluralizes
#                 "table" unless exactly one entry is given
#
# Raises Error always.
def fail_sync(failed_tables)
  count = failed_tables.size
  suffix = "s" unless count == 1
  raise Error, "Sync failed for #{count} table#{suffix}: #{failed_tables.join(", ")}"
end
|
193
|
+
|
194
|
+
# Builds the label shown for a task: its task_name, followed by the task's
# :sql option when one is set, separated by a single space.
#
# item - object responding to opts (read with the :sql key) and accepted by
#        task_name
#
# Returns a String.
def display_item(item)
  parts = [task_name(item)]
  sql = item.opts[:sql]
  parts << sql if sql
  parts.join(" ")
end
|
200
|
+
|
201
|
+
# Whether RubyGems reports the current platform as Windows.
#
# Returns the result of Gem.win_platform? unchanged.
def windows?
  Gem.win_platform?
end
|
274
204
|
end
|
275
205
|
end
|