pgsync 0.5.2 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/LICENSE.txt +1 -1
- data/README.md +121 -38
- data/config.yml +4 -0
- data/exe/pgsync +0 -4
- data/lib/pgsync.rb +5 -1
- data/lib/pgsync/client.rb +54 -52
- data/lib/pgsync/data_source.rb +78 -80
- data/lib/pgsync/init.rb +48 -10
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +98 -175
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +167 -219
- data/lib/pgsync/task.rb +315 -0
- data/lib/pgsync/task_resolver.rb +235 -0
- data/lib/pgsync/utils.rb +64 -24
- data/lib/pgsync/version.rb +1 -1
- metadata +8 -5
- data/lib/pgsync/table_list.rb +0 -143
data/lib/pgsync/table.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# minimal class to keep schema and table name separate
#
# Instances are value objects: two tables with the same schema and name are
# interchangeable as Hash keys and compare equal with both #eql? and #==.
module PgSync
  class Table
    attr_reader :schema, :name

    # schema - the schema part of the table's identity
    # name   - the table name within that schema
    def initialize(schema, name)
      @schema = schema
      @name = name
    end

    # Returns the schema and name joined with a dot, e.g. "public.users".
    def full_name
      "#{schema}.#{name}"
    end

    # Value equality on (schema, name).
    #
    # The type guard matters because #hash is the hash of a plain
    # [schema, name] Array: a Hash holding both Array and Table keys can ask
    # a Table to compare itself with a non-Table, which must answer false
    # instead of raising NoMethodError.
    def eql?(other)
      other.is_a?(Table) && other.schema == schema && other.name == name
    end

    # keep == consistent with eql? so include?/index agree with Hash lookups
    alias_method :==, :eql?

    # override hash when overriding eql?
    def hash
      [schema, name].hash
    end

    # String form is the fully qualified name.
    def to_s
      full_name
    end
  end
end
|
data/lib/pgsync/table_sync.rb
CHANGED
@@ -2,256 +2,204 @@ module PgSync
|
|
2
2
|
class TableSync
|
3
3
|
include Utils
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
bad_fields = opts[:no_rules] ? [] : config["data_rules"]
|
15
|
-
|
16
|
-
from_fields = source.columns(table)
|
17
|
-
to_fields = destination.columns(table)
|
18
|
-
shared_fields = to_fields & from_fields
|
19
|
-
extra_fields = to_fields - from_fields
|
20
|
-
missing_fields = from_fields - to_fields
|
21
|
-
|
22
|
-
if opts[:no_sequences]
|
23
|
-
from_sequences = []
|
24
|
-
to_sequences = []
|
25
|
-
else
|
26
|
-
from_sequences = source.sequences(table, shared_fields)
|
27
|
-
to_sequences = destination.sequences(table, shared_fields)
|
28
|
-
end
|
5
|
+
attr_reader :source, :destination, :tasks, :opts, :resolver
|
6
|
+
|
7
|
+
# Captures the collaborators for one sync run. Each value is stored in the
# instance variable of the same name and exposed through the attr_reader
# declared on the class.
#
# source:, destination: - data sources; elsewhere in this class they are
#                         sent execute, transaction and reconnect_if_needed
# tasks:                - collection of per-table tasks to run
# opts:                 - options looked up with symbol keys
#                         (e.g. :defer_constraints, :jobs)
# resolver:             - object whose #notes are printed before syncing
def initialize(source:, destination:, tasks:, opts:, resolver:)
  @source, @destination = source, destination
  @tasks = tasks
  @opts = opts
  @resolver = resolver
end
|
29
14
|
|
30
|
-
|
31
|
-
|
32
|
-
missing_sequences = from_sequences - to_sequences
|
15
|
+
def perform
|
16
|
+
confirm_tables_exist(destination, tasks, "destination")
|
33
17
|
|
34
|
-
|
18
|
+
add_columns
|
35
19
|
|
36
|
-
|
37
|
-
sql_clause << " #{opts[:sql]}"
|
38
|
-
end
|
20
|
+
show_notes
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
|
45
|
-
|
46
|
-
if shared_fields.empty?
|
47
|
-
return {status: "success", message: "No fields to copy"}
|
48
|
-
end
|
22
|
+
# don't sync tables with no shared fields
|
23
|
+
# we show a warning message above
|
24
|
+
run_tasks(tasks.reject { |task| task.shared_fields.empty? })
|
25
|
+
end
|
49
26
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
27
|
+
# TODO only query specific tables
|
28
|
+
# TODO add sequences, primary keys, etc
|
29
|
+
# Loads the column listing once from the source and once from the
# destination, then assigns each task the columns found for its table on
# either side. A table missing from a listing gets an empty array.
def add_columns
  from_lookup = columns(source)
  to_lookup = columns(destination)

  tasks.each do |task|
    table = task.table
    task.from_columns = from_lookup[table] || []
    task.to_columns = to_lookup[table] || []
  end
end
|
59
38
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
destination.truncate(table) if opts[:truncate]
|
66
|
-
|
67
|
-
from_max_id = source.max_id(table, primary_key)
|
68
|
-
to_max_id = destination.max_id(table, primary_key) + 1
|
69
|
-
|
70
|
-
if to_max_id == 1
|
71
|
-
from_min_id = source.min_id(table, primary_key)
|
72
|
-
to_max_id = from_min_id if from_min_id > 0
|
73
|
-
end
|
74
|
-
|
75
|
-
starting_id = to_max_id
|
76
|
-
batch_size = opts[:batch_size]
|
77
|
-
|
78
|
-
i = 1
|
79
|
-
batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
|
80
|
-
|
81
|
-
while starting_id <= from_max_id
|
82
|
-
where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
|
83
|
-
log " #{i}/#{batch_count}: #{where}"
|
84
|
-
|
85
|
-
# TODO be smarter for advance sql clauses
|
86
|
-
batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
|
87
|
-
|
88
|
-
batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident_full(table)}#{batch_sql_clause}) TO STDOUT"
|
89
|
-
to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
|
90
|
-
from_connection.copy_data batch_copy_to_command do
|
91
|
-
while (row = from_connection.get_copy_data)
|
92
|
-
to_connection.put_copy_data(row)
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
starting_id += batch_size
|
98
|
-
i += 1
|
99
|
-
|
100
|
-
if opts[:sleep] && starting_id <= from_max_id
|
101
|
-
sleep(opts[:sleep])
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
log # add extra line for spinner
|
106
|
-
elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
|
107
|
-
raise Error, "No primary key" unless primary_key
|
108
|
-
|
109
|
-
temp_table = "pgsync_#{rand(1_000_000_000)}"
|
110
|
-
file = Tempfile.new(temp_table)
|
111
|
-
begin
|
112
|
-
from_connection.copy_data copy_to_command do
|
113
|
-
while (row = from_connection.get_copy_data)
|
114
|
-
file.write(row)
|
115
|
-
end
|
116
|
-
end
|
117
|
-
file.rewind
|
118
|
-
|
119
|
-
# create a temp table
|
120
|
-
to_connection.exec("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS SELECT * FROM #{quote_ident_full(table)} WITH NO DATA")
|
121
|
-
|
122
|
-
# load file
|
123
|
-
to_connection.copy_data "COPY #{quote_ident_full(temp_table)} (#{fields}) FROM STDIN" do
|
124
|
-
file.each do |row|
|
125
|
-
to_connection.put_copy_data(row)
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
if opts[:preserve]
|
130
|
-
# insert into
|
131
|
-
to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident_full(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident_full(table)} WHERE #{quote_ident_full(table)}.#{quote_ident(primary_key)} = #{quote_ident_full(temp_table)}.#{quote_ident(primary_key)}))")
|
132
|
-
else
|
133
|
-
to_connection.transaction do
|
134
|
-
to_connection.exec("DELETE FROM #{quote_ident_full(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident_full(temp_table)})")
|
135
|
-
to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident(temp_table)})")
|
136
|
-
end
|
137
|
-
end
|
138
|
-
ensure
|
139
|
-
file.close
|
140
|
-
file.unlink
|
141
|
-
end
|
142
|
-
else
|
143
|
-
destination.truncate(table)
|
144
|
-
to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
|
145
|
-
from_connection.copy_data copy_to_command do
|
146
|
-
while (row = from_connection.get_copy_data)
|
147
|
-
to_connection.put_copy_data(row)
|
148
|
-
end
|
149
|
-
end
|
150
|
-
end
|
151
|
-
end
|
152
|
-
seq_values.each do |seq, value|
|
153
|
-
to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
|
154
|
-
end
|
155
|
-
end
|
39
|
+
# Prints every pending note as a warning, in three groups:
# 1. table-level notes collected by the resolver
# 2. per-task notes (columns and sequences), prefixed with the task name
# 3. when opts[:defer_constraints] is set, the non-deferrable foreign key
#    constraints found on the destination for the tables being synced
def show_notes
  # for tables
  resolver.notes.each { |note| warning note }

  # for columns and sequences
  tasks.each do |task|
    task.notes.each { |note| warning "#{task_name(task)}: #{note}" }
  end

  # for non-deferrable constraints
  return unless opts[:defer_constraints]

  by_table = non_deferrable_constraints(destination)
  names = tasks.flat_map { |task| by_table[task.table] || [] }
  warning "Non-deferrable constraints: #{names.join(", ")}" if names.any?
end
|
59
|
+
|
60
|
+
# Lists every column in information_schema.columns for the given data
# source, indexed by table.
#
# data_source - must respond to #execute(sql) and yield rows readable with
#               the string keys "schema", "table", "column" and "type"
#
# Returns a Hash of Table => Array of {name:, type:} hashes. Rows arrive
# ordered by schema, table and column name, and that order is kept.
def columns(data_source)
  sql = <<~SQL
    SELECT
    table_schema AS schema,
    table_name AS table,
    column_name AS column,
    data_type AS type
    FROM
    information_schema.columns
    ORDER BY 1, 2, 3
  SQL

  data_source.execute(sql).each_with_object({}) do |row, by_table|
    key = Table.new(row["schema"], row["table"])
    (by_table[key] ||= []) << {name: row["column"], type: row["type"]}
  end
end
|
75
|
+
|
76
|
+
# Finds the foreign key constraints that are declared non-deferrable on the
# given data source, indexed by table.
#
# data_source - must respond to #execute(sql) and yield rows readable with
#               the string keys "schema", "table" and "constraint_name"
#
# Returns a Hash of Table => Array of constraint names.
def non_deferrable_constraints(data_source)
  sql = <<~SQL
    SELECT
    table_schema AS schema,
    table_name AS table,
    constraint_name
    FROM
    information_schema.table_constraints
    WHERE
    constraint_type = 'FOREIGN KEY' AND
    is_deferrable = 'NO'
  SQL

  data_source.execute(sql).each_with_object({}) do |row, by_table|
    key = Table.new(row["schema"], row["table"])
    (by_table[key] ||= []) << row["constraint_name"]
  end
end
|
92
|
+
|
93
|
+
def run_tasks(tasks, &block)
|
94
|
+
notices = []
|
95
|
+
failed_tables = []
|
96
|
+
|
97
|
+
spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
|
98
|
+
task_spinners = {}
|
99
|
+
started_at = {}
|
100
|
+
|
101
|
+
start = lambda do |task, i|
|
102
|
+
message = ":spinner #{display_item(task)}"
|
103
|
+
spinner = spinners.register(message)
|
104
|
+
if opts[:in_batches]
|
105
|
+
# log instead of spin for non-tty
|
106
|
+
log message.sub(":spinner", "⠋")
|
176
107
|
else
|
177
|
-
|
108
|
+
spinner.auto_spin
|
178
109
|
end
|
110
|
+
task_spinners[task] = spinner
|
111
|
+
started_at[task] = Time.now
|
112
|
+
end
|
179
113
|
|
180
|
-
|
181
|
-
|
114
|
+
finish = lambda do |task, i, result|
|
115
|
+
spinner = task_spinners[task]
|
116
|
+
time = (Time.now - started_at[task]).round(1)
|
182
117
|
|
183
|
-
|
118
|
+
message =
|
119
|
+
if result[:message]
|
120
|
+
"(#{result[:message].lines.first.to_s.strip})"
|
121
|
+
else
|
122
|
+
"- #{time}s"
|
123
|
+
end
|
184
124
|
|
185
|
-
|
186
|
-
def rule_match?(table, column, rule)
|
187
|
-
regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
|
188
|
-
regex.match(column) || regex.match("#{table.split(".", 2)[-1]}.#{column}") || regex.match("#{table}.#{column}")
|
189
|
-
end
|
125
|
+
notices.concat(result[:notices])
|
190
126
|
|
191
|
-
|
192
|
-
|
193
|
-
if rule.is_a?(Hash)
|
194
|
-
if rule.key?("value")
|
195
|
-
escape(rule["value"])
|
196
|
-
elsif rule.key?("statement")
|
197
|
-
rule["statement"]
|
127
|
+
if result[:status] == "success"
|
128
|
+
spinner.success(message)
|
198
129
|
else
|
199
|
-
|
130
|
+
spinner.error(message)
|
131
|
+
failed_tables << task_name(task)
|
132
|
+
fail_sync(failed_tables) if opts[:fail_fast]
|
200
133
|
end
|
134
|
+
|
135
|
+
unless spinner.send(:tty?)
|
136
|
+
status = result[:status] == "success" ? "✔" : "✖"
|
137
|
+
log [status, display_item(task), message].join(" ")
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
options = {start: start, finish: finish}
|
142
|
+
|
143
|
+
jobs = opts[:jobs]
|
144
|
+
if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
|
145
|
+
warning "--jobs ignored" if jobs
|
146
|
+
jobs = 0
|
147
|
+
end
|
148
|
+
|
149
|
+
if windows?
|
150
|
+
options[:in_threads] = jobs || 4
|
201
151
|
else
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
"date '1970-01-01' + (RANDOM() * 10000)::int"
|
215
|
-
when "random_time"
|
216
|
-
"NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
|
217
|
-
when "random_ip"
|
218
|
-
"(1 + RANDOM() * 254)::int::text || '.0.0.1'"
|
219
|
-
when "random_letter"
|
220
|
-
"chr(65 + (RANDOM() * 26)::int)"
|
221
|
-
when "random_string"
|
222
|
-
"RIGHT(MD5(RANDOM()::text), 10)"
|
223
|
-
when "null", nil
|
224
|
-
"NULL"
|
225
|
-
else
|
226
|
-
raise Error, "Unknown rule #{rule} for column #{column}"
|
152
|
+
options[:in_processes] = jobs if jobs
|
153
|
+
end
|
154
|
+
|
155
|
+
maybe_defer_constraints do
|
156
|
+
# could try to use `raise Parallel::Kill` to fail faster with --fail-fast
|
157
|
+
# see `fast_faster` branch
|
158
|
+
# however, need to make sure connections are cleaned up properly
|
159
|
+
Parallel.each(tasks, **options) do |task|
|
160
|
+
source.reconnect_if_needed
|
161
|
+
destination.reconnect_if_needed
|
162
|
+
|
163
|
+
task.perform
|
227
164
|
end
|
228
165
|
end
|
229
|
-
end
|
230
166
|
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
end
|
167
|
+
notices.each do |notice|
|
168
|
+
warning notice
|
169
|
+
end
|
235
170
|
|
236
|
-
|
237
|
-
ident.split(".").map { |v| quote_ident(v) }.join(".")
|
171
|
+
fail_sync(failed_tables) if failed_tables.any?
|
238
172
|
end
|
239
173
|
|
240
|
-
def
|
241
|
-
|
242
|
-
|
174
|
+
# Runs the given block, optionally wrapped for deferred constraints.
#
# Without opts[:defer_constraints] the block is simply yielded to. With it,
# the block runs inside a destination transaction that has issued
# SET CONSTRAINTS ALL DEFERRED, nested inside a source transaction.
def maybe_defer_constraints
  return yield unless opts[:defer_constraints]

  destination.transaction do
    destination.execute("SET CONSTRAINTS ALL DEFERRED")

    # create a transaction on the source
    # to ensure we get a consistent snapshot
    source.transaction { yield }
  end
end
|
251
189
|
|
252
|
-
|
253
|
-
|
254
|
-
|
190
|
+
# Aborts the sync by raising Error with a summary of the tables that failed.
#
# failed_tables - collection of table names; must respond to size and join
#
# The word "table" is pluralised unless exactly one table failed.
def fail_sync(failed_tables)
  count = failed_tables.size
  plural = count == 1 ? "" : "s"
  raise Error, "Sync failed for #{count} table#{plural}: #{failed_tables.join(", ")}"
end
|
193
|
+
|
194
|
+
# Builds the label shown for a task: its task name, followed by the task's
# :sql option when one is set, separated by a single space.
def display_item(item)
  parts = [task_name(item)]
  sql = item.opts[:sql]
  parts << sql if sql
  parts.join(" ")
end
|
200
|
+
|
201
|
+
# True when RubyGems reports a Windows platform. run_tasks uses this to
# choose threads instead of processes for parallel work.
def windows?
  Gem.win_platform?
end
|
256
204
|
end
|
257
205
|
end
|