pgsync 0.5.1 → 0.6.0
Potentially problematic release.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -0
- data/LICENSE.txt +1 -1
- data/README.md +84 -41
- data/config.yml +5 -4
- data/exe/pgsync +3 -11
- data/lib/pgsync.rb +8 -5
- data/lib/pgsync/client.rb +60 -332
- data/lib/pgsync/data_source.rb +78 -77
- data/lib/pgsync/init.rb +61 -0
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +162 -0
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +168 -208
- data/lib/pgsync/task.rb +315 -0
- data/lib/pgsync/task_resolver.rb +235 -0
- data/lib/pgsync/utils.rb +86 -0
- data/lib/pgsync/version.rb +1 -1
- metadata +11 -5
- data/lib/pgsync/table_list.rb +0 -107
data/lib/pgsync/table.rb ADDED

```diff
@@ -0,0 +1,28 @@
+# minimal class to keep schema and table name separate
+module PgSync
+  class Table
+    attr_reader :schema, :name
+
+    def initialize(schema, name)
+      @schema = schema
+      @name = name
+    end
+
+    def full_name
+      "#{schema}.#{name}"
+    end
+
+    def eql?(other)
+      other.schema == schema && other.name == name
+    end
+
+    # override hash when overriding eql?
+    def hash
+      [schema, name].hash
+    end
+
+    def to_s
+      full_name
+    end
+  end
+end
```
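The `eql?`/`hash` pair is what lets `Table` instances act as `Hash` keys, which is how the new `TableSync#columns` uses them after `group_by`. A minimal sketch of that behavior (assumes pgsync 0.6.0 is installed; the schema and table names are made up):

```ruby
require "pgsync"

# two independently built instances compare as the same key
a = PgSync::Table.new("public", "users")
b = PgSync::Table.new("public", "users")

columns = { a => [{name: "id", type: "integer"}] }
columns[b]  # => [{name: "id", type: "integer"}], found via eql?/hash
a.full_name # => "public.users"
```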
data/lib/pgsync/table_sync.rb CHANGED

```diff
@@ -1,245 +1,205 @@
 module PgSync
   class TableSync
-
-      start_time = Time.now
-      source = DataSource.new(source_url, timeout: 0)
-      destination = DataSource.new(destination_url, timeout: 0)
-
-      begin
-        from_connection = source.conn
-        to_connection = destination.conn
-
-        bad_fields = opts[:no_rules] ? [] : config["data_rules"]
-
-        from_fields = source.columns(table)
-        to_fields = destination.columns(table)
-        shared_fields = to_fields & from_fields
-        extra_fields = to_fields - from_fields
-        missing_fields = from_fields - to_fields
-
-        if opts[:no_sequences]
-          from_sequences = []
-          to_sequences = []
-        else
-          from_sequences = source.sequences(table, shared_fields)
-          to_sequences = destination.sequences(table, shared_fields)
-        end
+    include Utils
 
-
-        extra_sequences = to_sequences - from_sequences
-        missing_sequences = from_sequences - to_sequences
+    attr_reader :source, :destination, :tasks, :opts, :resolver
 
-
+    def initialize(source:, destination:, tasks:, opts:, resolver:)
+      @source = source
+      @destination = destination
+      @tasks = tasks
+      @opts = opts
+      @resolver = resolver
+    end
 
-
-
-      end
+    def perform
+      confirm_tables_exist(destination, tasks, "destination")
 
-
-        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
-        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
-        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
-        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+      add_columns
 
-
-          return {status: "success", message: "No fields to copy"}
-        end
+      show_notes
 
-
-
-
-
+      # don't sync tables with no shared fields
+      # we show a warning message above
+      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
+    end
 
-
-
-
-
+    # TODO only query specific tables
+    # TODO add sequences, primary keys, etc
+    def add_columns
+      source_columns = columns(source)
+      destination_columns = columns(destination)
 
-
-
-
-
-
-          destination.truncate(table) if opts[:truncate]
-
-          from_max_id = source.max_id(table, primary_key)
-          to_max_id = destination.max_id(table, primary_key) + 1
-
-          if to_max_id == 1
-            from_min_id = source.min_id(table, primary_key)
-            to_max_id = from_min_id if from_min_id > 0
-          end
-
-          starting_id = to_max_id
-          batch_size = opts[:batch_size]
-
-          i = 1
-          batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
-
-          while starting_id <= from_max_id
-            where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
-            log "  #{i}/#{batch_count}: #{where}"
-
-            # TODO be smarter for advance sql clauses
-            batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
-
-            batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident_full(table)}#{batch_sql_clause}) TO STDOUT"
-            to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-              from_connection.copy_data batch_copy_to_command do
-                while (row = from_connection.get_copy_data)
-                  to_connection.put_copy_data(row)
-                end
-              end
-            end
-
-            starting_id += batch_size
-            i += 1
-
-            if opts[:sleep] && starting_id <= from_max_id
-              sleep(opts[:sleep])
-            end
-          end
-        elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
-          raise PgSync::Error, "No primary key" unless primary_key
-
-          temp_table = "pgsync_#{rand(1_000_000_000)}"
-          file = Tempfile.new(temp_table)
-          begin
-            from_connection.copy_data copy_to_command do
-              while (row = from_connection.get_copy_data)
-                file.write(row)
-              end
-            end
-            file.rewind
-
-            # create a temp table
-            to_connection.exec("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS SELECT * FROM #{quote_ident_full(table)} WITH NO DATA")
-
-            # load file
-            to_connection.copy_data "COPY #{quote_ident_full(temp_table)} (#{fields}) FROM STDIN" do
-              file.each do |row|
-                to_connection.put_copy_data(row)
-              end
-            end
-
-            if opts[:preserve]
-              # insert into
-              to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident_full(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident_full(table)} WHERE #{quote_ident_full(table)}.#{quote_ident(primary_key)} = #{quote_ident_full(temp_table)}.#{quote_ident(primary_key)}))")
-            else
-              to_connection.transaction do
-                to_connection.exec("DELETE FROM #{quote_ident_full(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident_full(temp_table)})")
-                to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident(temp_table)})")
-              end
-            end
-          ensure
-            file.close
-            file.unlink
-          end
-        else
-          destination.truncate(table)
-          to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-            from_connection.copy_data copy_to_command do
-              while (row = from_connection.get_copy_data)
-                to_connection.put_copy_data(row)
-              end
-            end
-          end
-        end
-        seq_values.each do |seq, value|
-          to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
-        end
-      end
+      tasks.each do |task|
+        task.from_columns = source_columns[task.table] || []
+        task.to_columns = destination_columns[task.table] || []
+      end
+    end
 
-
-
-
+    def show_notes
+      # for tables
+      resolver.notes.each do |note|
+        warning note
+      end
+
+      # for columns and sequences
+      tasks.each do |task|
+        task.notes.each do |note|
+          warning "#{task_name(task)}: #{note}"
         end
+      end
 
-
-
-
-
+      # for non-deferrable constraints
+      if opts[:defer_constraints]
+        constraints = non_deferrable_constraints(destination)
+        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
+        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
       end
-    rescue PgSync::Error => e
-      {status: "error", message: e.message}
     end
 
-
+    def columns(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          column_name AS column,
+          data_type AS type
+        FROM
+          information_schema.columns
+        ORDER BY 1, 2, 3
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
+      end.to_h
+    end
 
-
-
-
-
+    def non_deferrable_constraints(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          constraint_name
+        FROM
+          information_schema.table_constraints
+        WHERE
+          constraint_type = 'FOREIGN KEY' AND
+          is_deferrable = 'NO'
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| r["constraint_name"] }]
+      end.to_h
     end
 
-
-
-
-
-
-
-
+    def run_tasks(tasks, &block)
+      notices = []
+      failed_tables = []
+
+      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
+      task_spinners = {}
+      started_at = {}
+
+      start = lambda do |task, i|
+        message = ":spinner #{display_item(task)}"
+        spinner = spinners.register(message)
+        if opts[:in_batches]
+          # log instead of spin for non-tty
+          log message.sub(":spinner", "⠋")
         else
-
+          spinner.auto_spin
         end
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          "chr(65 + (RANDOM() * 26)::int)"
-        when "random_string"
-          "RIGHT(MD5(RANDOM()::text), 10)"
-        when "null", nil
-          "NULL"
+        task_spinners[task] = spinner
+        started_at[task] = Time.now
+      end
+
+      finish = lambda do |task, i, result|
+        spinner = task_spinners[task]
+        time = (Time.now - started_at[task]).round(1)
+
+        message =
+          if result[:message]
+            "(#{result[:message].lines.first.to_s.strip})"
+          else
+            "- #{time}s"
+          end
+
+        notices.concat(result[:notices])
+
+        if result[:status] == "success"
+          spinner.success(message)
         else
-
+          spinner.error(message)
+          failed_tables << task_name(task)
+          fail_sync(failed_tables) if opts[:fail_fast]
+        end
+
+        unless spinner.send(:tty?)
+          status = result[:status] == "success" ? "✔" : "✖"
+          log [status, display_item(task), message].join(" ")
         end
       end
-    end
 
-
-      raise "Primary key required for this data rule: #{rule}" unless primary_key
-      "#{quote_ident_full(table)}.#{quote_ident(primary_key)}"
-    end
+      options = {start: start, finish: finish}
 
-
-
-
+      jobs = opts[:jobs]
+      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
+        warning "--jobs ignored" if jobs
+        jobs = 0
+      end
 
-
-
-
+      if windows?
+        options[:in_threads] = jobs || 4
+      else
+        options[:in_processes] = jobs if jobs
+      end
+
+      maybe_defer_constraints do
+        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
+        # see `fast_faster` branch
+        # however, need to make sure connections are cleaned up properly
+        Parallel.each(tasks, **options) do |task|
+          source.reconnect_if_needed
+          destination.reconnect_if_needed
 
-
-
+          task.perform
+        end
+      end
+
+      notices.each do |notice|
+        warning notice
+      end
+
+      fail_sync(failed_tables) if failed_tables.any?
     end
 
-    def
-      if
-
+    def maybe_defer_constraints
+      if opts[:defer_constraints]
+        destination.transaction do
+          destination.execute("SET CONSTRAINTS ALL DEFERRED")
+
+          # create a transaction on the source
+          # to ensure we get a consistent snapshot
+          source.transaction do
+            yield
+          end
+        end
       else
-
+        yield
       end
     end
 
-
-
-
+    def fail_sync(failed_tables)
+      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
+    end
+
+    def display_item(item)
+      messages = []
+      messages << task_name(item)
+      messages << item.opts[:sql] if item.opts[:sql]
+      messages.join(" ")
+    end
+
+    def windows?
+      Gem.win_platform?
    end
  end
 end
```
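The new `--defer-constraints` path wraps the whole run in a destination transaction opened with `SET CONSTRAINTS ALL DEFERRED`, so foreign-key checks move from per-row to commit time and tables can be copied in any order. A rough sketch of the Postgres behavior this relies on (hypothetical `posts`/`comments` tables and database name; the FK must be declared `DEFERRABLE`, which is why `show_notes` warns about non-deferrable constraints):

```ruby
require "pg"

conn = PG.connect(dbname: "example") # hypothetical database
conn.transaction do |c|
  c.exec("SET CONSTRAINTS ALL DEFERRED")
  # child row inserted before its parent; the deferrable FK
  # is only checked when the transaction commits
  c.exec("INSERT INTO comments (id, post_id) VALUES (1, 1)")
  c.exec("INSERT INTO posts (id) VALUES (1)")
end
```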
data/lib/pgsync/task.rb ADDED

```diff
@@ -0,0 +1,315 @@
+module PgSync
+  class Task
+    include Utils
+
+    attr_reader :source, :destination, :config, :table, :opts
+    attr_accessor :from_columns, :to_columns
+
+    def initialize(source:, destination:, config:, table:, opts:)
+      @source = source
+      @destination = destination
+      @config = config
+      @table = table
+      @opts = opts
+    end
+
+    def quoted_table
+      quote_ident_full(table)
+    end
+
+    def perform
+      with_notices do
+        handle_errors do
+          maybe_disable_triggers do
+            sync_data
+          end
+        end
+      end
+    end
+
+    def from_fields
+      @from_fields ||= from_columns.map { |c| c[:name] }
+    end
+
+    def to_fields
+      @to_fields ||= to_columns.map { |c| c[:name] }
+    end
+
+    def shared_fields
+      @shared_fields ||= to_fields & from_fields
+    end
+
+    def from_sequences
+      @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
+    end
+
+    def to_sequences
+      @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
+    end
+
+    def shared_sequences
+      @shared_sequences ||= to_sequences & from_sequences
+    end
+
+    def notes
+      notes = []
+      if shared_fields.empty?
+        notes << "No fields to copy"
+      else
+        extra_fields = to_fields - from_fields
+        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
+
+        missing_fields = from_fields - to_fields
+        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
+
+        extra_sequences = to_sequences - from_sequences
+        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
+
+        missing_sequences = from_sequences - to_sequences
+        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+
+        from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h
+        to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h
+        different_types = []
+        shared_fields.each do |field|
+          if from_types[field] != to_types[field]
+            different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})"
+          end
+        end
+        notes << "Different column types: #{different_types.join(", ")}" if different_types.any?
+      end
+      notes
+    end
+
+    def sync_data
+      raise Error, "This should never happen. Please file a bug." if shared_fields.empty?
+
+      sql_clause = String.new("")
+      sql_clause << " #{opts[:sql]}" if opts[:sql]
+
+      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
+      primary_key = destination.primary_key(table)
+      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
+      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
+
+      seq_values = {}
+      shared_sequences.each do |seq|
+        seq_values[seq] = source.last_value(seq)
+      end
+
+      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
+      if opts[:in_batches]
+        raise Error, "No primary key" if primary_key.empty?
+        primary_key = primary_key.first
+
+        destination.truncate(table) if opts[:truncate]
+
+        from_max_id = source.max_id(table, primary_key)
+        to_max_id = destination.max_id(table, primary_key) + 1
+
+        if to_max_id == 1
+          from_min_id = source.min_id(table, primary_key)
+          to_max_id = from_min_id if from_min_id > 0
+        end
+
+        starting_id = to_max_id
+        batch_size = opts[:batch_size]
+
+        i = 1
+        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
+
+        while starting_id <= from_max_id
+          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
+          log "  #{i}/#{batch_count}: #{where}"
+
+          # TODO be smarter for advance sql clauses
+          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
+
+          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
+          copy(batch_copy_to_command, dest_table: table, dest_fields: fields)
+
+          starting_id += batch_size
+          i += 1
+
+          if opts[:sleep] && starting_id <= from_max_id
+            sleep(opts[:sleep])
+          end
+        end
+      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
+        raise Error, "No primary key" if primary_key.empty?
+
+        # create a temp table
+        temp_table = "pgsync_#{rand(1_000_000_000)}"
+        destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA")
+
+        # load data
+        copy(copy_to_command, dest_table: temp_table, dest_fields: fields)
+
+        on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ")
+        action =
+          if opts[:preserve]
+            "NOTHING"
+          else # overwrite or sql clause
+            setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" }
+            "UPDATE SET #{setter.join(", ")}"
+          end
+        destination.execute("INSERT INTO #{quoted_table} (SELECT * FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}")
+      else
+        # use delete instead of truncate for foreign keys
+        if opts[:defer_constraints]
+          destination.execute("DELETE FROM #{quoted_table}")
+        else
+          destination.truncate(table)
+        end
+        copy(copy_to_command, dest_table: table, dest_fields: fields)
+      end
+      seq_values.each do |seq, value|
+        destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
+      end
+
+      {status: "success"}
+    end
+
+    private
+
+    def with_notices
+      notices = []
+      [source, destination].each do |data_source|
+        data_source.send(:conn).set_notice_processor do |message|
+          notices << message.strip
+        end
+      end
+      result = yield
+      result[:notices] = notices if result
+      result
+    ensure
+      # clear notice processor
+      [source, destination].each do |data_source|
+        data_source.send(:conn).set_notice_processor
+      end
+    end
+
+    # TODO add retries
+    def handle_errors
+      yield
+    rescue => e
+      raise e if opts[:debug]
+
+      message =
+        case e
+        when PG::ConnectionBad
+          # likely fine to show simplified message here
+          # the full message will be shown when first trying to connect
+          "Connection failed"
+        when PG::Error
+          e.message.sub("ERROR: ", "")
+        when Error
+          e.message
+        else
+          "#{e.class.name}: #{e.message}"
+        end
+
+      {status: "error", message: message}
+    end
+
+    def copy(source_command, dest_table:, dest_fields:)
+      destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
+      destination.conn.copy_data(destination_command) do
+        source.conn.copy_data(source_command) do
+          while (row = source.conn.get_copy_data)
+            destination.conn.put_copy_data(row)
+          end
+        end
+      end
+    end
+
+    # TODO better performance
+    def rule_match?(table, column, rule)
+      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
+      regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}")
+    end
+
+    # TODO wildcard rules
+    def apply_strategy(rule, table, column, primary_key)
+      if rule.is_a?(Hash)
+        if rule.key?("value")
+          escape(rule["value"])
+        elsif rule.key?("statement")
+          rule["statement"]
+        else
+          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
+        end
+      else
+        case rule
+        when "untouched"
+          quote_ident(column)
+        when "unique_email"
+          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
+        when "unique_phone"
+          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
+        when "unique_secret"
+          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
+        when "random_int", "random_number"
+          "(RANDOM() * 100)::int"
+        when "random_date"
+          "date '1970-01-01' + (RANDOM() * 10000)::int"
+        when "random_time"
+          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
+        when "random_ip"
+          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
+        when "random_letter"
+          "chr(65 + (RANDOM() * 26)::int)"
+        when "random_string"
+          "RIGHT(MD5(RANDOM()::text), 10)"
+        when "null", nil
+          "NULL"
+        else
+          raise Error, "Unknown rule #{rule} for column #{column}"
+        end
+      end
+    end
+
+    def quoted_primary_key(table, primary_key, rule)
+      raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1
+      "#{quoted_table}.#{quote_ident(primary_key.first)}"
+    end
+
+    def maybe_disable_triggers
+      if opts[:disable_integrity] || opts[:disable_user_triggers]
+        destination.transaction do
+          triggers = destination.triggers(table)
+          triggers.select! { |t| t["enabled"] == "t" }
+          internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
+          integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
+          restore_triggers = []
+
+          if opts[:disable_integrity]
+            integrity_triggers.each do |trigger|
+              destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
+            end
+            restore_triggers.concat(integrity_triggers)
+          end
+
+          if opts[:disable_user_triggers]
+            # important!
+            # rely on Postgres to disable user triggers
+            # we don't want to accidentally disable non-user triggers if logic above is off
+            destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER")
+            restore_triggers.concat(user_triggers)
+          end
+
+          result = yield
+
+          # restore triggers that were previously enabled
+          restore_triggers.each do |trigger|
+            destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}")
+          end
+
+          result
+        end
+      else
+        yield
+      end
+    end
+  end
+end
```
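`rule_match?` compiles each data-rule pattern from the config into an anchored regex, with `*` standing for any run of non-dot characters, and tries it against the bare column name as well as its table- and schema-qualified forms. A quick sketch of that expansion (the rule string is hypothetical):

```ruby
# "*" may match within one dotted segment, but cannot cross a "."
rule = "users.*_token"
regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*', '[^\.]*') + '\z')

regex.match("auth_token")              # => nil, bare column lacks the table prefix
regex.match("users.auth_token")        # => MatchData, "*" matched "auth"
regex.match("public.users.auth_token") # => nil, regex is anchored at "users."
```

A matched column is then rewritten in the COPY's SELECT list by `apply_strategy`, e.g. `unique_email` emits `'email' || <primary key>::text || '@example.org'` in place of the original column.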