pgsync 0.5.4 → 0.6.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/LICENSE.txt +1 -1
- data/README.md +123 -38
- data/config.yml +4 -0
- data/lib/pgsync.rb +5 -1
- data/lib/pgsync/client.rb +55 -53
- data/lib/pgsync/data_source.rb +78 -80
- data/lib/pgsync/init.rb +50 -6
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +95 -177
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +185 -186
- data/lib/pgsync/task.rb +329 -0
- data/lib/pgsync/task_resolver.rb +237 -0
- data/lib/pgsync/utils.rb +53 -13
- data/lib/pgsync/version.rb +1 -1
- metadata +6 -3
- data/lib/pgsync/table_list.rb +0 -141
data/lib/pgsync/table.rb
ADDED
@@ -0,0 +1,28 @@

```ruby
# minimal class to keep schema and table name separate
module PgSync
  class Table
    attr_reader :schema, :name

    def initialize(schema, name)
      @schema = schema
      @name = name
    end

    def full_name
      "#{schema}.#{name}"
    end

    def eql?(other)
      other.schema == schema && other.name == name
    end

    # override hash when overriding eql?
    def hash
      [schema, name].hash
    end

    def to_s
      full_name
    end
  end
end
```
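Defining `eql?` together with `hash` is what lets `Table` objects work as Hash keys, which `table_sync.rb` relies on below (`source_columns[task.table]`). A minimal sketch of that behavior, assuming `pgsync` is installed and loads `PgSync::Table`:

```ruby
require "pgsync"

t1 = PgSync::Table.new("public", "users")
t2 = PgSync::Table.new("public", "users")

t1.eql?(t2)        # => true
t1.hash == t2.hash # => true

# Hash lookup uses eql?/hash, so equal tables find the same entry
columns = { t1 => ["id", "email"] }
columns[t2] # => ["id", "email"]
t2.to_s     # => "public.users"
```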
data/lib/pgsync/table_sync.rb
CHANGED
@@ -2,234 +2,233 @@ module PgSync

(The removed side of this hunk is the old per-table sync logic — column/sequence comparison, the COPY pipeline, data rules, and error handling — which moves into the new Task class below. Reconstructed new version of the file:)

```ruby
module PgSync
  class TableSync
    include Utils

    attr_reader :source, :destination, :tasks, :opts, :resolver

    def initialize(source:, destination:, tasks:, opts:, resolver:)
      @source = source
      @destination = destination
      @tasks = tasks
      @opts = opts
      @resolver = resolver
    end

    def perform
      confirm_tables_exist(destination, tasks, "destination")

      add_columns

      show_notes

      # don't sync tables with no shared fields
      # we show a warning message above
      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
    end

    # TODO only query specific tables
    # TODO add sequences, primary keys, etc
    def add_columns
      source_columns = columns(source)
      destination_columns = columns(destination)

      tasks.each do |task|
        task.from_columns = source_columns[task.table] || []
        task.to_columns = destination_columns[task.table] || []
      end
    end

    def show_notes
      # for tables
      resolver.notes.each do |note|
        warning note
      end

      # for columns and sequences
      tasks.each do |task|
        task.notes.each do |note|
          warning "#{task_name(task)}: #{note}"
        end
      end

      # for non-deferrable constraints
      if opts[:defer_constraints]
        constraints = non_deferrable_constraints(destination)
        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
      end
    end

    def columns(data_source)
      query = <<~SQL
        SELECT
          table_schema AS schema,
          table_name AS table,
          column_name AS column,
          data_type AS type
        FROM
          information_schema.columns
        ORDER BY 1, 2, 3
      SQL
      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
      end.to_h
    end

    def non_deferrable_constraints(data_source)
      query = <<~SQL
        SELECT
          table_schema AS schema,
          table_name AS table,
          constraint_name
        FROM
          information_schema.table_constraints
        WHERE
          constraint_type = 'FOREIGN KEY' AND
          is_deferrable = 'NO'
      SQL
      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
        [k, v.map { |r| r["constraint_name"] }]
      end.to_h
    end

    def run_tasks(tasks, &block)
      notices = []
      failed_tables = []

      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
      task_spinners = {}
      started_at = {}

      start = lambda do |task, i|
        message = ":spinner #{display_item(task)}"
        spinner = spinners.register(message)
        if opts[:in_batches]
          # log instead of spin for non-tty
          log message.sub(":spinner", "⠋")
        else
          spinner.auto_spin
        end
        task_spinners[task] = spinner
        started_at[task] = Time.now
      end

      finish = lambda do |task, i, result|
        spinner = task_spinners[task]
        time = (Time.now - started_at[task]).round(1)

        message =
          if result[:message]
            "(#{result[:message].lines.first.to_s.strip})"
          else
            "- #{time}s"
          end

        notices.concat(result[:notices])

        if result[:status] == "success"
          spinner.success(message)
        else
          spinner.error(message)
          failed_tables << task_name(task)
          fail_sync(failed_tables) if opts[:fail_fast]
        end

        unless spinner.send(:tty?)
          status = result[:status] == "success" ? "✔" : "✖"
          log [status, display_item(task), message].join(" ")
        end
      end

      options = {start: start, finish: finish}

      jobs = opts[:jobs]
      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints] || opts[:defer_constraints_v2] || opts[:disable_integrity] || opts[:disable_integrity_v2]
        warning "--jobs ignored" if jobs
        jobs = 0
      end

      if windows?
        options[:in_threads] = jobs || 4
      else
        options[:in_processes] = jobs if jobs
      end

      maybe_defer_constraints do
        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
        # see `fast_faster` branch
        # however, need to make sure connections are cleaned up properly
        Parallel.each(tasks, **options) do |task|
          source.reconnect_if_needed
          destination.reconnect_if_needed

          task.perform
        end
      end

      notices.each do |notice|
        warning notice
      end

      fail_sync(failed_tables) if failed_tables.any?
    end

    def maybe_defer_constraints
      if opts[:disable_integrity] || opts[:disable_integrity_v2]
        # create a transaction on the source
        # to ensure we get a consistent snapshot
        source.transaction do
          yield
        end
      elsif opts[:defer_constraints] || opts[:defer_constraints_v2]
        destination.transaction do
          if opts[:defer_constraints_v2]
            table_constraints = non_deferrable_constraints(destination)
            table_constraints.each do |table, constraints|
              constraints.each do |constraint|
                destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} DEFERRABLE")
              end
            end
          end

          destination.execute("SET CONSTRAINTS ALL DEFERRED")

          # create a transaction on the source
          # to ensure we get a consistent snapshot
          source.transaction do
            yield
          end

          # set them back
          # there are 3 modes: DEFERRABLE INITIALLY DEFERRED, DEFERRABLE INITIALLY IMMEDIATE, and NOT DEFERRABLE
          # we only update NOT DEFERRABLE
          # https://www.postgresql.org/docs/current/sql-set-constraints.html
          if opts[:defer_constraints_v2]
            destination.execute("SET CONSTRAINTS ALL IMMEDIATE")

            table_constraints.each do |table, constraints|
              constraints.each do |constraint|
                destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} NOT DEFERRABLE")
              end
            end
          end
        end
      else
        yield
      end
    end

    def fail_sync(failed_tables)
      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
    end

    def display_item(item)
      messages = []
      messages << task_name(item)
      messages << item.opts[:sql] if item.opts[:sql]
      messages.join(" ")
    end

    def windows?
      Gem.win_platform?
    end
  end
end
```
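`run_tasks` drives per-table progress through the parallel gem's `start:` and `finish:` hooks, which run in the coordinating process before and after each worker item; that is how spinner state and each task's `{status:, message:, notices:}` result flow back even with `in_processes`. A standalone sketch of that callback flow (hypothetical items, not pgsync code):

```ruby
require "parallel"

items = ["public.users", "public.orders"]

# runs in the parent process before each item is dispatched to a worker
start = lambda { |item, i| puts "⠋ #{item}" }

# runs in the parent process with the block's return value for each item
finish = lambda do |item, i, result|
  puts "#{result[:status] == "success" ? "✔" : "✖"} #{item}"
end

Parallel.each(items, in_processes: 2, start: start, finish: finish) do |item|
  {status: "success"} # worker result, like Task#perform's return value
end
```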
data/lib/pgsync/task.rb
ADDED
@@ -0,0 +1,329 @@

```ruby
module PgSync
  class Task
    include Utils

    attr_reader :source, :destination, :config, :table, :opts
    attr_accessor :from_columns, :to_columns

    def initialize(source:, destination:, config:, table:, opts:)
      @source = source
      @destination = destination
      @config = config
      @table = table
      @opts = opts
    end

    def quoted_table
      quote_ident_full(table)
    end

    def perform
      with_notices do
        handle_errors do
          maybe_disable_triggers do
            sync_data
          end
        end
      end
    end

    def from_fields
      @from_fields ||= from_columns.map { |c| c[:name] }
    end

    def to_fields
      @to_fields ||= to_columns.map { |c| c[:name] }
    end

    def shared_fields
      @shared_fields ||= to_fields & from_fields
    end

    def from_sequences
      @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
    end

    def to_sequences
      @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
    end

    def shared_sequences
      @shared_sequences ||= to_sequences & from_sequences
    end

    def notes
      notes = []
      if shared_fields.empty?
        notes << "No fields to copy"
      else
        extra_fields = to_fields - from_fields
        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?

        missing_fields = from_fields - to_fields
        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?

        extra_sequences = to_sequences - from_sequences
        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?

        missing_sequences = from_sequences - to_sequences
        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?

        from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h
        to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h
        different_types = []
        shared_fields.each do |field|
          if from_types[field] != to_types[field]
            different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})"
          end
        end
        notes << "Different column types: #{different_types.join(", ")}" if different_types.any?
      end
      notes
    end

    def sync_data
      raise Error, "This should never happen. Please file a bug." if shared_fields.empty?

      sql_clause = String.new("")
      sql_clause << " #{opts[:sql]}" if opts[:sql]

      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
      primary_key = destination.primary_key(table)
      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")

      seq_values = {}
      shared_sequences.each do |seq|
        seq_values[seq] = source.last_value(seq)
      end

      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
      if opts[:in_batches]
        raise Error, "No primary key" if primary_key.empty?
        primary_key = primary_key.first

        destination.truncate(table) if opts[:truncate]

        from_max_id = source.max_id(table, primary_key)
        to_max_id = destination.max_id(table, primary_key) + 1

        if to_max_id == 1
          from_min_id = source.min_id(table, primary_key)
          to_max_id = from_min_id if from_min_id > 0
        end

        starting_id = to_max_id
        batch_size = opts[:batch_size]

        i = 1
        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil

        while starting_id <= from_max_id
          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
          log " #{i}/#{batch_count}: #{where}"

          # TODO be smarter for advance sql clauses
          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"

          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
          copy(batch_copy_to_command, dest_table: table, dest_fields: fields)

          starting_id += batch_size
          i += 1

          if opts[:sleep] && starting_id <= from_max_id
            sleep(opts[:sleep])
          end
        end
      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
        raise Error, "No primary key" if primary_key.empty?

        # create a temp table
        temp_table = "pgsync_#{rand(1_000_000_000)}"
        destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA")

        # load data
        copy(copy_to_command, dest_table: temp_table, dest_fields: fields)

        on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ")
        action =
          if opts[:preserve]
            "NOTHING"
          else # overwrite or sql clause
            setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" }
            "UPDATE SET #{setter.join(", ")}"
          end
        destination.execute("INSERT INTO #{quoted_table} (SELECT * FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}")
      else
        # use delete instead of truncate for foreign keys
        if opts[:defer_constraints] || opts[:defer_constraints_v2]
          destination.execute("DELETE FROM #{quoted_table}")
        else
          destination.truncate(table)
        end
        copy(copy_to_command, dest_table: table, dest_fields: fields)
      end
      seq_values.each do |seq, value|
        destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
      end

      {status: "success"}
    end

    private

    def with_notices
      notices = []
      [source, destination].each do |data_source|
        data_source.send(:conn).set_notice_processor do |message|
          notices << message.strip
        end
      end
      result = yield
      result[:notices] = notices if result
      result
    ensure
      # clear notice processor
      [source, destination].each do |data_source|
        data_source.send(:conn).set_notice_processor
      end
    end

    # TODO add retries
    def handle_errors
      yield
    rescue => e
      raise e if opts[:debug]

      message =
        case e
        when PG::ConnectionBad
          # likely fine to show simplified message here
          # the full message will be shown when first trying to connect
          "Connection failed"
        when PG::Error
          e.message.sub("ERROR: ", "")
        when Error
          e.message
        else
          "#{e.class.name}: #{e.message}"
        end

      {status: "error", message: message}
    end

    def copy(source_command, dest_table:, dest_fields:)
      destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
      destination.conn.copy_data(destination_command) do
        source.conn.copy_data(source_command) do
          while (row = source.conn.get_copy_data)
            destination.conn.put_copy_data(row)
          end
        end
      end
    end

    # TODO better performance
    def rule_match?(table, column, rule)
      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
      regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}")
    end

    # TODO wildcard rules
    def apply_strategy(rule, table, column, primary_key)
      if rule.is_a?(Hash)
        if rule.key?("value")
          escape(rule["value"])
        elsif rule.key?("statement")
          rule["statement"]
        else
          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
        end
      else
        case rule
        when "untouched"
          quote_ident(column)
        when "unique_email"
          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
        when "unique_phone"
          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
        when "unique_secret"
          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
        when "random_int", "random_number"
          "(RANDOM() * 100)::int"
        when "random_date"
          "date '1970-01-01' + (RANDOM() * 10000)::int"
        when "random_time"
          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
        when "random_ip"
          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
        when "random_letter"
          "chr(65 + (RANDOM() * 26)::int)"
        when "random_string"
          "RIGHT(MD5(RANDOM()::text), 10)"
        when "null", nil
          "NULL"
        else
          raise Error, "Unknown rule #{rule} for column #{column}"
        end
      end
    end

    def quoted_primary_key(table, primary_key, rule)
      raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1
      "#{quoted_table}.#{quote_ident(primary_key.first)}"
    end

    def maybe_disable_triggers
      if opts[:disable_integrity] || opts[:disable_integrity_v2] || opts[:disable_user_triggers]
        destination.transaction do
          triggers = destination.triggers(table)
          triggers.select! { |t| t["enabled"] == "t" }
          internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
          integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
          restore_triggers = []

          # both --disable-integrity options require superuser privileges
          # however, only v2 works on Amazon RDS, which added specific support for it
          # https://aws.amazon.com/about-aws/whats-new/2014/11/10/amazon-rds-postgresql-read-replicas/
          #
          # session_replication_role disables more than foreign keys (like triggers and rules)
          # this is probably fine, but keep the current default for now
          if opts[:disable_integrity_v2] || (opts[:disable_integrity] && rds?)
            # SET LOCAL lasts until the end of the transaction
            # https://www.postgresql.org/docs/current/sql-set.html
            destination.execute("SET LOCAL session_replication_role = replica")
          elsif opts[:disable_integrity]
            integrity_triggers.each do |trigger|
              destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
            end
            restore_triggers.concat(integrity_triggers)
          end

          if opts[:disable_user_triggers]
            # important!
            # rely on Postgres to disable user triggers
            # we don't want to accidentally disable non-user triggers if logic above is off
            destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER")
            restore_triggers.concat(user_triggers)
          end

          result = yield

          # restore triggers that were previously enabled
          restore_triggers.each do |trigger|
            destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}")
          end

          result
        end
      else
        yield
      end
    end

    def rds?
      destination.execute("SELECT name, setting FROM pg_settings WHERE name LIKE 'rds.%'").any?
    end
  end
end
```
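`rule_match?` compiles each `data_rules` key into an anchored regex in which `*` matches any run of non-dot characters, so a rule key can target a bare column, `table.column`, or `schema.table.column`. A quick sketch of those matching semantics, with hypothetical rule keys and the Table fields inlined as plain strings:

```ruby
def rule_match?(schema, table, column, rule)
  # Regexp.escape turns * into \*; the gsub then widens it to "any run of non-dots"
  regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*', '[^\.]*') + '\z')
  regex.match(column) ||
    regex.match("#{table}.#{column}") ||
    regex.match("#{schema}.#{table}.#{column}")
end

!!rule_match?("public", "users", "email", "email")              # => true (bare column)
!!rule_match?("public", "users", "email", "users.email")        # => true (table.column)
!!rule_match?("public", "users", "auth_token", "users.*_token") # => true (* stays in one segment)
!!rule_match?("public", "users", "email", "orders.*")           # => false
```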