pgsync 0.5.3 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pgsync might be problematic.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/LICENSE.txt +1 -1
- data/README.md +123 -38
- data/config.yml +4 -0
- data/lib/pgsync.rb +5 -1
- data/lib/pgsync/client.rb +54 -53
- data/lib/pgsync/data_source.rb +78 -80
- data/lib/pgsync/init.rb +50 -6
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +95 -172
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +156 -188
- data/lib/pgsync/task.rb +329 -0
- data/lib/pgsync/task_resolver.rb +237 -0
- data/lib/pgsync/utils.rb +53 -13
- data/lib/pgsync/version.rb +1 -1
- metadata +6 -3
- data/lib/pgsync/table_list.rb +0 -141
data/lib/pgsync/table.rb
ADDED
@@ -0,0 +1,28 @@
+# minimal class to keep schema and table name separate
+module PgSync
+  class Table
+    attr_reader :schema, :name
+
+    def initialize(schema, name)
+      @schema = schema
+      @name = name
+    end
+
+    def full_name
+      "#{schema}.#{name}"
+    end
+
+    def eql?(other)
+      other.schema == schema && other.name == name
+    end
+
+    # override hash when overriding eql?
+    def hash
+      [schema, name].hash
+    end
+
+    def to_s
+      full_name
+    end
+  end
+end
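The `# override hash when overriding eql?` comment is load-bearing: elsewhere in this release, `TableSync#columns` and `TableSync#non_deferrable_constraints` group query rows by freshly built `Table` instances and later index those hashes with different instances naming the same table, which only works if equal tables also hash equally. A standalone sketch of that behavior (illustrative, not part of the release):

    # Mirrors the Table class above: with eql? and hash both defined,
    # two instances naming the same table act as one Hash key.
    # Drop the hash override and the final count becomes 2.
    class Table
      attr_reader :schema, :name

      def initialize(schema, name)
        @schema = schema
        @name = name
      end

      def eql?(other)
        other.schema == schema && other.name == name
      end

      def hash
        [schema, name].hash
      end
    end

    counts = Hash.new(0)
    counts[Table.new("public", "users")] += 1
    counts[Table.new("public", "users")] += 1
    p counts.size # => 1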
data/lib/pgsync/table_sync.rb
CHANGED
@@ -2,236 +2,204 @@ module PgSync
   class TableSync
     include Utils
 
-
-
-
-    source =
-    destination =
+    attr_reader :source, :destination, :tasks, :opts, :resolver
+
+    def initialize(source:, destination:, tasks:, opts:, resolver:)
+      @source = source
+      @destination = destination
+      @tasks = tasks
+      @opts = opts
+      @resolver = resolver
+    end
 
-
-
+    def perform
+      confirm_tables_exist(destination, tasks, "destination")
 
-
-      to_fields = destination.columns(table)
-      shared_fields = to_fields & from_fields
-      extra_fields = to_fields - from_fields
-      missing_fields = from_fields - to_fields
+      add_columns
 
-
-        from_sequences = []
-        to_sequences = []
-      else
-        from_sequences = source.sequences(table, shared_fields)
-        to_sequences = destination.sequences(table, shared_fields)
-      end
+      show_notes
 
-
-
-
-
-      sql_clause = String.new("")
-      sql_clause << " #{opts[:sql]}" if opts[:sql]
-
-      notes = []
-      notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
-      notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
-      notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
-      notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+      # don't sync tables with no shared fields
+      # we show a warning message above
+      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
+    end
 
-
+    # TODO only query specific tables
+    # TODO add sequences, primary keys, etc
+    def add_columns
+      source_columns = columns(source)
+      destination_columns = columns(destination)
 
-
-
-
-
+      tasks.each do |task|
+        task.from_columns = source_columns[task.table] || []
+        task.to_columns = destination_columns[task.table] || []
+      end
+    end
 
-
-
-
+    def show_notes
+      # for tables
+      resolver.notes.each do |note|
+        warning note
       end
 
-
-
-
-
+      # for columns and sequences
+      tasks.each do |task|
+        task.notes.each do |note|
+          warning "#{task_name(task)}: #{note}"
+        end
+      end
 
-
+      # for non-deferrable constraints
+      if opts[:defer_constraints]
+        constraints = non_deferrable_constraints(destination)
+        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
+        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
+      end
+    end
 
-
-
+    def columns(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          column_name AS column,
+          data_type AS type
+        FROM
+          information_schema.columns
+        ORDER BY 1, 2, 3
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
+      end.to_h
+    end
 
-
-
-
-
+    def non_deferrable_constraints(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          constraint_name
+        FROM
+          information_schema.table_constraints
+        WHERE
+          constraint_type = 'FOREIGN KEY' AND
+          is_deferrable = 'NO'
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| r["constraint_name"] }]
+      end.to_h
+    end
 
-
-
+    def run_tasks(tasks, &block)
+      notices = []
+      failed_tables = []
 
-
-
+      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
+      task_spinners = {}
+      started_at = {}
 
-
-
-
+      start = lambda do |task, i|
+        message = ":spinner #{display_item(task)}"
+        spinner = spinners.register(message)
+        if opts[:in_batches]
+          # log instead of spin for non-tty
+          log message.sub(":spinner", "⠋")
+        else
+          spinner.auto_spin
+        end
+        task_spinners[task] = spinner
+        started_at[task] = Time.now
+      end
 
-
-
+      finish = lambda do |task, i, result|
+        spinner = task_spinners[task]
+        time = (Time.now - started_at[task]).round(1)
 
-
-
-
-
-
-        end
-      end
+        message =
+          if result[:message]
+            "(#{result[:message].lines.first.to_s.strip})"
+          else
+            "- #{time}s"
           end
 
-
-      i += 1
+        notices.concat(result[:notices])
 
-
-
-
+        if result[:status] == "success"
+          spinner.success(message)
+        else
+          spinner.error(message)
+          failed_tables << task_name(task)
+          fail_sync(failed_tables) if opts[:fail_fast]
         end
 
-
-
-
+        unless spinner.send(:tty?)
+          status = result[:status] == "success" ? "✔" : "✖"
+          log [status, display_item(task), message].join(" ")
+        end
+      end
 
-
-      temp_table = "pgsync_#{rand(1_000_000_000)}"
-      to_connection.exec("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS SELECT * FROM #{quote_ident_full(table)} WITH NO DATA")
+      options = {start: start, finish: finish}
 
-
-
-
-
-
-        end
-      end
-    end
+      jobs = opts[:jobs]
+      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
+        warning "--jobs ignored" if jobs
+        jobs = 0
+      end
 
-
-
-      to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident_full(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident_full(table)} WHERE #{quote_ident_full(table)}.#{quote_ident(primary_key)} = #{quote_ident_full(temp_table)}.#{quote_ident(primary_key)}))")
-      else
-        to_connection.transaction do
-          to_connection.exec("DELETE FROM #{quote_ident_full(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident_full(temp_table)})")
-          to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident(temp_table)})")
-        end
-      end
+      if windows?
+        options[:in_threads] = jobs || 4
       else
-
-        to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-          from_connection.copy_data copy_to_command do
-            while (row = from_connection.get_copy_data)
-              to_connection.put_copy_data(row)
-            end
-          end
-        end
-      end
-      seq_values.each do |seq, value|
-        to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
+        options[:in_processes] = jobs if jobs
       end
 
-
-
-
-
-
-
-
-        when PG::ConnectionBad
-          # likely fine to show simplified message here
-          # the full message will be shown when first trying to connect
-          "Connection failed"
-        when PG::Error
-          e.message.sub("ERROR: ", "")
-        when Error
-          e.message
-        else
-          "#{e.class.name}: #{e.message}"
-        end
+      maybe_defer_constraints do
+        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
+        # see `fast_faster` branch
+        # however, need to make sure connections are cleaned up properly
+        Parallel.each(tasks, **options) do |task|
+          source.reconnect_if_needed
+          destination.reconnect_if_needed
 
-
-
-
-      destination.close if destination
-    end
+          task.perform
+        end
+      end
 
-
+      notices.each do |notice|
+        warning notice
+      end
 
-
-    def rule_match?(table, column, rule)
-      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
-      regex.match(column) || regex.match("#{table.split(".", 2)[-1]}.#{column}") || regex.match("#{table}.#{column}")
+      fail_sync(failed_tables) if failed_tables.any?
     end
 
-
-
-
-
-
-
-
-
-
+    def maybe_defer_constraints
+      if opts[:defer_constraints]
+        destination.transaction do
+          destination.execute("SET CONSTRAINTS ALL DEFERRED")
+
+          # create a transaction on the source
+          # to ensure we get a consistent snapshot
+          source.transaction do
+            yield
+          end
         end
       else
-
-        when "untouched"
-          quote_ident(column)
-        when "unique_email"
-          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
-        when "unique_phone"
-          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
-        when "unique_secret"
-          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
-        when "random_int", "random_number"
-          "(RANDOM() * 100)::int"
-        when "random_date"
-          "date '1970-01-01' + (RANDOM() * 10000)::int"
-        when "random_time"
-          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
-        when "random_ip"
-          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
-        when "random_letter"
-          "chr(65 + (RANDOM() * 26)::int)"
-        when "random_string"
-          "RIGHT(MD5(RANDOM()::text), 10)"
-        when "null", nil
-          "NULL"
-        else
-          raise Error, "Unknown rule #{rule} for column #{column}"
-        end
+        yield
       end
     end
 
-    def
-      raise "
-      "#{quote_ident_full(table)}.#{quote_ident(primary_key)}"
+    def fail_sync(failed_tables)
+      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
     end
 
-    def
-
-
-
-
-      PG::Connection.quote_ident(value)
-    end
-
-    def escape(value)
-      if value.is_a?(String)
-        "'#{quote_string(value)}'"
-      else
-        value
-      end
+    def display_item(item)
+      messages = []
+      messages << task_name(item)
+      messages << item.opts[:sql] if item.opts[:sql]
+      messages.join(" ")
     end
 
-
-
-      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
+    def windows?
+      Gem.win_platform?
     end
   end
 end
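`run_tasks` drives all progress reporting through the parallel gem's `start` and `finish` hooks, which run in the parent process even with `:in_processes`, so the spinners and the `notices`/`failed_tables` accumulators can live in one place while workers fork. A minimal sketch of the mechanism, assuming only the parallel gem (the item names are illustrative):

    require "parallel"

    items = %w[users orders products] # stand-ins for sync tasks

    start = lambda do |item, _index|
      puts "started #{item}"
    end

    finish = lambda do |item, _index, result|
      # result is the block's return value, sent back from the worker
      puts "finished #{item}: #{result[:status]}"
    end

    Parallel.each(items, in_processes: 2, start: start, finish: finish) do |item|
      sleep(rand / 10) # stand-in for the actual table copy
      {status: "success"}
    end

Appending to `failed_tables` from `finish`, as `run_tasks` does, works precisely because the hook runs in the parent; the same mutation inside the worker block would be lost with forked processes.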
data/lib/pgsync/task.rb
ADDED
@@ -0,0 +1,329 @@
+module PgSync
+  class Task
+    include Utils
+
+    attr_reader :source, :destination, :config, :table, :opts
+    attr_accessor :from_columns, :to_columns
+
+    def initialize(source:, destination:, config:, table:, opts:)
+      @source = source
+      @destination = destination
+      @config = config
+      @table = table
+      @opts = opts
+    end
+
+    def quoted_table
+      quote_ident_full(table)
+    end
+
+    def perform
+      with_notices do
+        handle_errors do
+          maybe_disable_triggers do
+            sync_data
+          end
+        end
+      end
+    end
+
+    def from_fields
+      @from_fields ||= from_columns.map { |c| c[:name] }
+    end
+
+    def to_fields
+      @to_fields ||= to_columns.map { |c| c[:name] }
+    end
+
+    def shared_fields
+      @shared_fields ||= to_fields & from_fields
+    end
+
+    def from_sequences
+      @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
+    end
+
+    def to_sequences
+      @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
+    end
+
+    def shared_sequences
+      @shared_sequences ||= to_sequences & from_sequences
+    end
+
+    def notes
+      notes = []
+      if shared_fields.empty?
+        notes << "No fields to copy"
+      else
+        extra_fields = to_fields - from_fields
+        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
+
+        missing_fields = from_fields - to_fields
+        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
+
+        extra_sequences = to_sequences - from_sequences
+        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
+
+        missing_sequences = from_sequences - to_sequences
+        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+
+        from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h
+        to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h
+        different_types = []
+        shared_fields.each do |field|
+          if from_types[field] != to_types[field]
+            different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})"
+          end
+        end
+        notes << "Different column types: #{different_types.join(", ")}" if different_types.any?
+      end
+      notes
+    end
+
+    def sync_data
+      raise Error, "This should never happen. Please file a bug." if shared_fields.empty?
+
+      sql_clause = String.new("")
+      sql_clause << " #{opts[:sql]}" if opts[:sql]
+
+      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
+      primary_key = destination.primary_key(table)
+      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
+      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
+
+      seq_values = {}
+      shared_sequences.each do |seq|
+        seq_values[seq] = source.last_value(seq)
+      end
+
+      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
+      if opts[:in_batches]
+        raise Error, "No primary key" if primary_key.empty?
+        primary_key = primary_key.first
+
+        destination.truncate(table) if opts[:truncate]
+
+        from_max_id = source.max_id(table, primary_key)
+        to_max_id = destination.max_id(table, primary_key) + 1
+
+        if to_max_id == 1
+          from_min_id = source.min_id(table, primary_key)
+          to_max_id = from_min_id if from_min_id > 0
+        end
+
+        starting_id = to_max_id
+        batch_size = opts[:batch_size]
+
+        i = 1
+        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
+
+        while starting_id <= from_max_id
+          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
+          log "    #{i}/#{batch_count}: #{where}"
+
+          # TODO be smarter for advance sql clauses
+          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
+
+          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
+          copy(batch_copy_to_command, dest_table: table, dest_fields: fields)
+
+          starting_id += batch_size
+          i += 1
+
+          if opts[:sleep] && starting_id <= from_max_id
+            sleep(opts[:sleep])
+          end
+        end
+      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
+        raise Error, "No primary key" if primary_key.empty?
+
+        # create a temp table
+        temp_table = "pgsync_#{rand(1_000_000_000)}"
+        destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA")
+
+        # load data
+        copy(copy_to_command, dest_table: temp_table, dest_fields: fields)
+
+        on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ")
+        action =
+          if opts[:preserve]
+            "NOTHING"
+          else # overwrite or sql clause
+            setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" }
+            "UPDATE SET #{setter.join(", ")}"
+          end
+        destination.execute("INSERT INTO #{quoted_table} (SELECT * FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}")
+      else
+        # use delete instead of truncate for foreign keys
+        if opts[:defer_constraints]
+          destination.execute("DELETE FROM #{quoted_table}")
+        else
+          destination.truncate(table)
+        end
+        copy(copy_to_command, dest_table: table, dest_fields: fields)
+      end
+      seq_values.each do |seq, value|
+        destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
+      end
+
+      {status: "success"}
+    end
+
+    private
+
+    def with_notices
+      notices = []
+      [source, destination].each do |data_source|
+        data_source.send(:conn).set_notice_processor do |message|
+          notices << message.strip
+        end
+      end
+      result = yield
+      result[:notices] = notices if result
+      result
+    ensure
+      # clear notice processor
+      [source, destination].each do |data_source|
+        data_source.send(:conn).set_notice_processor
+      end
+    end
+
+    # TODO add retries
+    def handle_errors
+      yield
+    rescue => e
+      raise e if opts[:debug]
+
+      message =
+        case e
+        when PG::ConnectionBad
+          # likely fine to show simplified message here
+          # the full message will be shown when first trying to connect
+          "Connection failed"
+        when PG::Error
+          e.message.sub("ERROR: ", "")
+        when Error
+          e.message
+        else
+          "#{e.class.name}: #{e.message}"
+        end
+
+      {status: "error", message: message}
+    end
+
+    def copy(source_command, dest_table:, dest_fields:)
+      destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
+      destination.conn.copy_data(destination_command) do
+        source.conn.copy_data(source_command) do
+          while (row = source.conn.get_copy_data)
+            destination.conn.put_copy_data(row)
+          end
+        end
+      end
+    end
+
+    # TODO better performance
+    def rule_match?(table, column, rule)
+      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
+      regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}")
+    end
+
+    # TODO wildcard rules
+    def apply_strategy(rule, table, column, primary_key)
+      if rule.is_a?(Hash)
+        if rule.key?("value")
+          escape(rule["value"])
+        elsif rule.key?("statement")
+          rule["statement"]
+        else
+          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
+        end
+      else
+        case rule
+        when "untouched"
+          quote_ident(column)
+        when "unique_email"
+          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
+        when "unique_phone"
+          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
+        when "unique_secret"
+          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
+        when "random_int", "random_number"
+          "(RANDOM() * 100)::int"
+        when "random_date"
+          "date '1970-01-01' + (RANDOM() * 10000)::int"
+        when "random_time"
+          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
+        when "random_ip"
+          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
+        when "random_letter"
+          "chr(65 + (RANDOM() * 26)::int)"
+        when "random_string"
+          "RIGHT(MD5(RANDOM()::text), 10)"
+        when "null", nil
+          "NULL"
+        else
+          raise Error, "Unknown rule #{rule} for column #{column}"
+        end
+      end
+    end
+
+    def quoted_primary_key(table, primary_key, rule)
+      raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1
+      "#{quoted_table}.#{quote_ident(primary_key.first)}"
+    end
+
+    def maybe_disable_triggers
+      if opts[:disable_integrity] || opts[:disable_integrity_v2] || opts[:disable_user_triggers]
+        destination.transaction do
+          triggers = destination.triggers(table)
+          triggers.select! { |t| t["enabled"] == "t" }
+          internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
+          integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
+          restore_triggers = []
+
+          # both --disable-integrity options require superuser privileges
+          # however, only v2 works on Amazon RDS, which added specific support for it
+          # https://aws.amazon.com/about-aws/whats-new/2014/11/10/amazon-rds-postgresql-read-replicas/
+          #
+          # session_replication_role disables more than foreign keys (like triggers and rules)
+          # this is probably fine, but keep the current default for now
+          if opts[:disable_integrity_v2] || (opts[:disable_integrity] && rds?)
+            # SET LOCAL lasts until the end of the transaction
+            # https://www.postgresql.org/docs/current/sql-set.html
+            destination.execute("SET LOCAL session_replication_role = replica")
+          elsif opts[:disable_integrity]
+            integrity_triggers.each do |trigger|
+              destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
+            end
+            restore_triggers.concat(integrity_triggers)
+          end
+
+          if opts[:disable_user_triggers]
+            # important!
+            # rely on Postgres to disable user triggers
+            # we don't want to accidentally disable non-user triggers if logic above is off
+            destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER")
+            restore_triggers.concat(user_triggers)
+          end
+
+          result = yield
+
+          # restore triggers that were previously enabled
+          restore_triggers.each do |trigger|
+            destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}")
+          end
+
+          result
+        end
+      else
+        yield
+      end
+    end
+
+    def rds?
+      destination.execute("SELECT name, setting FROM pg_settings WHERE name LIKE 'rds.%'").any?
+    end
+  end
+end
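A note on the data-rule matching in `Task#rule_match?`: the rule string is regex-escaped, each escaped `*` is rewritten to `[^.]*` so a wildcard matches within a single dotted segment, and the column is tried bare, as `table.column`, and as `schema.table.column`. A standalone sketch of that matching, with schema and table flattened to plain strings for illustration:

    def rule_match?(schema, table, column, rule)
      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*', '[^\.]*') + '\z')
      [column, "#{table}.#{column}", "#{schema}.#{table}.#{column}"].any? { |c| regex.match(c) }
    end

    p rule_match?("public", "users", "email", "email")          # => true
    p rule_match?("public", "users", "email", "users.*")        # => true
    p rule_match?("public", "users", "email", "public.*.email") # => true
    p rule_match?("public", "users", "email", "orders.*")       # => false

The Task version differs from the removed 0.5.3 one in how it derives the qualified names: 0.5.3 split a `"schema.table"` string with `table.split(".", 2)[-1]`, while 0.6 reads `table.schema` and `table.name` from a `Table` object.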