pgsync 0.5.4 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pgsync might be problematic. Click here for more details.

# minimal class to keep schema and table name separate
module PgSync
  class Table
    attr_reader :schema, :name

    def initialize(schema, name)
      @schema = schema
      @name = name
    end

    # "schema.name", e.g. "public.users"
    def full_name
      "#{schema}.#{name}"
    end

    # Value equality: same schema and same table name.
    # Guard on class so comparing against a non-Table returns false
    # instead of raising NoMethodError.
    def eql?(other)
      other.is_a?(self.class) && other.schema == schema && other.name == name
    end

    # keep == consistent with eql? (Hash/Set keys use eql?/hash,
    # but Array#include? and user code use ==)
    alias_method :==, :eql?

    # override hash when overriding eql?
    def hash
      [schema, name].hash
    end

    def to_s
      full_name
    end
  end
end
module PgSync
  # Orchestrates a sync run: loads column metadata for every task,
  # prints warnings, then executes the per-table Task objects
  # (optionally in parallel, optionally inside deferred-constraint
  # transactions).
  class TableSync
    include Utils

    attr_reader :source, :destination, :tasks, :opts, :resolver

    def initialize(source:, destination:, tasks:, opts:, resolver:)
      @source = source
      @destination = destination
      @tasks = tasks
      @opts = opts
      @resolver = resolver
    end

    # Entry point: verify destination tables exist, attach column info,
    # surface warnings, then run the tasks.
    def perform
      confirm_tables_exist(destination, tasks, "destination")

      add_columns

      show_notes

      # don't sync tables with no shared fields
      # we show a warning message above
      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
    end

    # TODO only query specific tables
    # TODO add sequences, primary keys, etc
    def add_columns
      source_columns = columns(source)
      destination_columns = columns(destination)

      tasks.each do |task|
        task.from_columns = source_columns[task.table] || []
        task.to_columns = destination_columns[task.table] || []
      end
    end

    # Print all warnings collected so far (resolver notes, per-task
    # column/sequence notes, and non-deferrable constraints when
    # --defer-constraints is used).
    def show_notes
      # for tables
      resolver.notes.each do |note|
        warning note
      end

      # for columns and sequences
      tasks.each do |task|
        task.notes.each do |note|
          warning "#{task_name(task)}: #{note}"
        end
      end

      # for non-deferrable constraints
      if opts[:defer_constraints]
        constraints = non_deferrable_constraints(destination)
        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
      end
    end

    # Returns {Table => [{name:, type:}, ...]} for every column in the
    # data source (queried once for the whole database).
    def columns(data_source)
      query = <<~SQL
        SELECT
          table_schema AS schema,
          table_name AS table,
          column_name AS column,
          data_type AS type
        FROM
          information_schema.columns
        ORDER BY 1, 2, 3
      SQL
      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
      end.to_h
    end

    # Returns {Table => [constraint_name, ...]} for every foreign key
    # that is not deferrable.
    def non_deferrable_constraints(data_source)
      query = <<~SQL
        SELECT
          table_schema AS schema,
          table_name AS table,
          constraint_name
        FROM
          information_schema.table_constraints
        WHERE
          constraint_type = 'FOREIGN KEY' AND
          is_deferrable = 'NO'
      SQL
      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
        [k, v.map { |r| r["constraint_name"] }]
      end.to_h
    end

    # Runs the given tasks with spinner progress output, in parallel
    # where allowed. Raises Error (via fail_sync) if any table failed.
    # NOTE: the former `&block` parameter was removed — it was never
    # yielded or referenced; callers passing a block are unaffected.
    def run_tasks(tasks)
      notices = []
      failed_tables = []

      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
      task_spinners = {}
      started_at = {}

      # Parallel `start` hook: register and start a spinner per task
      start = lambda do |task, i|
        message = ":spinner #{display_item(task)}"
        spinner = spinners.register(message)
        if opts[:in_batches]
          # log instead of spin for non-tty
          log message.sub(":spinner", "⠋")
        else
          spinner.auto_spin
        end
        task_spinners[task] = spinner
        started_at[task] = Time.now
      end

      # Parallel `finish` hook: resolve the spinner, collect notices,
      # track failures (and abort immediately with --fail-fast)
      finish = lambda do |task, i, result|
        spinner = task_spinners[task]
        time = (Time.now - started_at[task]).round(1)

        message =
          if result[:message]
            "(#{result[:message].lines.first.to_s.strip})"
          else
            "- #{time}s"
          end

        notices.concat(result[:notices])

        if result[:status] == "success"
          spinner.success(message)
        else
          spinner.error(message)
          failed_tables << task_name(task)
          fail_sync(failed_tables) if opts[:fail_fast]
        end

        unless spinner.send(:tty?)
          status = result[:status] == "success" ? "✔" : "✖"
          log [status, display_item(task), message].join(" ")
        end
      end

      options = {start: start, finish: finish}

      # these modes need a single connection/transaction, so force
      # sequential execution (jobs = 0)
      jobs = opts[:jobs]
      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints] || opts[:defer_constraints_v2] || opts[:disable_integrity] || opts[:disable_integrity_v2]
        warning "--jobs ignored" if jobs
        jobs = 0
      end

      if windows?
        options[:in_threads] = jobs || 4
      else
        options[:in_processes] = jobs if jobs
      end

      maybe_defer_constraints do
        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
        # see `fast_faster` branch
        # however, need to make sure connections are cleaned up properly
        Parallel.each(tasks, **options) do |task|
          source.reconnect_if_needed
          destination.reconnect_if_needed

          task.perform
        end
      end

      notices.each do |notice|
        warning notice
      end

      fail_sync(failed_tables) if failed_tables.any?
    end

    # Wraps the sync in the transaction/constraint handling requested by
    # --disable-integrity[-v2] or --defer-constraints[-v2]; otherwise
    # just yields.
    def maybe_defer_constraints
      if opts[:disable_integrity] || opts[:disable_integrity_v2]
        # create a transaction on the source
        # to ensure we get a consistent snapshot
        source.transaction do
          yield
        end
      elsif opts[:defer_constraints] || opts[:defer_constraints_v2]
        destination.transaction do
          if opts[:defer_constraints_v2]
            table_constraints = non_deferrable_constraints(destination)
            table_constraints.each do |table, constraints|
              constraints.each do |constraint|
                destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} DEFERRABLE")
              end
            end
          end

          destination.execute("SET CONSTRAINTS ALL DEFERRED")

          # create a transaction on the source
          # to ensure we get a consistent snapshot
          source.transaction do
            yield
          end

          # set them back
          # there are 3 modes: DEFERRABLE INITIALLY DEFERRED, DEFERRABLE INITIALLY IMMEDIATE, and NOT DEFERRABLE
          # we only update NOT DEFERRABLE
          # https://www.postgresql.org/docs/current/sql-set-constraints.html
          if opts[:defer_constraints_v2]
            destination.execute("SET CONSTRAINTS ALL IMMEDIATE")

            table_constraints.each do |table, constraints|
              constraints.each do |constraint|
                destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} NOT DEFERRABLE")
              end
            end
          end
        end
      else
        yield
      end
    end

    # Raise a summary error listing all failed tables.
    def fail_sync(failed_tables)
      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
    end

    # Label shown next to the spinner: table name plus any per-task SQL clause.
    def display_item(item)
      messages = []
      messages << task_name(item)
      messages << item.opts[:sql] if item.opts[:sql]
      messages.join(" ")
    end

    def windows?
      Gem.win_platform?
    end
  end
end
module PgSync
  # Syncs a single table from source to destination: computes shared
  # columns/sequences, applies data rules, copies rows, and handles
  # trigger/constraint toggling and error reporting for the run.
  class Task
    include Utils

    attr_reader :source, :destination, :config, :table, :opts
    # assigned by TableSync#add_columns before perform is called
    attr_accessor :from_columns, :to_columns

    def initialize(source:, destination:, config:, table:, opts:)
      @source = source
      @destination = destination
      @config = config
      @table = table
      @opts = opts
    end

    def quoted_table
      quote_ident_full(table)
    end

    # Run the sync, capturing server notices, converting exceptions to a
    # {status:, message:} result, and disabling triggers when requested.
    def perform
      with_notices do
        handle_errors do
          maybe_disable_triggers do
            sync_data
          end
        end
      end
    end

    def from_fields
      @from_fields ||= from_columns.map { |c| c[:name] }
    end

    def to_fields
      @to_fields ||= to_columns.map { |c| c[:name] }
    end

    # columns present in both source and destination — only these are copied
    def shared_fields
      @shared_fields ||= to_fields & from_fields
    end

    def from_sequences
      @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
    end

    def to_sequences
      @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
    end

    def shared_sequences
      @shared_sequences ||= to_sequences & from_sequences
    end

    # Human-readable warnings about schema differences between source
    # and destination for this table.
    def notes
      notes = []
      if shared_fields.empty?
        notes << "No fields to copy"
      else
        extra_fields = to_fields - from_fields
        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?

        missing_fields = from_fields - to_fields
        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?

        extra_sequences = to_sequences - from_sequences
        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?

        missing_sequences = from_sequences - to_sequences
        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?

        from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h
        to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h
        different_types = []
        shared_fields.each do |field|
          if from_types[field] != to_types[field]
            different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})"
          end
        end
        notes << "Different column types: #{different_types.join(", ")}" if different_types.any?
      end
      notes
    end

    # Copy the table data. Three strategies:
    # 1. --in-batches: ranged COPY by primary key with optional sleep
    # 2. --overwrite/--preserve/--sql (without --truncate): stage into a
    #    temp table, then INSERT ... ON CONFLICT
    # 3. default: truncate (or DELETE under deferred constraints) + COPY
    # Finally, sequences are set to the source's last values.
    def sync_data
      raise Error, "This should never happen. Please file a bug." if shared_fields.empty?

      sql_clause = String.new("")
      sql_clause << " #{opts[:sql]}" if opts[:sql]

      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
      primary_key = destination.primary_key(table)
      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")

      seq_values = {}
      shared_sequences.each do |seq|
        seq_values[seq] = source.last_value(seq)
      end

      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
      if opts[:in_batches]
        raise Error, "No primary key" if primary_key.empty?
        primary_key = primary_key.first

        destination.truncate(table) if opts[:truncate]

        from_max_id = source.max_id(table, primary_key)
        to_max_id = destination.max_id(table, primary_key) + 1

        if to_max_id == 1
          from_min_id = source.min_id(table, primary_key)
          to_max_id = from_min_id if from_min_id > 0
        end

        starting_id = to_max_id
        batch_size = opts[:batch_size]

        i = 1
        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil

        while starting_id <= from_max_id
          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
          log " #{i}/#{batch_count}: #{where}"

          # TODO be smarter for advance sql clauses
          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"

          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
          copy(batch_copy_to_command, dest_table: table, dest_fields: fields)

          starting_id += batch_size
          i += 1

          if opts[:sleep] && starting_id <= from_max_id
            sleep(opts[:sleep])
          end
        end
      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
        raise Error, "No primary key" if primary_key.empty?

        # create a temp table
        temp_table = "pgsync_#{rand(1_000_000_000)}"
        destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA")

        # load data
        copy(copy_to_command, dest_table: temp_table, dest_fields: fields)

        on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ")
        action =
          if opts[:preserve]
            "NOTHING"
          else # overwrite or sql clause
            setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" }
            "UPDATE SET #{setter.join(", ")}"
          end
        destination.execute("INSERT INTO #{quoted_table} (SELECT * FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}")
      else
        # use delete instead of truncate for foreign keys
        if opts[:defer_constraints] || opts[:defer_constraints_v2]
          destination.execute("DELETE FROM #{quoted_table}")
        else
          destination.truncate(table)
        end
        copy(copy_to_command, dest_table: table, dest_fields: fields)
      end
      seq_values.each do |seq, value|
        destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
      end

      {status: "success"}
    end

    private

    # Capture PostgreSQL NOTICE messages from both connections during
    # the block and attach them to the result as result[:notices].
    def with_notices
      notices = []
      [source, destination].each do |data_source|
        data_source.send(:conn).set_notice_processor do |message|
          notices << message.strip
        end
      end
      result = yield
      result[:notices] = notices if result
      result
    ensure
      # clear notice processor
      [source, destination].each do |data_source|
        data_source.send(:conn).set_notice_processor
      end
    end

    # TODO add retries
    # Convert exceptions into a {status: "error", message:} result
    # (re-raised unchanged with --debug).
    def handle_errors
      yield
    rescue => e
      raise e if opts[:debug]

      message =
        case e
        when PG::ConnectionBad
          # likely fine to show simplified message here
          # the full message will be shown when first trying to connect
          "Connection failed"
        when PG::Error
          e.message.sub("ERROR: ", "")
        when Error
          e.message
        else
          "#{e.class.name}: #{e.message}"
        end

      {status: "error", message: message}
    end

    # Stream rows from the source COPY command into the destination table.
    def copy(source_command, dest_table:, dest_fields:)
      destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
      destination.conn.copy_data(destination_command) do
        source.conn.copy_data(source_command) do
          while (row = source.conn.get_copy_data)
            destination.conn.put_copy_data(row)
          end
        end
      end
    end

    # TODO better performance (rule regexes are rebuilt on every call)
    # Does the data rule pattern match this column? `*` in the rule is a
    # wildcard for a single identifier segment. Uses match? since only a
    # boolean is needed (avoids MatchData allocation).
    def rule_match?(table, column, rule)
      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
      regex.match?(column) || regex.match?("#{table.name}.#{column}") || regex.match?("#{table.schema}.#{table.name}.#{column}")
    end

    # TODO wildcard rules
    # Translate a data rule into a SQL expression for the column.
    def apply_strategy(rule, table, column, primary_key)
      if rule.is_a?(Hash)
        if rule.key?("value")
          escape(rule["value"])
        elsif rule.key?("statement")
          rule["statement"]
        else
          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
        end
      else
        case rule
        when "untouched"
          quote_ident(column)
        when "unique_email"
          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
        when "unique_phone"
          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
        when "unique_secret"
          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
        when "random_int", "random_number"
          "(RANDOM() * 100)::int"
        when "random_date"
          "date '1970-01-01' + (RANDOM() * 10000)::int"
        when "random_time"
          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
        when "random_ip"
          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
        when "random_letter"
          "chr(65 + (RANDOM() * 26)::int)"
        when "random_string"
          "RIGHT(MD5(RANDOM()::text), 10)"
        when "null", nil
          "NULL"
        else
          raise Error, "Unknown rule #{rule} for column #{column}"
        end
      end
    end

    def quoted_primary_key(table, primary_key, rule)
      raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1
      "#{quoted_table}.#{quote_ident(primary_key.first)}"
    end

    # Disable FK/user triggers around the block when the relevant
    # options are set, restoring them afterwards.
    def maybe_disable_triggers
      if opts[:disable_integrity] || opts[:disable_integrity_v2] || opts[:disable_user_triggers]
        destination.transaction do
          triggers = destination.triggers(table)
          triggers.select! { |t| t["enabled"] == "t" }
          internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
          integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
          restore_triggers = []

          # both --disable-integrity options require superuser privileges
          # however, only v2 works on Amazon RDS, which added specific support for it
          # https://aws.amazon.com/about-aws/whats-new/2014/11/10/amazon-rds-postgresql-read-replicas/
          #
          # session_replication_role disables more than foreign keys (like triggers and rules)
          # this is probably fine, but keep the current default for now
          if opts[:disable_integrity_v2] || (opts[:disable_integrity] && rds?)
            # SET LOCAL lasts until the end of the transaction
            # https://www.postgresql.org/docs/current/sql-set.html
            destination.execute("SET LOCAL session_replication_role = replica")
          elsif opts[:disable_integrity]
            integrity_triggers.each do |trigger|
              destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
            end
            restore_triggers.concat(integrity_triggers)
          end

          if opts[:disable_user_triggers]
            # important!
            # rely on Postgres to disable user triggers
            # we don't want to accidentally disable non-user triggers if logic above is off
            destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER")
            restore_triggers.concat(user_triggers)
          end

          result = yield

          # restore triggers that were previously enabled
          restore_triggers.each do |trigger|
            destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}")
          end

          result
        end
      else
        yield
      end
    end

    # true when running against Amazon RDS (rds.* settings are present)
    def rds?
      destination.execute("SELECT name, setting FROM pg_settings WHERE name LIKE 'rds.%'").any?
    end
  end
end