pgsync 0.5.3 → 0.6.2

This diff compares the publicly released contents of these two package versions as they appear in their public registry.

Potentially problematic release.

@@ -0,0 +1,28 @@
+# minimal class to keep schema and table name separate
+module PgSync
+  class Table
+    attr_reader :schema, :name
+
+    def initialize(schema, name)
+      @schema = schema
+      @name = name
+    end
+
+    def full_name
+      "#{schema}.#{name}"
+    end
+
+    def eql?(other)
+      other.schema == schema && other.name == name
+    end
+
+    # override hash when overriding eql?
+    def hash
+      [schema, name].hash
+    end
+
+    def to_s
+      full_name
+    end
+  end
+end
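
The new `PgSync::Table` value object overrides `eql?` and `hash` together, so two instances naming the same schema-qualified table behave as a single Hash key and as equal set members. A minimal usage sketch (table and column names here are illustrative, and it assumes the class is loaded, e.g. via `require "pgsync"`):

```ruby
a = PgSync::Table.new("public", "users")
b = PgSync::Table.new("public", "users")

a.eql?(b)         # => true
a.hash == b.hash  # => true
a.to_s            # => "public.users"

# value semantics make Hash lookup and Array intersection work as expected
columns = { a => [{name: "id", type: "integer"}] }
columns[b]        # => [{name: "id", type: "integer"}] (same key)
([a] & [b]).size  # => 1
```

This is what lets the code below group `information_schema` rows by `Table.new(...)` keys and look tasks up by table.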
@@ -2,236 +2,204 @@ module PgSync
   class TableSync
     include Utils

-    def sync(config, table, opts, source_url, destination_url)
-      start_time = Time.now
-
-      source = DataSource.new(source_url, timeout: 0)
-      destination = DataSource.new(destination_url, timeout: 0)
+    attr_reader :source, :destination, :tasks, :opts, :resolver
+
+    def initialize(source:, destination:, tasks:, opts:, resolver:)
+      @source = source
+      @destination = destination
+      @tasks = tasks
+      @opts = opts
+      @resolver = resolver
+    end

-      from_connection = source.conn
-      to_connection = destination.conn
+    def perform
+      confirm_tables_exist(destination, tasks, "destination")

-      from_fields = source.columns(table)
-      to_fields = destination.columns(table)
-      shared_fields = to_fields & from_fields
-      extra_fields = to_fields - from_fields
-      missing_fields = from_fields - to_fields
+      add_columns

-      if opts[:no_sequences]
-        from_sequences = []
-        to_sequences = []
-      else
-        from_sequences = source.sequences(table, shared_fields)
-        to_sequences = destination.sequences(table, shared_fields)
-      end
+      show_notes

-      shared_sequences = to_sequences & from_sequences
-      extra_sequences = to_sequences - from_sequences
-      missing_sequences = from_sequences - to_sequences
-
-      sql_clause = String.new("")
-      sql_clause << " #{opts[:sql]}" if opts[:sql]
-
-      notes = []
-      notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
-      notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
-      notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
-      notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+      # don't sync tables with no shared fields
+      # we show a warning message above
+      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
+    end

-      return {status: "success", message: "No fields to copy"} if shared_fields.empty?
+    # TODO only query specific tables
+    # TODO add sequences, primary keys, etc
+    def add_columns
+      source_columns = columns(source)
+      destination_columns = columns(destination)

-      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
-      primary_key = destination.primary_key(table)
-      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quote_ident_full(table)}.#{quote_ident(f)}" }.join(", ")
-      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
+      tasks.each do |task|
+        task.from_columns = source_columns[task.table] || []
+        task.to_columns = destination_columns[task.table] || []
+      end
+    end

-      seq_values = {}
-      shared_sequences.each do |seq|
-        seq_values[seq] = source.last_value(seq)
+    def show_notes
+      # for tables
+      resolver.notes.each do |note|
+        warning note
       end

-      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident_full(table)}#{sql_clause}) TO STDOUT"
-      if opts[:in_batches]
-        raise Error, "Cannot use --overwrite with --in-batches" if opts[:overwrite]
-        raise Error, "No primary key" unless primary_key
+      # for columns and sequences
+      tasks.each do |task|
+        task.notes.each do |note|
+          warning "#{task_name(task)}: #{note}"
+        end
+      end

-        destination.truncate(table) if opts[:truncate]
+      # for non-deferrable constraints
+      if opts[:defer_constraints]
+        constraints = non_deferrable_constraints(destination)
+        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
+        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
+      end
+    end

-        from_max_id = source.max_id(table, primary_key)
-        to_max_id = destination.max_id(table, primary_key) + 1
+    def columns(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          column_name AS column,
+          data_type AS type
+        FROM
+          information_schema.columns
+        ORDER BY 1, 2, 3
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
+      end.to_h
+    end

-        if to_max_id == 1
-          from_min_id = source.min_id(table, primary_key)
-          to_max_id = from_min_id if from_min_id > 0
-        end
+    def non_deferrable_constraints(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          constraint_name
+        FROM
+          information_schema.table_constraints
+        WHERE
+          constraint_type = 'FOREIGN KEY' AND
+          is_deferrable = 'NO'
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| r["constraint_name"] }]
+      end.to_h
+    end

-        starting_id = to_max_id
-        batch_size = opts[:batch_size]
+    def run_tasks(tasks, &block)
+      notices = []
+      failed_tables = []

-        i = 1
-        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
+      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
+      task_spinners = {}
+      started_at = {}

-        while starting_id <= from_max_id
-          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
-          log " #{i}/#{batch_count}: #{where}"
+      start = lambda do |task, i|
+        message = ":spinner #{display_item(task)}"
+        spinner = spinners.register(message)
+        if opts[:in_batches]
+          # log instead of spin for non-tty
+          log message.sub(":spinner", "⠋")
+        else
+          spinner.auto_spin
+        end
+        task_spinners[task] = spinner
+        started_at[task] = Time.now
+      end

-          # TODO be smarter for advance sql clauses
-          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
+      finish = lambda do |task, i, result|
+        spinner = task_spinners[task]
+        time = (Time.now - started_at[task]).round(1)

-          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident_full(table)}#{batch_sql_clause}) TO STDOUT"
-          to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-            from_connection.copy_data batch_copy_to_command do
-              while (row = from_connection.get_copy_data)
-                to_connection.put_copy_data(row)
-              end
-            end
+        message =
+          if result[:message]
+            "(#{result[:message].lines.first.to_s.strip})"
+          else
+            "- #{time}s"
           end

-          starting_id += batch_size
-          i += 1
+        notices.concat(result[:notices])

-          if opts[:sleep] && starting_id <= from_max_id
-            sleep(opts[:sleep])
-          end
+        if result[:status] == "success"
+          spinner.success(message)
+        else
+          spinner.error(message)
+          failed_tables << task_name(task)
+          fail_sync(failed_tables) if opts[:fail_fast]
         end

-        log # add extra line for spinner
-      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
-        raise Error, "No primary key" unless primary_key
+        unless spinner.send(:tty?)
+          status = result[:status] == "success" ? "✔" : "✖"
+          log [status, display_item(task), message].join(" ")
+        end
+      end

-        # create a temp table
-        temp_table = "pgsync_#{rand(1_000_000_000)}"
-        to_connection.exec("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS SELECT * FROM #{quote_ident_full(table)} WITH NO DATA")
+      options = {start: start, finish: finish}

-        # load data
-        to_connection.copy_data "COPY #{quote_ident_full(temp_table)} (#{fields}) FROM STDIN" do
-          from_connection.copy_data copy_to_command do
-            while (row = from_connection.get_copy_data)
-              to_connection.put_copy_data(row)
-            end
-          end
-        end
+      jobs = opts[:jobs]
+      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
+        warning "--jobs ignored" if jobs
+        jobs = 0
+      end

-        if opts[:preserve]
-          # insert into
-          to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident_full(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident_full(table)} WHERE #{quote_ident_full(table)}.#{quote_ident(primary_key)} = #{quote_ident_full(temp_table)}.#{quote_ident(primary_key)}))")
-        else
-          to_connection.transaction do
-            to_connection.exec("DELETE FROM #{quote_ident_full(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident_full(temp_table)})")
-            to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident(temp_table)})")
-          end
-        end
+      if windows?
+        options[:in_threads] = jobs || 4
       else
-        destination.truncate(table)
-        to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-          from_connection.copy_data copy_to_command do
-            while (row = from_connection.get_copy_data)
-              to_connection.put_copy_data(row)
-            end
-          end
-        end
-      end
-      seq_values.each do |seq, value|
-        to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
+        options[:in_processes] = jobs if jobs
       end

-      message = nil
-      message = notes.join(", ") if notes.any?
-
-      {status: "success", message: message, time: (Time.now - start_time).round(1)}
-    rescue => e
-      message =
-        case e
-        when PG::ConnectionBad
-          # likely fine to show simplified message here
-          # the full message will be shown when first trying to connect
-          "Connection failed"
-        when PG::Error
-          e.message.sub("ERROR: ", "")
-        when Error
-          e.message
-        else
-          "#{e.class.name}: #{e.message}"
-        end
+      maybe_defer_constraints do
+        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
+        # see `fast_faster` branch
+        # however, need to make sure connections are cleaned up properly
+        Parallel.each(tasks, **options) do |task|
+          source.reconnect_if_needed
+          destination.reconnect_if_needed

-      {status: "error", message: message}
-    ensure
-      source.close if source
-      destination.close if destination
-    end
+          task.perform
+        end
+      end

-    private
+      notices.each do |notice|
+        warning notice
+      end

-    # TODO better performance
-    def rule_match?(table, column, rule)
-      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
-      regex.match(column) || regex.match("#{table.split(".", 2)[-1]}.#{column}") || regex.match("#{table}.#{column}")
+      fail_sync(failed_tables) if failed_tables.any?
     end

-    # TODO wildcard rules
-    def apply_strategy(rule, table, column, primary_key)
-      if rule.is_a?(Hash)
-        if rule.key?("value")
-          escape(rule["value"])
-        elsif rule.key?("statement")
-          rule["statement"]
-        else
-          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
+    def maybe_defer_constraints
+      if opts[:defer_constraints]
+        destination.transaction do
+          destination.execute("SET CONSTRAINTS ALL DEFERRED")
+
+          # create a transaction on the source
+          # to ensure we get a consistent snapshot
+          source.transaction do
+            yield
+          end
         end
       else
-        case rule
-        when "untouched"
-          quote_ident(column)
-        when "unique_email"
-          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
-        when "unique_phone"
-          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
-        when "unique_secret"
-          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
-        when "random_int", "random_number"
-          "(RANDOM() * 100)::int"
-        when "random_date"
-          "date '1970-01-01' + (RANDOM() * 10000)::int"
-        when "random_time"
-          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
-        when "random_ip"
-          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
-        when "random_letter"
-          "chr(65 + (RANDOM() * 26)::int)"
-        when "random_string"
-          "RIGHT(MD5(RANDOM()::text), 10)"
-        when "null", nil
-          "NULL"
-        else
-          raise Error, "Unknown rule #{rule} for column #{column}"
-        end
+        yield
       end
     end

-    def quoted_primary_key(table, primary_key, rule)
-      raise "Primary key required for this data rule: #{rule}" unless primary_key
-      "#{quote_ident_full(table)}.#{quote_ident(primary_key)}"
+    def fail_sync(failed_tables)
+      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
     end

-    def quote_ident_full(ident)
-      ident.split(".").map { |v| quote_ident(v) }.join(".")
-    end
-
-    def quote_ident(value)
-      PG::Connection.quote_ident(value)
-    end
-
-    def escape(value)
-      if value.is_a?(String)
-        "'#{quote_string(value)}'"
-      else
-        value
-      end
+    def display_item(item)
+      messages = []
+      messages << task_name(item)
+      messages << item.opts[:sql] if item.opts[:sql]
+      messages.join(" ")
     end

-    # activerecord
-    def quote_string(s)
-      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
+    def windows?
+      Gem.win_platform?
     end
   end
 end
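
`run_tasks` above hands concurrency to the `parallel` gem: the `start:` and `finish:` hooks drive the spinner UI from the parent process, and it falls back to `in_threads` on Windows, where `fork` is unavailable. A self-contained sketch of that pattern, outside pgsync (the item list and worker body are placeholders):

```ruby
require "parallel"

items = ["public.users", "public.orders"]

options = {
  # hooks run in the parent process, so they can update shared state safely
  start: ->(item, i) { puts "started #{item}" },
  finish: ->(item, i, result) { puts "finished #{item}: #{result[:status]}" }
}

# processes give each worker its own memory and DB connections;
# threads are the fallback where fork is unavailable
if Gem.win_platform?
  options[:in_threads] = 4
else
  options[:in_processes] = 4
end

Parallel.each(items, **options) do |item|
  # per-item work; in pgsync this is task.perform with freshly
  # reconnected source and destination handles
  {status: "success"}
end
```

Passing a worker count of 0 (what `jobs = 0` above produces) makes `parallel` run everything in the current process, which is how `--debug`, `--in-batches`, and `--defer-constraints` effectively disable parallelism.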
@@ -0,0 +1,329 @@
+module PgSync
+  class Task
+    include Utils
+
+    attr_reader :source, :destination, :config, :table, :opts
+    attr_accessor :from_columns, :to_columns
+
+    def initialize(source:, destination:, config:, table:, opts:)
+      @source = source
+      @destination = destination
+      @config = config
+      @table = table
+      @opts = opts
+    end
+
+    def quoted_table
+      quote_ident_full(table)
+    end
+
+    def perform
+      with_notices do
+        handle_errors do
+          maybe_disable_triggers do
+            sync_data
+          end
+        end
+      end
+    end
+
+    def from_fields
+      @from_fields ||= from_columns.map { |c| c[:name] }
+    end
+
+    def to_fields
+      @to_fields ||= to_columns.map { |c| c[:name] }
+    end
+
+    def shared_fields
+      @shared_fields ||= to_fields & from_fields
+    end
+
+    def from_sequences
+      @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
+    end
+
+    def to_sequences
+      @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
+    end
+
+    def shared_sequences
+      @shared_sequences ||= to_sequences & from_sequences
+    end
+
+    def notes
+      notes = []
+      if shared_fields.empty?
+        notes << "No fields to copy"
+      else
+        extra_fields = to_fields - from_fields
+        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
+
+        missing_fields = from_fields - to_fields
+        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
+
+        extra_sequences = to_sequences - from_sequences
+        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
+
+        missing_sequences = from_sequences - to_sequences
+        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+
+        from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h
+        to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h
+        different_types = []
+        shared_fields.each do |field|
+          if from_types[field] != to_types[field]
+            different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})"
+          end
+        end
+        notes << "Different column types: #{different_types.join(", ")}" if different_types.any?
+      end
+      notes
+    end
+
+    def sync_data
+      raise Error, "This should never happen. Please file a bug." if shared_fields.empty?
+
+      sql_clause = String.new("")
+      sql_clause << " #{opts[:sql]}" if opts[:sql]
+
+      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
+      primary_key = destination.primary_key(table)
+      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
+      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
+
+      seq_values = {}
+      shared_sequences.each do |seq|
+        seq_values[seq] = source.last_value(seq)
+      end
+
+      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
+      if opts[:in_batches]
+        raise Error, "No primary key" if primary_key.empty?
+        primary_key = primary_key.first
+
+        destination.truncate(table) if opts[:truncate]
+
+        from_max_id = source.max_id(table, primary_key)
+        to_max_id = destination.max_id(table, primary_key) + 1
+
+        if to_max_id == 1
+          from_min_id = source.min_id(table, primary_key)
+          to_max_id = from_min_id if from_min_id > 0
+        end
+
+        starting_id = to_max_id
+        batch_size = opts[:batch_size]
+
+        i = 1
+        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
+
+        while starting_id <= from_max_id
+          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
+          log " #{i}/#{batch_count}: #{where}"
+
+          # TODO be smarter for advance sql clauses
+          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
+
+          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
+          copy(batch_copy_to_command, dest_table: table, dest_fields: fields)
+
+          starting_id += batch_size
+          i += 1
+
+          if opts[:sleep] && starting_id <= from_max_id
+            sleep(opts[:sleep])
+          end
+        end
+      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
+        raise Error, "No primary key" if primary_key.empty?
+
+        # create a temp table
+        temp_table = "pgsync_#{rand(1_000_000_000)}"
+        destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA")
+
+        # load data
+        copy(copy_to_command, dest_table: temp_table, dest_fields: fields)
+
+        on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ")
+        action =
+          if opts[:preserve]
+            "NOTHING"
+          else # overwrite or sql clause
+            setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" }
+            "UPDATE SET #{setter.join(", ")}"
+          end
+        destination.execute("INSERT INTO #{quoted_table} (SELECT * FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}")
+      else
+        # use delete instead of truncate for foreign keys
+        if opts[:defer_constraints]
+          destination.execute("DELETE FROM #{quoted_table}")
+        else
+          destination.truncate(table)
+        end
+        copy(copy_to_command, dest_table: table, dest_fields: fields)
+      end
+      seq_values.each do |seq, value|
+        destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
+      end
+
+      {status: "success"}
+    end
+
+    private
+
+    def with_notices
+      notices = []
+      [source, destination].each do |data_source|
+        data_source.send(:conn).set_notice_processor do |message|
+          notices << message.strip
+        end
+      end
+      result = yield
+      result[:notices] = notices if result
+      result
+    ensure
+      # clear notice processor
+      [source, destination].each do |data_source|
+        data_source.send(:conn).set_notice_processor
+      end
+    end
+
+    # TODO add retries
+    def handle_errors
+      yield
+    rescue => e
+      raise e if opts[:debug]
+
+      message =
+        case e
+        when PG::ConnectionBad
+          # likely fine to show simplified message here
+          # the full message will be shown when first trying to connect
+          "Connection failed"
+        when PG::Error
+          e.message.sub("ERROR: ", "")
+        when Error
+          e.message
+        else
+          "#{e.class.name}: #{e.message}"
+        end
+
+      {status: "error", message: message}
+    end
+
+    def copy(source_command, dest_table:, dest_fields:)
+      destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
+      destination.conn.copy_data(destination_command) do
+        source.conn.copy_data(source_command) do
+          while (row = source.conn.get_copy_data)
+            destination.conn.put_copy_data(row)
+          end
+        end
+      end
+    end
+
+    # TODO better performance
+    def rule_match?(table, column, rule)
+      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
+      regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}")
+    end
+
+    # TODO wildcard rules
+    def apply_strategy(rule, table, column, primary_key)
+      if rule.is_a?(Hash)
+        if rule.key?("value")
+          escape(rule["value"])
+        elsif rule.key?("statement")
+          rule["statement"]
+        else
+          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
+        end
+      else
+        case rule
+        when "untouched"
+          quote_ident(column)
+        when "unique_email"
+          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
+        when "unique_phone"
+          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
+        when "unique_secret"
+          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
+        when "random_int", "random_number"
+          "(RANDOM() * 100)::int"
+        when "random_date"
+          "date '1970-01-01' + (RANDOM() * 10000)::int"
+        when "random_time"
+          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
+        when "random_ip"
+          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
+        when "random_letter"
+          "chr(65 + (RANDOM() * 26)::int)"
+        when "random_string"
+          "RIGHT(MD5(RANDOM()::text), 10)"
+        when "null", nil
+          "NULL"
+        else
+          raise Error, "Unknown rule #{rule} for column #{column}"
+        end
+      end
+    end
+
+    def quoted_primary_key(table, primary_key, rule)
+      raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1
+      "#{quoted_table}.#{quote_ident(primary_key.first)}"
+    end
+
+    def maybe_disable_triggers
+      if opts[:disable_integrity] || opts[:disable_integrity_v2] || opts[:disable_user_triggers]
+        destination.transaction do
+          triggers = destination.triggers(table)
+          triggers.select! { |t| t["enabled"] == "t" }
+          internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
+          integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
+          restore_triggers = []
+
+          # both --disable-integrity options require superuser privileges
+          # however, only v2 works on Amazon RDS, which added specific support for it
+          # https://aws.amazon.com/about-aws/whats-new/2014/11/10/amazon-rds-postgresql-read-replicas/
+          #
+          # session_replication_role disables more than foreign keys (like triggers and rules)
+          # this is probably fine, but keep the current default for now
+          if opts[:disable_integrity_v2] || (opts[:disable_integrity] && rds?)
+            # SET LOCAL lasts until the end of the transaction
+            # https://www.postgresql.org/docs/current/sql-set.html
+            destination.execute("SET LOCAL session_replication_role = replica")
+          elsif opts[:disable_integrity]
+            integrity_triggers.each do |trigger|
+              destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
+            end
+            restore_triggers.concat(integrity_triggers)
+          end
+
+          if opts[:disable_user_triggers]
+            # important!
+            # rely on Postgres to disable user triggers
+            # we don't want to accidentally disable non-user triggers if logic above is off
+            destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER")
+            restore_triggers.concat(user_triggers)
+          end
+
+          result = yield
+
+          # restore triggers that were previously enabled
+          restore_triggers.each do |trigger|
+            destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}")
+          end
+
+          result
+        end
+      else
+        yield
+      end
+    end
+
+    def rds?
+      destination.execute("SELECT name, setting FROM pg_settings WHERE name LIKE 'rds.%'").any?
+    end
+  end
+end
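
The data-rule matching in `Task#rule_match?` compiles a config pattern into an anchored regex in which `*` matches any run of non-dot characters, so a wildcard can stand in for a single schema, table, or column segment but never crosses a `.` boundary. A standalone sketch of the same construction (patterns and names are illustrative):

```ruby
# same construction as rule_match?: escape the pattern, then turn
# each escaped "*" into the non-dot matcher [^\.]*
def rule_regex(rule)
  Regexp.new('\A' + Regexp.escape(rule).gsub('\*', '[^\.]*') + '\z')
end

rule_regex("email").match?("email")                        # => true
rule_regex("*.email").match?("users.email")                # => true
rule_regex("*.email").match?("public.users.email")         # => false (wildcard stops at dots)
rule_regex("public.*.email").match?("public.users.email")  # => true
```

`rule_match?` tries the bare column, then `table.column`, then `schema.table.column`, so a rule can be written at whichever granularity a `.pgsync.yml` needs.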