pgsync 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pgsync might be problematic. Click here for more details.

# minimal class to keep schema and table name separate
module PgSync
  # Value object identifying a table by schema and name.
  # Instances are used as Hash keys (grouping query results by table),
  # so ==, eql?, and hash are defined together.
  class Table
    attr_reader :schema, :name

    def initialize(schema, name)
      @schema = schema
      @name = name
    end

    # "schema.name", e.g. "public.users"
    def full_name
      "#{schema}.#{name}"
    end

    # Type-safe equality: previously eql? called other.schema directly and
    # raised NoMethodError when compared against a non-Table object (which
    # can happen during Hash collision probing); now it simply returns false.
    def ==(other)
      other.is_a?(Table) && other.schema == schema && other.name == name
    end

    # keep eql? consistent with == (Hash lookup uses eql?)
    alias eql? ==

    # override hash when overriding eql?
    def hash
      [schema, name].hash
    end

    def to_s
      full_name
    end
  end
end
module PgSync
  # Orchestrates one sync run: loads column metadata for every task,
  # prints warnings up front, then executes the per-table tasks
  # (in parallel where the selected options allow it).
  class TableSync
    include Utils # presumably supplies log/warning/task_name/output/confirm_tables_exist — TODO confirm in utils.rb

    attr_reader :source, :destination, :tasks, :opts, :resolver

    # source, destination: data sources (respond to execute, transaction,
    #   conn, reconnect_if_needed, ...)
    # tasks: one Task per table to sync
    # opts: parsed CLI options hash
    # resolver: provides table-level notes (see show_notes)
    def initialize(source:, destination:, tasks:, opts:, resolver:)
      @source = source
      @destination = destination
      @tasks = tasks
      @opts = opts
      @resolver = resolver
    end

    # Entry point: verify destination tables exist, attach column info to
    # each task, print notes, then sync every table with shared fields.
    def perform
      confirm_tables_exist(destination, tasks, "destination")

      add_columns

      show_notes

      # don't sync tables with no shared fields
      # we show a warning message above
      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
    end

    # Attach from/to column lists to each task (one metadata query per
    # data source; tables absent on a side get an empty list).
    # TODO only query specific tables
    # TODO add sequences, primary keys, etc
    def add_columns
      source_columns = columns(source)
      destination_columns = columns(destination)

      tasks.each do |task|
        task.from_columns = source_columns[task.table] || []
        task.to_columns = destination_columns[task.table] || []
      end
    end

    # Print all warnings before syncing: resolver notes, per-task
    # column/sequence notes, and (with --defer-constraints) any foreign
    # keys that cannot actually be deferred.
    def show_notes
      # for tables
      resolver.notes.each do |note|
        warning note
      end

      # for columns and sequences
      tasks.each do |task|
        task.notes.each do |note|
          warning "#{task_name(task)}: #{note}"
        end
      end

      # for non-deferrable constraints
      if opts[:defer_constraints]
        constraints = non_deferrable_constraints(destination)
        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
      end
    end

    # Returns {Table => [{name:, type:}, ...]} for every column visible
    # in information_schema on the given data source.
    def columns(data_source)
      query = <<~SQL
        SELECT
          table_schema AS schema,
          table_name AS table,
          column_name AS column,
          data_type AS type
        FROM
          information_schema.columns
        ORDER BY 1, 2, 3
      SQL
      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
      end.to_h
    end

    # Returns {Table => [constraint names]} for foreign keys declared
    # NOT DEFERRABLE (these break the --defer-constraints strategy).
    def non_deferrable_constraints(data_source)
      query = <<~SQL
        SELECT
          table_schema AS schema,
          table_name AS table,
          constraint_name
        FROM
          information_schema.table_constraints
        WHERE
          constraint_type = 'FOREIGN KEY' AND
          is_deferrable = 'NO'
      SQL
      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
        [k, v.map { |r| r["constraint_name"] }]
      end.to_h
    end

    # Run the tasks with one spinner per table, parallelized via the
    # Parallel gem unless an option forces serial execution.
    # Raises (via fail_sync) if any table fails.
    # NOTE(review): the &block parameter is accepted but never used here.
    def run_tasks(tasks, &block)
      notices = []
      failed_tables = []

      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
      task_spinners = {}
      started_at = {}

      # Parallel's `start` hook: register and start a spinner per task
      start = lambda do |task, i|
        message = ":spinner #{display_item(task)}"
        spinner = spinners.register(message)
        if opts[:in_batches]
          # log instead of spin for non-tty
          log message.sub(":spinner", "⠋")
        else
          spinner.auto_spin
        end
        task_spinners[task] = spinner
        started_at[task] = Time.now
      end

      # Parallel's `finish` hook: report the result, collect notices,
      # and record failures (optionally aborting with --fail-fast)
      finish = lambda do |task, i, result|
        spinner = task_spinners[task]
        time = (Time.now - started_at[task]).round(1)

        message =
          if result[:message]
            "(#{result[:message].lines.first.to_s.strip})"
          else
            "- #{time}s"
          end

        notices.concat(result[:notices])

        if result[:status] == "success"
          spinner.success(message)
        else
          spinner.error(message)
          failed_tables << task_name(task)
          fail_sync(failed_tables) if opts[:fail_fast]
        end

        # spinners produce no output without a tty, so log the outcome too
        unless spinner.send(:tty?)
          status = result[:status] == "success" ? "✔" : "✖"
          log [status, display_item(task), message].join(" ")
        end
      end

      options = {start: start, finish: finish}

      # these modes depend on a single connection / ordered output,
      # so force serial execution and ignore --jobs
      jobs = opts[:jobs]
      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
        warning "--jobs ignored" if jobs
        jobs = 0
      end

      # fork is unavailable on Windows, so fall back to threads there
      if windows?
        options[:in_threads] = jobs || 4
      else
        options[:in_processes] = jobs if jobs
      end

      maybe_defer_constraints do
        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
        # see `fast_faster` branch
        # however, need to make sure connections are cleaned up properly
        Parallel.each(tasks, **options) do |task|
          source.reconnect_if_needed
          destination.reconnect_if_needed

          task.perform
        end
      end

      notices.each do |notice|
        warning notice
      end

      fail_sync(failed_tables) if failed_tables.any?
    end

    # With --defer-constraints, wrap the entire run in a destination
    # transaction with constraints deferred; otherwise just yield.
    def maybe_defer_constraints
      if opts[:defer_constraints]
        destination.transaction do
          destination.execute("SET CONSTRAINTS ALL DEFERRED")

          # create a transaction on the source
          # to ensure we get a consistent snapshot
          source.transaction do
            yield
          end
        end
      else
        yield
      end
    end

    # Raise a single summary error listing every failed table.
    def fail_sync(failed_tables)
      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
    end

    # Human-readable label for a task: table name plus its SQL filter, if any.
    def display_item(item)
      messages = []
      messages << task_name(item)
      messages << item.opts[:sql] if item.opts[:sql]
      messages.join(" ")
    end

    def windows?
      Gem.win_platform?
    end
  end
end
module PgSync
  # Syncs a single table from source to destination: computes which
  # columns/sequences the two sides share, then copies the data using
  # the strategy selected by opts (batches, upsert, or truncate+copy).
  class Task
    include Utils # presumably supplies log/quote_ident/quote_ident_full/escape — TODO confirm in utils.rb

    attr_reader :source, :destination, :config, :table, :opts
    # from_columns/to_columns are assigned externally (by TableSync#add_columns)
    # as arrays of {name:, type:} hashes
    attr_accessor :from_columns, :to_columns

    # table: a Table value object (schema + name)
    # config: parsed config file hash (used for "data_rules")
    # opts: per-task options hash
    def initialize(source:, destination:, config:, table:, opts:)
      @source = source
      @destination = destination
      @config = config
      @table = table
      @opts = opts
    end

    # Fully quoted "schema"."table" identifier for SQL.
    def quoted_table
      quote_ident_full(table)
    end

    # Run the sync for this table, capturing server notices and turning
    # exceptions into {status: "error", message: ...} result hashes.
    def perform
      with_notices do
        handle_errors do
          maybe_disable_triggers do
            sync_data
          end
        end
      end
    end

    # Column names on the source side (memoized).
    def from_fields
      @from_fields ||= from_columns.map { |c| c[:name] }
    end

    # Column names on the destination side (memoized).
    def to_fields
      @to_fields ||= to_columns.map { |c| c[:name] }
    end

    # Columns present on both sides — only these are copied.
    def shared_fields
      @shared_fields ||= to_fields & from_fields
    end

    def from_sequences
      @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
    end

    def to_sequences
      @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
    end

    # Sequences present on both sides — these get their values synced.
    def shared_sequences
      @shared_sequences ||= to_sequences & from_sequences
    end

    # Human-readable warnings about schema mismatches between the two
    # sides (extra/missing columns and sequences, differing column types).
    def notes
      notes = []
      if shared_fields.empty?
        notes << "No fields to copy"
      else
        extra_fields = to_fields - from_fields
        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?

        missing_fields = from_fields - to_fields
        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?

        extra_sequences = to_sequences - from_sequences
        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?

        missing_sequences = from_sequences - to_sequences
        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?

        from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h
        to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h
        different_types = []
        shared_fields.each do |field|
          if from_types[field] != to_types[field]
            different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})"
          end
        end
        notes << "Different column types: #{different_types.join(", ")}" if different_types.any?
      end
      notes
    end

    # Copy the data. Three strategies:
    #   --in-batches: copy primary-key ranges directly into the table
    #   --overwrite / --preserve / custom SQL (without --truncate):
    #     stage rows in a temp table, then upsert via ON CONFLICT
    #   otherwise: empty the table and stream a full COPY
    # Returns {status: "success"}; sequence values are synced at the end.
    def sync_data
      # TableSync already filters out tasks with no shared fields
      raise Error, "This should never happen. Please file a bug." if shared_fields.empty?

      sql_clause = String.new("")
      sql_clause << " #{opts[:sql]}" if opts[:sql]

      # data rules rewrite sensitive columns via apply_strategy
      bad_fields = opts[:no_rules] ? [] : config["data_rules"]
      primary_key = destination.primary_key(table)
      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
      fields = shared_fields.map { |f| quote_ident(f) }.join(", ")

      # capture source sequence values before copying
      seq_values = {}
      shared_sequences.each do |seq|
        seq_values[seq] = source.last_value(seq)
      end

      copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
      if opts[:in_batches]
        raise Error, "No primary key" if primary_key.empty?
        # batching only supports a single-column primary key
        primary_key = primary_key.first

        destination.truncate(table) if opts[:truncate]

        from_max_id = source.max_id(table, primary_key)
        # resume after the highest id already present on the destination
        to_max_id = destination.max_id(table, primary_key) + 1

        # empty destination: start from the source's minimum id
        if to_max_id == 1
          from_min_id = source.min_id(table, primary_key)
          to_max_id = from_min_id if from_min_id > 0
        end

        starting_id = to_max_id
        batch_size = opts[:batch_size]

        i = 1
        batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil

        while starting_id <= from_max_id
          where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
          log " #{i}/#{batch_count}: #{where}"

          # TODO be smarter for advance sql clauses
          batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"

          batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
          copy(batch_copy_to_command, dest_table: table, dest_fields: fields)

          starting_id += batch_size
          i += 1

          # optional throttle between batches
          if opts[:sleep] && starting_id <= from_max_id
            sleep(opts[:sleep])
          end
        end
      elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
        raise Error, "No primary key" if primary_key.empty?

        # create a temp table
        temp_table = "pgsync_#{rand(1_000_000_000)}"
        destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA")

        # load data
        copy(copy_to_command, dest_table: temp_table, dest_fields: fields)

        # upsert from the temp table; --preserve keeps existing rows,
        # otherwise conflicting rows are overwritten column by column
        on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ")
        action =
          if opts[:preserve]
            "NOTHING"
          else # overwrite or sql clause
            setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" }
            "UPDATE SET #{setter.join(", ")}"
          end
        destination.execute("INSERT INTO #{quoted_table} (SELECT * FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}")
      else
        # use delete instead of truncate for foreign keys
        if opts[:defer_constraints]
          destination.execute("DELETE FROM #{quoted_table}")
        else
          destination.truncate(table)
        end
        copy(copy_to_command, dest_table: table, dest_fields: fields)
      end
      seq_values.each do |seq, value|
        destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
      end

      {status: "success"}
    end

    private

    # Collect PostgreSQL NOTICE messages from both connections while the
    # block runs and attach them to the result hash as result[:notices].
    def with_notices
      notices = []
      [source, destination].each do |data_source|
        data_source.send(:conn).set_notice_processor do |message|
          notices << message.strip
        end
      end
      result = yield
      result[:notices] = notices if result
      result
    ensure
      # clear notice processor
      [source, destination].each do |data_source|
        data_source.send(:conn).set_notice_processor
      end
    end

    # Convert exceptions into {status: "error", message: ...} results
    # (re-raised untouched under --debug).
    # TODO add retries
    def handle_errors
      yield
    rescue => e
      raise e if opts[:debug]

      message =
        case e
        when PG::ConnectionBad
          # likely fine to show simplified message here
          # the full message will be shown when first trying to connect
          "Connection failed"
        when PG::Error
          e.message.sub("ERROR: ", "")
        when Error
          e.message
        else
          "#{e.class.name}: #{e.message}"
        end

      {status: "error", message: message}
    end

    # Stream rows from the source COPY command straight into a COPY FROM
    # STDIN on the destination (no intermediate file).
    def copy(source_command, dest_table:, dest_fields:)
      destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
      destination.conn.copy_data(destination_command) do
        source.conn.copy_data(source_command) do
          while (row = source.conn.get_copy_data)
            destination.conn.put_copy_data(row)
          end
        end
      end
    end

    # Does a data rule (with * wildcards) match this column?
    # Rules may target "column", "table.column", or "schema.table.column".
    # TODO better performance
    def rule_match?(table, column, rule)
      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
      regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}")
    end

    # Translate a data rule into the SQL expression that replaces the
    # column's value in the COPY SELECT list.
    # TODO wildcard rules
    def apply_strategy(rule, table, column, primary_key)
      if rule.is_a?(Hash)
        if rule.key?("value")
          escape(rule["value"])
        elsif rule.key?("statement")
          rule["statement"]
        else
          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
        end
      else
        case rule
        when "untouched"
          quote_ident(column)
        when "unique_email"
          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
        when "unique_phone"
          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
        when "unique_secret"
          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
        when "random_int", "random_number"
          "(RANDOM() * 100)::int"
        when "random_date"
          "date '1970-01-01' + (RANDOM() * 10000)::int"
        when "random_time"
          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
        when "random_ip"
          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
        when "random_letter"
          "chr(65 + (RANDOM() * 26)::int)"
        when "random_string"
          "RIGHT(MD5(RANDOM()::text), 10)"
        when "null", nil
          "NULL"
        else
          raise Error, "Unknown rule #{rule} for column #{column}"
        end
      end
    end

    # Quoted primary-key expression for unique_* rules; requires exactly
    # one primary-key column.
    def quoted_primary_key(table, primary_key, rule)
      raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1
      "#{quoted_table}.#{quote_ident(primary_key.first)}"
    end

    # With --disable-integrity / --disable-user-triggers, disable the
    # matching enabled triggers on the destination inside a transaction,
    # run the block, then re-enable exactly what was disabled.
    def maybe_disable_triggers
      if opts[:disable_integrity] || opts[:disable_user_triggers]
        destination.transaction do
          triggers = destination.triggers(table)
          triggers.select! { |t| t["enabled"] == "t" }
          internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" }
          integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" }
          restore_triggers = []

          if opts[:disable_integrity]
            integrity_triggers.each do |trigger|
              destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}")
            end
            restore_triggers.concat(integrity_triggers)
          end

          if opts[:disable_user_triggers]
            # important!
            # rely on Postgres to disable user triggers
            # we don't want to accidentally disable non-user triggers if logic above is off
            destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER")
            restore_triggers.concat(user_triggers)
          end

          result = yield

          # restore triggers that were previously enabled
          restore_triggers.each do |trigger|
            destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}")
          end

          result
        end
      else
        yield
      end
    end
  end
end