pgsync 0.5.2 → 0.6.1

@@ -0,0 +1,28 @@
+# minimal class to keep schema and table name separate
+module PgSync
+  class Table
+    attr_reader :schema, :name
+
+    def initialize(schema, name)
+      @schema = schema
+      @name = name
+    end
+
+    def full_name
+      "#{schema}.#{name}"
+    end
+
+    def eql?(other)
+      other.schema == schema && other.name == name
+    end
+
+    # override hash when overriding eql?
+    def hash
+      [schema, name].hash
+    end
+
+    def to_s
+      full_name
+    end
+  end
+end
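A quick sketch (not part of the diff; the table names are made up) of how this value object behaves: because eql? and hash are both overridden, two Table instances naming the same schema and table act as a single Hash key, which is what the group_by calls in TableSync below rely on.

  # sketch only: assumes the PgSync::Table class added above
  t1 = PgSync::Table.new("public", "users")
  t2 = PgSync::Table.new("public", "users")

  t1.eql?(t2)                    # => true
  t1.hash == t2.hash             # => true
  { t1 => [:id, :email] }[t2]    # => [:id, :email] (same Hash key)
  t1.to_s                        # => "public.users"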
@@ -2,256 +2,204 @@ module PgSync
   class TableSync
     include Utils

-    def sync(config, table, opts, source_url, destination_url)
-      start_time = Time.now
-      source = DataSource.new(source_url, timeout: 0)
-      destination = DataSource.new(destination_url, timeout: 0)
-
-      begin
-        from_connection = source.conn
-        to_connection = destination.conn
-
-        bad_fields = opts[:no_rules] ? [] : config["data_rules"]
-
-        from_fields = source.columns(table)
-        to_fields = destination.columns(table)
-        shared_fields = to_fields & from_fields
-        extra_fields = to_fields - from_fields
-        missing_fields = from_fields - to_fields
-
-        if opts[:no_sequences]
-          from_sequences = []
-          to_sequences = []
-        else
-          from_sequences = source.sequences(table, shared_fields)
-          to_sequences = destination.sequences(table, shared_fields)
-        end
+    attr_reader :source, :destination, :tasks, :opts, :resolver
+
+    def initialize(source:, destination:, tasks:, opts:, resolver:)
+      @source = source
+      @destination = destination
+      @tasks = tasks
+      @opts = opts
+      @resolver = resolver
+    end

-        shared_sequences = to_sequences & from_sequences
-        extra_sequences = to_sequences - from_sequences
-        missing_sequences = from_sequences - to_sequences
+    def perform
+      confirm_tables_exist(destination, tasks, "destination")

-        sql_clause = String.new
+      add_columns

-        if opts[:sql]
-          sql_clause << " #{opts[:sql]}"
-        end
+      show_notes

-        notes = []
-        notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
-        notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
-        notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
-        notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
-
-        if shared_fields.empty?
-          return {status: "success", message: "No fields to copy"}
-        end
+      # don't sync tables with no shared fields
+      # we show a warning message above
+      run_tasks(tasks.reject { |task| task.shared_fields.empty? })
+    end

-        if shared_fields.any?
-          primary_key = destination.primary_key(table)
-          copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quote_ident_full(table)}.#{quote_ident(f)}" }.join(", ")
-          fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
+    # TODO only query specific tables
+    # TODO add sequences, primary keys, etc
+    def add_columns
+      source_columns = columns(source)
+      destination_columns = columns(destination)

-          seq_values = {}
-          shared_sequences.each do |seq|
-            seq_values[seq] = source.last_value(seq)
-          end
+      tasks.each do |task|
+        task.from_columns = source_columns[task.table] || []
+        task.to_columns = destination_columns[task.table] || []
+      end
+    end

-          copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident_full(table)}#{sql_clause}) TO STDOUT"
-          if opts[:in_batches]
-            raise Error, "Cannot use --overwrite with --in-batches" if opts[:overwrite]
-            raise Error, "No primary key" unless primary_key
-
-            destination.truncate(table) if opts[:truncate]
-
-            from_max_id = source.max_id(table, primary_key)
-            to_max_id = destination.max_id(table, primary_key) + 1
-
-            if to_max_id == 1
-              from_min_id = source.min_id(table, primary_key)
-              to_max_id = from_min_id if from_min_id > 0
-            end
-
-            starting_id = to_max_id
-            batch_size = opts[:batch_size]
-
-            i = 1
-            batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
-
-            while starting_id <= from_max_id
-              where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
-              log " #{i}/#{batch_count}: #{where}"
-
-              # TODO be smarter for advance sql clauses
-              batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
-
-              batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident_full(table)}#{batch_sql_clause}) TO STDOUT"
-              to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-                from_connection.copy_data batch_copy_to_command do
-                  while (row = from_connection.get_copy_data)
-                    to_connection.put_copy_data(row)
-                  end
-                end
-              end
-
-              starting_id += batch_size
-              i += 1
-
-              if opts[:sleep] && starting_id <= from_max_id
-                sleep(opts[:sleep])
-              end
-            end
-
-            log # add extra line for spinner
-          elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
-            raise Error, "No primary key" unless primary_key
-
-            temp_table = "pgsync_#{rand(1_000_000_000)}"
-            file = Tempfile.new(temp_table)
-            begin
-              from_connection.copy_data copy_to_command do
-                while (row = from_connection.get_copy_data)
-                  file.write(row)
-                end
-              end
-              file.rewind
-
-              # create a temp table
-              to_connection.exec("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS SELECT * FROM #{quote_ident_full(table)} WITH NO DATA")
-
-              # load file
-              to_connection.copy_data "COPY #{quote_ident_full(temp_table)} (#{fields}) FROM STDIN" do
-                file.each do |row|
-                  to_connection.put_copy_data(row)
-                end
-              end
-
-              if opts[:preserve]
-                # insert into
-                to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident_full(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident_full(table)} WHERE #{quote_ident_full(table)}.#{quote_ident(primary_key)} = #{quote_ident_full(temp_table)}.#{quote_ident(primary_key)}))")
-              else
-                to_connection.transaction do
-                  to_connection.exec("DELETE FROM #{quote_ident_full(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident_full(temp_table)})")
-                  to_connection.exec("INSERT INTO #{quote_ident_full(table)} (SELECT * FROM #{quote_ident(temp_table)})")
-                end
-              end
-            ensure
-              file.close
-              file.unlink
-            end
-          else
-            destination.truncate(table)
-            to_connection.copy_data "COPY #{quote_ident_full(table)} (#{fields}) FROM STDIN" do
-              from_connection.copy_data copy_to_command do
-                while (row = from_connection.get_copy_data)
-                  to_connection.put_copy_data(row)
-                end
-              end
-            end
-          end
-          seq_values.each do |seq, value|
-            to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
-          end
-        end
+    def show_notes
+      # for tables
+      resolver.notes.each do |note|
+        warning note
+      end

-        message = nil
-        if notes.any?
-          message = notes.join(", ")
+      # for columns and sequences
+      tasks.each do |task|
+        task.notes.each do |note|
+          warning "#{task_name(task)}: #{note}"
         end
+      end

-        {status: "success", message: message, time: (Time.now - start_time).round(1)}
-      ensure
-        source.close
-        destination.close
+      # for non-deferrable constraints
+      if opts[:defer_constraints]
+        constraints = non_deferrable_constraints(destination)
+        constraints = tasks.flat_map { |t| constraints[t.table] || [] }
+        warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any?
       end
-    rescue => e
-      message =
-        case e
-        when PG::Error
-          # likely fine to show simplified message here
-          # the full message will be shown when first trying to connect
-          "Connection failed"
-        when Error
-          e.message
+    end
+
+    def columns(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          column_name AS column,
+          data_type AS type
+        FROM
+          information_schema.columns
+        ORDER BY 1, 2, 3
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| {name: r["column"], type: r["type"]} }]
+      end.to_h
+    end
+
+    def non_deferrable_constraints(data_source)
+      query = <<~SQL
+        SELECT
+          table_schema AS schema,
+          table_name AS table,
+          constraint_name
+        FROM
+          information_schema.table_constraints
+        WHERE
+          constraint_type = 'FOREIGN KEY' AND
+          is_deferrable = 'NO'
+      SQL
+      data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
+        [k, v.map { |r| r["constraint_name"] }]
+      end.to_h
+    end
+
+    def run_tasks(tasks, &block)
+      notices = []
+      failed_tables = []
+
+      spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
+      task_spinners = {}
+      started_at = {}
+
+      start = lambda do |task, i|
+        message = ":spinner #{display_item(task)}"
+        spinner = spinners.register(message)
+        if opts[:in_batches]
+          # log instead of spin for non-tty
+          log message.sub(":spinner", "⠋")
         else
-          "#{e.class.name}: #{e.message}"
+          spinner.auto_spin
         end
+        task_spinners[task] = spinner
+        started_at[task] = Time.now
+      end

-      {status: "error", message: message}
-    end
+      finish = lambda do |task, i, result|
+        spinner = task_spinners[task]
+        time = (Time.now - started_at[task]).round(1)

-    private
+        message =
+          if result[:message]
+            "(#{result[:message].lines.first.to_s.strip})"
+          else
+            "- #{time}s"
+          end

-    # TODO better performance
-    def rule_match?(table, column, rule)
-      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
-      regex.match(column) || regex.match("#{table.split(".", 2)[-1]}.#{column}") || regex.match("#{table}.#{column}")
-    end
+        notices.concat(result[:notices])

-    # TODO wildcard rules
-    def apply_strategy(rule, table, column, primary_key)
-      if rule.is_a?(Hash)
-        if rule.key?("value")
-          escape(rule["value"])
-        elsif rule.key?("statement")
-          rule["statement"]
+        if result[:status] == "success"
+          spinner.success(message)
         else
-          raise Error, "Unknown rule #{rule.inspect} for column #{column}"
+          spinner.error(message)
+          failed_tables << task_name(task)
+          fail_sync(failed_tables) if opts[:fail_fast]
         end
+
+        unless spinner.send(:tty?)
+          status = result[:status] == "success" ? "✔" : "✖"
+          log [status, display_item(task), message].join(" ")
+        end
+      end
+
+      options = {start: start, finish: finish}
+
+      jobs = opts[:jobs]
+      if opts[:debug] || opts[:in_batches] || opts[:defer_constraints]
+        warning "--jobs ignored" if jobs
+        jobs = 0
+      end
+
+      if windows?
+        options[:in_threads] = jobs || 4
       else
-        case rule
-        when "untouched"
-          quote_ident(column)
-        when "unique_email"
-          "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'"
-        when "unique_phone"
-          "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text"
-        when "unique_secret"
-          "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text"
-        when "random_int", "random_number"
-          "(RANDOM() * 100)::int"
-        when "random_date"
-          "date '1970-01-01' + (RANDOM() * 10000)::int"
-        when "random_time"
-          "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'"
-        when "random_ip"
-          "(1 + RANDOM() * 254)::int::text || '.0.0.1'"
-        when "random_letter"
-          "chr(65 + (RANDOM() * 26)::int)"
-        when "random_string"
-          "RIGHT(MD5(RANDOM()::text), 10)"
-        when "null", nil
-          "NULL"
-        else
-          raise Error, "Unknown rule #{rule} for column #{column}"
+        options[:in_processes] = jobs if jobs
+      end
+
+      maybe_defer_constraints do
+        # could try to use `raise Parallel::Kill` to fail faster with --fail-fast
+        # see `fast_faster` branch
+        # however, need to make sure connections are cleaned up properly
+        Parallel.each(tasks, **options) do |task|
+          source.reconnect_if_needed
+          destination.reconnect_if_needed
+
+          task.perform
         end
       end
-    end

-    def quoted_primary_key(table, primary_key, rule)
-      raise "Primary key required for this data rule: #{rule}" unless primary_key
-      "#{quote_ident_full(table)}.#{quote_ident(primary_key)}"
-    end
+      notices.each do |notice|
+        warning notice
+      end

-    def quote_ident_full(ident)
-      ident.split(".").map { |v| quote_ident(v) }.join(".")
+      fail_sync(failed_tables) if failed_tables.any?
     end

-    def quote_ident(value)
-      PG::Connection.quote_ident(value)
-    end
+    def maybe_defer_constraints
+      if opts[:defer_constraints]
+        destination.transaction do
+          destination.execute("SET CONSTRAINTS ALL DEFERRED")

-    def escape(value)
-      if value.is_a?(String)
-        "'#{quote_string(value)}'"
+          # create a transaction on the source
+          # to ensure we get a consistent snapshot
+          source.transaction do
+            yield
+          end
+        end
       else
-        value
+        yield
       end
     end

-    # activerecord
-    def quote_string(s)
-      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
+    def fail_sync(failed_tables)
+      raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}"
+    end
+
+    def display_item(item)
+      messages = []
+      messages << task_name(item)
+      messages << item.opts[:sql] if item.opts[:sql]
+      messages.join(" ")
+    end
+
+    def windows?
+      Gem.win_platform?
     end
   end
 end
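For context on the new --defer-constraints path (maybe_defer_constraints above), here is a standalone sketch of the underlying PostgreSQL behavior, using the pg gem directly rather than pgsync's own wrappers; the database and table names are made up, and the sketch assumes the foreign key was created DEFERRABLE. SET CONSTRAINTS ALL DEFERRED postpones deferrable foreign key checks until COMMIT, so rows can be copied in any table order; non-deferrable constraints still fire immediately, which is why show_notes warns about them.

  require "pg"

  # hypothetical database and tables, for illustration only
  conn = PG.connect(dbname: "example")
  conn.transaction do |c|
    c.exec("SET CONSTRAINTS ALL DEFERRED")
    # child row first: the deferrable FK check waits until COMMIT
    c.exec("INSERT INTO comments (id, post_id) VALUES (1, 123)")
    # parent row arrives later in the same transaction
    c.exec("INSERT INTO posts (id) VALUES (123)")
  end # foreign key constraint checked here, at COMMIT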