pgsync 0.6.3 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pgsync might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0d3ff88829338544bfe3434a16a00640e6496b7b89b1ef5dd43e66f844ea51ce
4
- data.tar.gz: d5457e8c2596fdda651c9739712fe0934af14028295ea9cd6addfc70438ced16
3
+ metadata.gz: 0ecb467f4d3112edfcfebe19913e29daeddfe1e4cc10e18bd9e8850fbceced57
4
+ data.tar.gz: 4d8309dec9e8238074267baf583fe333c7b1d3300ad80398748e15905eb1e539
5
5
  SHA512:
6
- metadata.gz: 6f9549d0d85cb502b5b88a3178fe617bf94f032d72787ab6dcf8f6097dc256ca2e969e2c4a434ebf33e4de17a4f11c590c57ef8cae420b20fe4f39bfed795baa
7
- data.tar.gz: b10ac04af7dc26c3067c2ff73f163c69d73dfb5c1f559585adcf774312ac78ac07dcd790e1706cc571a90f0e4573060cedf01fc6ebd8b1732fbaa16b486213dd
6
+ metadata.gz: 145eb31e2565257a5adedc05c1308692dffb1e0785728ecd38230bb23c34c4ba6ecf08512201f707f457b136e0b7a22ca511b46fb7834f2045c041ff43dd6ccc
7
+ data.tar.gz: cde51dee36149f9f8c21e26eecd504de68d8a47c2524f084b74962d7ceae1a01b59841261fc6c112cf94b31a89f55b9a991c05d1af0db41c46ed615fc56e888f
@@ -1,3 +1,8 @@
1
+ ## 0.6.4 (2020-06-10)
2
+
3
+ - Log SQL with `--debug` option
4
+ - Improved sequence queries
5
+
1
6
  ## 0.6.3 (2020-06-09)
2
7
 
3
8
  - Added `--defer-constraints-v2` option
data/README.md CHANGED
@@ -307,6 +307,14 @@ exclude:
307
307
  - schema_migrations
308
308
  ```
309
309
 
310
+ ## Debugging
311
+
312
+ To view the SQL that’s run, use:
313
+
314
+ ```sh
315
+ pgsync --debug
316
+ ```
317
+
310
318
  ## Other Commands
311
319
 
312
320
  Help
@@ -18,6 +18,7 @@ require "pgsync/client"
18
18
  require "pgsync/data_source"
19
19
  require "pgsync/init"
20
20
  require "pgsync/schema_sync"
21
+ require "pgsync/sequence"
21
22
  require "pgsync/sync"
22
23
  require "pgsync/table"
23
24
  require "pgsync/table_sync"
data/lib/pgsync/data_source.rb CHANGED
@@ -4,8 +4,10 @@ module PgSync
4
4
 
5
5
  attr_reader :url
6
6
 
7
- def initialize(url)
7
+ def initialize(url, name:, debug:)
8
8
  @url = url
9
+ @name = name
10
+ @debug = debug
9
11
  end
10
12
 
11
13
  def exists?
@@ -50,10 +52,6 @@ module PgSync
50
52
  table_set.include?(table)
51
53
  end
52
54
 
53
- def sequences(table, columns)
54
- execute("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape("#{quote_ident_full(table)}")}, #{escape(f)}) AS #{quote_ident(f)}" }.join(", ")}").first.values.compact
55
- end
56
-
57
55
  def max_id(table, primary_key, sql_clause = nil)
58
56
  execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["max"].to_i
59
57
  end
@@ -62,39 +60,14 @@ module PgSync
62
60
  execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["min"].to_i
63
61
  end
64
62
 
65
- # this value comes from pg_get_serial_sequence which is already quoted
66
63
  def last_value(seq)
67
- execute("SELECT last_value FROM #{seq}").first["last_value"]
64
+ execute("SELECT last_value FROM #{quote_ident_full(seq)}").first["last_value"]
68
65
  end
69
66
 
70
67
  def truncate(table)
71
68
  execute("TRUNCATE #{quote_ident_full(table)} CASCADE")
72
69
  end
73
70
 
74
- # https://stackoverflow.com/a/20537829
75
- # TODO can simplify with array_position in Postgres 9.5+
76
- def primary_key(table)
77
- query = <<~SQL
78
- SELECT
79
- pg_attribute.attname,
80
- format_type(pg_attribute.atttypid, pg_attribute.atttypmod),
81
- pg_attribute.attnum,
82
- pg_index.indkey
83
- FROM
84
- pg_index, pg_class, pg_attribute, pg_namespace
85
- WHERE
86
- nspname = $1 AND
87
- relname = $2 AND
88
- indrelid = pg_class.oid AND
89
- pg_class.relnamespace = pg_namespace.oid AND
90
- pg_attribute.attrelid = pg_class.oid AND
91
- pg_attribute.attnum = any(pg_index.indkey) AND
92
- indisprimary
93
- SQL
94
- rows = execute(query, [table.schema, table.name])
95
- rows.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["attname"] }
96
- end
97
-
98
71
  def triggers(table)
99
72
  query = <<~SQL
100
73
  SELECT
@@ -148,20 +121,34 @@ module PgSync
148
121
  end
149
122
 
150
123
  def execute(query, params = [])
124
+ log_sql query, params
151
125
  conn.exec_params(query, params).to_a
152
126
  end
153
127
 
154
128
  def transaction
155
129
  if conn.transaction_status == 0
156
130
  # not currently in transaction
157
- conn.transaction do
158
- yield
159
- end
131
+ log_sql "BEGIN"
132
+ result =
133
+ conn.transaction do
134
+ yield
135
+ end
136
+ log_sql "COMMIT"
137
+ result
160
138
  else
161
139
  yield
162
140
  end
163
141
  end
164
142
 
143
+ # TODO log time for each statement
144
+ def log_sql(query, params = {})
145
+ if @debug
146
+ message = "#{colorize("[#{@name}]", :cyan)} #{query.gsub(/\s+/, " ").strip}"
147
+ message = "#{message} #{params.inspect}" if params.any?
148
+ log message
149
+ end
150
+ end
151
+
165
152
  private
166
153
 
167
154
  def concurrent_id
data/lib/pgsync/sequence.rb ADDED
@@ -0,0 +1,29 @@
1
+ # minimal class to keep schema and sequence name separate
2
+ module PgSync
3
+ class Sequence
4
+ attr_reader :schema, :name, :column
5
+
6
+ def initialize(schema, name, column:)
7
+ @schema = schema
8
+ @name = name
9
+ @column = column
10
+ end
11
+
12
+ def full_name
13
+ "#{schema}.#{name}"
14
+ end
15
+
16
+ def eql?(other)
17
+ other.schema == schema && other.name == name
18
+ end
19
+
20
+ # override hash when overriding eql?
21
+ def hash
22
+ [schema, name].hash
23
+ end
24
+
25
+ def to_s
26
+ full_name
27
+ end
28
+ end
29
+ end
@@ -34,13 +34,13 @@ module PgSync
34
34
  raise Error, "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1"
35
35
  end
36
36
 
37
+ print_description("From", source)
38
+ print_description("To", destination)
39
+
37
40
  if (opts[:preserve] || opts[:overwrite]) && destination.server_version_num < 90500
38
41
  raise Error, "Postgres 9.5+ is required for --preserve and --overwrite"
39
42
  end
40
43
 
41
- print_description("From", source)
42
- print_description("To", destination)
43
-
44
44
  resolver = TaskResolver.new(args: args, opts: opts, source: source, destination: destination, config: config, first_schema: first_schema)
45
45
  tasks =
46
46
  resolver.tasks.map do |task|
@@ -126,15 +126,15 @@ module PgSync
126
126
  end
127
127
 
128
128
  def source
129
- @source ||= data_source(@options[:from])
129
+ @source ||= data_source(@options[:from], "from")
130
130
  end
131
131
 
132
132
  def destination
133
- @destination ||= data_source(@options[:to])
133
+ @destination ||= data_source(@options[:to], "to")
134
134
  end
135
135
 
136
- def data_source(url)
137
- ds = DataSource.new(url)
136
+ def data_source(url, name)
137
+ ds = DataSource.new(url, name: name, debug: @options[:debug])
138
138
  ObjectSpace.define_finalizer(self, self.class.finalize(ds))
139
139
  ds
140
140
  end
@@ -17,6 +17,10 @@ module PgSync
17
17
 
18
18
  add_columns
19
19
 
20
+ add_primary_keys
21
+
22
+ add_sequences unless opts[:no_sequences]
23
+
20
24
  show_notes
21
25
 
22
26
  # don't sync tables with no shared fields
@@ -24,8 +28,6 @@ module PgSync
24
28
  run_tasks(tasks.reject { |task| task.shared_fields.empty? })
25
29
  end
26
30
 
27
- # TODO only query specific tables
28
- # TODO add sequences, primary keys, etc
29
31
  def add_columns
30
32
  source_columns = columns(source)
31
33
  destination_columns = columns(destination)
@@ -36,6 +38,79 @@ module PgSync
36
38
  end
37
39
  end
38
40
 
41
+ def add_primary_keys
42
+ destination_primary_keys = primary_keys(destination)
43
+
44
+ tasks.each do |task|
45
+ task.to_primary_key = destination_primary_keys[task.table] || []
46
+ end
47
+ end
48
+
49
+ def add_sequences
50
+ source_sequences = sequences(source)
51
+ destination_sequences = sequences(destination)
52
+
53
+ tasks.each do |task|
54
+ shared_columns = Set.new(task.shared_fields)
55
+
56
+ task.from_sequences = (source_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) }
57
+ task.to_sequences = (destination_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) }
58
+ end
59
+ end
60
+
61
+ def sequences(data_source)
62
+ query = <<~SQL
63
+ SELECT
64
+ nt.nspname as schema,
65
+ t.relname as table,
66
+ a.attname as column,
67
+ n.nspname as sequence_schema,
68
+ s.relname as sequence
69
+ FROM
70
+ pg_class s
71
+ INNER JOIN
72
+ pg_depend d ON d.objid = s.oid
73
+ INNER JOIN
74
+ pg_class t ON d.objid = s.oid AND d.refobjid = t.oid
75
+ INNER JOIN
76
+ pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum)
77
+ INNER JOIN
78
+ pg_namespace n ON n.oid = s.relnamespace
79
+ INNER JOIN
80
+ pg_namespace nt ON nt.oid = t.relnamespace
81
+ WHERE
82
+ s.relkind = 'S'
83
+ SQL
84
+ data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
85
+ [k, v.map { |r| Sequence.new(r["sequence_schema"], r["sequence"], column: r["column"]) }]
86
+ end.to_h
87
+ end
88
+
89
+ def primary_keys(data_source)
90
+ # https://stackoverflow.com/a/20537829
91
+ # TODO can simplify with array_position in Postgres 9.5+
92
+ query = <<~SQL
93
+ SELECT
94
+ nspname AS schema,
95
+ relname AS table,
96
+ pg_attribute.attname AS column,
97
+ format_type(pg_attribute.atttypid, pg_attribute.atttypmod),
98
+ pg_attribute.attnum,
99
+ pg_index.indkey
100
+ FROM
101
+ pg_index, pg_class, pg_attribute, pg_namespace
102
+ WHERE
103
+ indrelid = pg_class.oid AND
104
+ pg_class.relnamespace = pg_namespace.oid AND
105
+ pg_attribute.attrelid = pg_class.oid AND
106
+ pg_attribute.attnum = any(pg_index.indkey) AND
107
+ indisprimary
108
+ SQL
109
+ data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v|
110
+ [k, v.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["column"] }]
111
+ end.to_h
112
+ end
113
+
39
114
  def show_notes
40
115
  # for tables
41
116
  resolver.notes.each do |note|
@@ -93,28 +168,33 @@ module PgSync
93
168
  def run_tasks(tasks, &block)
94
169
  notices = []
95
170
  failed_tables = []
96
-
97
- spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
98
- task_spinners = {}
99
171
  started_at = {}
100
172
 
173
+ show_spinners = output.tty? && !opts[:in_batches] && !opts[:debug]
174
+ if show_spinners
175
+ spinners = TTY::Spinner::Multi.new(format: :dots, output: output)
176
+ task_spinners = {}
177
+ end
178
+
101
179
  start = lambda do |task, i|
102
180
  message = ":spinner #{display_item(task)}"
103
- spinner = spinners.register(message)
104
- if opts[:in_batches]
105
- # log instead of spin for non-tty
106
- log message.sub(":spinner", "⠋")
107
- else
181
+
182
+ if show_spinners
183
+ spinner = spinners.register(message)
108
184
  spinner.auto_spin
185
+ task_spinners[task] = spinner
186
+ elsif opts[:in_batches]
187
+ log message.sub(":spinner", "⠋")
109
188
  end
110
- task_spinners[task] = spinner
189
+
111
190
  started_at[task] = Time.now
112
191
  end
113
192
 
114
193
  finish = lambda do |task, i, result|
115
- spinner = task_spinners[task]
116
194
  time = (Time.now - started_at[task]).round(1)
117
195
 
196
+ success = result[:status] == "success"
197
+
118
198
  message =
119
199
  if result[:message]
120
200
  "(#{result[:message].lines.first.to_s.strip})"
@@ -124,23 +204,30 @@ module PgSync
124
204
 
125
205
  notices.concat(result[:notices])
126
206
 
127
- if result[:status] == "success"
128
- spinner.success(message)
207
+ if show_spinners
208
+ spinner = task_spinners[task]
209
+ if success
210
+ spinner.success(message)
211
+ else
212
+ spinner.error(message)
213
+ end
129
214
  else
130
- spinner.error(message)
131
- failed_tables << task_name(task)
132
- fail_sync(failed_tables) if opts[:fail_fast]
215
+ status = success ? "✔" : "✖"
216
+ log [status, display_item(task), message].join(" ")
133
217
  end
134
218
 
135
- unless spinner.send(:tty?)
136
- status = result[:status] == "success" ? "✔" : "✖"
137
- log [status, display_item(task), message].join(" ")
219
+ unless success
220
+ failed_tables << task_name(task)
221
+ fail_sync(failed_tables) if opts[:fail_fast]
138
222
  end
139
223
  end
140
224
 
141
225
  options = {start: start, finish: finish}
142
226
 
143
227
  jobs = opts[:jobs]
228
+
229
+ # disable multiple jobs for defer constraints and disable integrity
230
+ # so we can use a transaction to ensure a consistent snapshot
144
231
  if opts[:debug] || opts[:in_batches] || opts[:defer_constraints] || opts[:defer_constraints_v2] || opts[:disable_integrity] || opts[:disable_integrity_v2]
145
232
  warning "--jobs ignored" if jobs
146
233
  jobs = 0
@@ -171,6 +258,7 @@ module PgSync
171
258
  fail_sync(failed_tables) if failed_tables.any?
172
259
  end
173
260
 
261
+ # TODO add option to open transaction on source when manually specifying order of tables
174
262
  def maybe_defer_constraints
175
263
  if opts[:disable_integrity] || opts[:disable_integrity_v2]
176
264
  # create a transaction on the source
@@ -3,7 +3,7 @@ module PgSync
3
3
  include Utils
4
4
 
5
5
  attr_reader :source, :destination, :config, :table, :opts
6
- attr_accessor :from_columns, :to_columns
6
+ attr_accessor :from_columns, :to_columns, :from_sequences, :to_sequences, :to_primary_key
7
7
 
8
8
  def initialize(source:, destination:, config:, table:, opts:)
9
9
  @source = source
@@ -11,6 +11,8 @@ module PgSync
11
11
  @config = config
12
12
  @table = table
13
13
  @opts = opts
14
+ @from_sequences = []
15
+ @to_sequences = []
14
16
  end
15
17
 
16
18
  def quoted_table
@@ -39,14 +41,6 @@ module PgSync
39
41
  @shared_fields ||= to_fields & from_fields
40
42
  end
41
43
 
42
- def from_sequences
43
- @from_sequences ||= opts[:no_sequences] ? [] : source.sequences(table, shared_fields)
44
- end
45
-
46
- def to_sequences
47
- @to_sequences ||= opts[:no_sequences] ? [] : destination.sequences(table, shared_fields)
48
- end
49
-
50
44
  def shared_sequences
51
45
  @shared_sequences ||= to_sequences & from_sequences
52
46
  end
@@ -88,15 +82,10 @@ module PgSync
88
82
  sql_clause << " #{opts[:sql]}" if opts[:sql]
89
83
 
90
84
  bad_fields = opts[:no_rules] ? [] : config["data_rules"]
91
- primary_key = destination.primary_key(table)
85
+ primary_key = to_primary_key
92
86
  copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ")
93
87
  fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
94
88
 
95
- seq_values = {}
96
- shared_sequences.each do |seq|
97
- seq_values[seq] = source.last_value(seq)
98
- end
99
-
100
89
  copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
101
90
  if opts[:in_batches]
102
91
  raise Error, "No primary key" if primary_key.empty?
@@ -163,8 +152,11 @@ module PgSync
163
152
  end
164
153
  copy(copy_to_command, dest_table: table, dest_fields: fields)
165
154
  end
166
- seq_values.each do |seq, value|
167
- destination.execute("SELECT setval(#{escape(seq)}, #{escape(value)})")
155
+
156
+ # update sequences
157
+ shared_sequences.each do |seq|
158
+ value = source.last_value(seq)
159
+ destination.execute("SELECT setval(#{escape(quote_ident_full(seq))}, #{escape(value)})")
168
160
  end
169
161
 
170
162
  {status: "success"}
@@ -214,6 +206,10 @@ module PgSync
214
206
 
215
207
  def copy(source_command, dest_table:, dest_fields:)
216
208
  destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN"
209
+
210
+ source.log_sql(source_command)
211
+ destination.log_sql(destination_command)
212
+
217
213
  destination.conn.copy_data(destination_command) do
218
214
  source.conn.copy_data(source_command) do
219
215
  while (row = source.conn.get_copy_data)
@@ -3,7 +3,8 @@ module PgSync
3
3
  COLOR_CODES = {
4
4
  red: 31,
5
5
  green: 32,
6
- yellow: 33
6
+ yellow: 33,
7
+ cyan: 36
7
8
  }
8
9
 
9
10
  def log(message = nil)
@@ -59,7 +60,7 @@ module PgSync
59
60
  end
60
61
 
61
62
  def quote_ident_full(ident)
62
- if ident.is_a?(Table)
63
+ if ident.is_a?(Table) || ident.is_a?(Sequence)
63
64
  [quote_ident(ident.schema), quote_ident(ident.name)].join(".")
64
65
  else # temp table names are strings
65
66
  quote_ident(ident)
@@ -1,3 +1,3 @@
1
1
  module PgSync
2
- VERSION = "0.6.3"
2
+ VERSION = "0.6.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgsync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.3
4
+ version: 0.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-10 00:00:00.000000000 Z
11
+ date: 2020-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: parallel
@@ -125,6 +125,7 @@ files:
125
125
  - lib/pgsync/data_source.rb
126
126
  - lib/pgsync/init.rb
127
127
  - lib/pgsync/schema_sync.rb
128
+ - lib/pgsync/sequence.rb
128
129
  - lib/pgsync/sync.rb
129
130
  - lib/pgsync/table.rb
130
131
  - lib/pgsync/table_sync.rb