pgsync 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pgsync might be problematic. Click here for more details.

@@ -1,10 +1,11 @@
1
1
  module PgSync
2
2
  class DataSource
3
+ include Utils
4
+
3
5
  attr_reader :url
4
6
 
5
- def initialize(source, timeout: 3)
6
- @url = resolve_url(source)
7
- @timeout = timeout
7
+ def initialize(url)
8
+ @url = url
8
9
  end
9
10
 
10
11
  def exists?
@@ -30,8 +31,18 @@ module PgSync
30
31
  # gets visible tables
31
32
  def tables
32
33
  @tables ||= begin
33
- query = "SELECT table_schema, table_name FROM information_schema.tables WHERE table_type = 'BASE TABLE' AND table_schema NOT IN ('information_schema', 'pg_catalog') ORDER BY 1, 2"
34
- execute(query).map { |row| "#{row["table_schema"]}.#{row["table_name"]}" }
34
+ query = <<~SQL
35
+ SELECT
36
+ table_schema AS schema,
37
+ table_name AS table
38
+ FROM
39
+ information_schema.tables
40
+ WHERE
41
+ table_type = 'BASE TABLE' AND
42
+ table_schema NOT IN ('information_schema', 'pg_catalog')
43
+ ORDER BY 1, 2
44
+ SQL
45
+ execute(query).map { |row| Table.new(row["schema"], row["table"]) }
35
46
  end
36
47
  end
37
48
 
@@ -39,25 +50,21 @@ module PgSync
39
50
  table_set.include?(table)
40
51
  end
41
52
 
42
- def columns(table)
43
- query = "SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2"
44
- execute(query, table.split(".", 2)).map { |row| row["column_name"] }
45
- end
46
-
47
53
  def sequences(table, columns)
48
- execute("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape("#{quote_ident_full(table)}")}, #{escape(f)}) AS #{quote_ident(f)}" }.join(", ")}")[0].values.compact
54
+ execute("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape("#{quote_ident_full(table)}")}, #{escape(f)}) AS #{quote_ident(f)}" }.join(", ")}").first.values.compact
49
55
  end
50
56
 
51
57
  def max_id(table, primary_key, sql_clause = nil)
52
- execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}")[0]["max"].to_i
58
+ execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["max"].to_i
53
59
  end
54
60
 
55
61
  def min_id(table, primary_key, sql_clause = nil)
56
- execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}")[0]["min"].to_i
62
+ execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["min"].to_i
57
63
  end
58
64
 
65
+ # this value comes from pg_get_serial_sequence which is already quoted
59
66
  def last_value(seq)
60
- execute("select last_value from #{seq}")[0]["last_value"]
67
+ execute("SELECT last_value FROM #{seq}").first["last_value"]
61
68
  end
62
69
 
63
70
  def truncate(table)
@@ -65,40 +72,57 @@ module PgSync
65
72
  end
66
73
 
67
74
  # https://stackoverflow.com/a/20537829
75
+ # TODO can simplify with array_position in Postgres 9.5+
68
76
  def primary_key(table)
69
- query = <<-SQL
77
+ query = <<~SQL
70
78
  SELECT
71
79
  pg_attribute.attname,
72
- format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
80
+ format_type(pg_attribute.atttypid, pg_attribute.atttypmod),
81
+ pg_attribute.attnum,
82
+ pg_index.indkey
73
83
  FROM
74
84
  pg_index, pg_class, pg_attribute, pg_namespace
75
85
  WHERE
76
- pg_class.oid = $2::regclass AND
77
- indrelid = pg_class.oid AND
78
86
  nspname = $1 AND
87
+ relname = $2 AND
88
+ indrelid = pg_class.oid AND
79
89
  pg_class.relnamespace = pg_namespace.oid AND
80
90
  pg_attribute.attrelid = pg_class.oid AND
81
91
  pg_attribute.attnum = any(pg_index.indkey) AND
82
92
  indisprimary
83
93
  SQL
84
- row = execute(query, [table.split(".", 2)[0], quote_ident_full(table)])[0]
85
- row && row["attname"]
94
+ rows = execute(query, [table.schema, table.name])
95
+ rows.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["attname"] }
96
+ end
97
+
98
+ def triggers(table)
99
+ query = <<~SQL
100
+ SELECT
101
+ tgname AS name,
102
+ tgisinternal AS internal,
103
+ tgenabled != 'D' AS enabled,
104
+ tgconstraint != 0 AS integrity
105
+ FROM
106
+ pg_trigger
107
+ WHERE
108
+ pg_trigger.tgrelid = $1::regclass
109
+ SQL
110
+ execute(query, [quote_ident_full(table)])
86
111
  end
87
112
 
88
113
  def conn
89
114
  @conn ||= begin
90
115
  begin
91
- ENV["PGCONNECT_TIMEOUT"] ||= @timeout.to_s
116
+ ENV["PGCONNECT_TIMEOUT"] ||= "3"
92
117
  if @url =~ /\Apostgres(ql)?:\/\//
93
118
  config = @url
94
119
  else
95
120
  config = {dbname: @url}
96
121
  end
122
+ @concurrent_id = concurrent_id
97
123
  PG::Connection.new(config)
98
- rescue PG::ConnectionBad => e
99
- raise PgSync::Error, e.message
100
124
  rescue URI::InvalidURIError
101
- raise PgSync::Error, "Invalid connection string"
125
+ raise Error, "Invalid connection string. Make sure it works with `psql`"
102
126
  end
103
127
  end
104
128
  end
@@ -110,79 +134,56 @@ module PgSync
110
134
  end
111
135
  end
112
136
 
113
- def dump_command(tables)
114
- tables = tables ? tables.keys.map { |t| "-t #{Shellwords.escape(quote_ident_full(t))}" }.join(" ") : ""
115
- "pg_dump -Fc --verbose --schema-only --no-owner --no-acl #{tables} -d #{@url}"
116
- end
117
-
118
- def restore_command
119
- psql_version = `psql --version`.lines[0].chomp.split(" ")[-1].split(/[^\d.]/)[0]
120
- if_exists = Gem::Version.new(psql_version) >= Gem::Version.new("9.4.0")
121
- "pg_restore --verbose --no-owner --no-acl --clean #{if_exists ? "--if-exists" : nil} -d #{@url}"
122
- end
123
-
124
- def fully_resolve_tables(tables)
125
- no_schema_tables = {}
126
- search_path_index = Hash[search_path.map.with_index.to_a]
127
- self.tables.group_by { |t| t.split(".", 2)[-1] }.each do |group, t2|
128
- no_schema_tables[group] = t2.sort_by { |t| [search_path_index[t.split(".", 2)[0]] || 1000000, t] }[0]
129
- end
130
-
131
- Hash[tables.map { |k, v| [no_schema_tables[k] || k, v] }]
137
+ # reconnect for new thread or process
138
+ def reconnect_if_needed
139
+ reconnect if @concurrent_id != concurrent_id
132
140
  end
133
141
 
134
142
  def search_path
135
- @search_path ||= execute("SELECT current_schemas(true)")[0]["current_schemas"][1..-2].split(",")
143
+ @search_path ||= execute("SELECT unnest(current_schemas(true)) AS schema").map { |r| r["schema"] }
136
144
  end
137
145
 
138
- private
139
-
140
- def table_set
141
- @table_set ||= Set.new(tables)
146
+ def server_version_num
147
+ @server_version_num ||= execute("SHOW server_version_num").first["server_version_num"].to_i
142
148
  end
143
149
 
144
- def conninfo
145
- @conninfo ||= conn.conninfo_hash
150
+ def execute(query, params = [])
151
+ conn.exec_params(query, params).to_a
146
152
  end
147
153
 
148
- def quote_ident_full(ident)
149
- ident.split(".", 2).map { |v| quote_ident(v) }.join(".")
154
+ def transaction
155
+ if conn.transaction_status == 0
156
+ # not currently in transaction
157
+ conn.transaction do
158
+ yield
159
+ end
160
+ else
161
+ yield
162
+ end
150
163
  end
151
164
 
152
- def execute(query, params = [])
153
- conn.exec_params(query, params).to_a
154
- end
165
+ private
155
166
 
156
- def quote_ident(value)
157
- PG::Connection.quote_ident(value)
167
+ def concurrent_id
168
+ [Process.pid, Thread.current.object_id]
158
169
  end
159
170
 
160
- def escape(value)
161
- if value.is_a?(String)
162
- "'#{quote_string(value)}'"
163
- else
164
- value
165
- end
171
+ def reconnect
172
+ @conn.reset
173
+ @concurrent_id = concurrent_id
166
174
  end
167
175
 
168
- # activerecord
169
- def quote_string(s)
170
- s.gsub(/\\/, '\&\&').gsub(/'/, "''")
176
+ def table_set
177
+ @table_set ||= Set.new(tables)
171
178
  end
172
179
 
173
- def resolve_url(source)
174
- if source
175
- source = source.dup
176
- source.gsub!(/\$\([^)]+\)/) do |m|
177
- command = m[2..-2]
178
- result = `#{command}`.chomp
179
- unless $?.success?
180
- raise PgSync::Error, "Command exited with non-zero status:\n#{command}"
181
- end
182
- result
180
+ def conninfo
181
+ @conninfo ||= begin
182
+ unless conn.respond_to?(:conninfo_hash)
183
+ raise Error, "libpq is too old. Upgrade it and run `gem install pg`"
183
184
  end
185
+ conn.conninfo_hash
184
186
  end
185
- source
186
187
  end
187
188
  end
188
189
  end
@@ -0,0 +1,61 @@
1
module PgSync
  # Handles `pgsync --init`: writes a starter config file from the bundled
  # config.yml template.
  class Init
    include Utils

    # arguments - positional CLI arguments (at most one: the db name)
    # options   - parsed CLI options (reads :config and :db)
    def initialize(arguments, options)
      @arguments = arguments
      @options = options
    end

    # Creates the config file, raising Error if the usage is wrong or the
    # target file already exists.
    def perform
      raise Error, "Usage:\n pgsync --init [db]" if @arguments.size > 1

      file = target_file
      raise Error, "#{file} exists." if File.exist?(file)

      exclude = exclude_section

      # create file
      contents = File.read(__dir__ + "/../../config.yml")
      contents.sub!("$(some_command)", "$(heroku config:get DATABASE_URL)") if heroku?
      File.write(file, contents % {exclude: exclude})

      log "#{file} created. Add your database credentials."
    end

    # True when a git remote pointing at Heroku is configured.
    # Any StandardError (for example git not being installed) counts as "no".
    def heroku?
      remotes = `git remote -v 2>&1`
      remotes.include?("git.heroku.com")
    rescue StandardError
      false
    end

    # True when the current directory has a bin/rails file.
    def rails?
      File.exist?("bin/rails")
    end

    private

    # Picks the config file to create: an explicit --config wins, then the
    # positional db name, then --db, then the default file name.
    def target_file
      return @options[:config] if @options[:config]
      return db_config_file(@arguments.first) if @arguments.any?
      return db_config_file(@options[:db]) if @options[:db]

      ".pgsync.yml"
    end

    # Text substituted for %{exclude} in the template: a live exclude list
    # when rails? is true, a commented-out example otherwise.
    def exclude_section
      return <<~EOS if rails?
        exclude:
        - schema_migrations
        - ar_internal_metadata
      EOS

      <<~EOS
        # exclude:
        # - table1
        # - table2
      EOS
    end
  end
end
@@ -0,0 +1,83 @@
1
module PgSync
  # Syncs the schema by piping `pg_dump --schema-only` from the source into
  # `pg_restore --clean` on the destination.
  class SchemaSync
    include Utils

    attr_reader :args, :opts

    # source/destination - objects responding to #url
    # tasks              - objects responding to #quoted_table
    # args               - positional CLI arguments
    # opts               - options hash (reads :preserve, :debug, :all_schemas,
    #                      :tables, :groups, :exclude, :schemas)
    def initialize(source:, destination:, tasks:, args:, opts:)
      @source = source
      @destination = destination
      @tasks = tasks
      @args = args
      @opts = opts
    end

    # Runs the dump/restore pipeline.
    #
    # Raises Error when --preserve is set, when pg_restore is missing, or when
    # the pipeline exits with a non-zero status.
    def perform
      if opts[:preserve]
        raise Error, "Cannot use --preserve with --schema-first or --schema-only"
      end

      show_spinner = output.tty? && !opts[:debug]

      if show_spinner
        spinner = TTY::Spinner.new(":spinner Syncing schema", format: :dots)
        spinner.auto_spin
      end

      # if spinner, capture lines to show on error
      lines = []
      success =
        run_command("#{dump_command} | #{restore_command}") do |line|
          if show_spinner
            lines << line
          else
            log line
          end
        end

      if show_spinner
        if success
          spinner.success
        else
          spinner.error
          log lines.join
        end
      end

      raise Error, "Schema sync returned non-zero exit code" unless success
    end

    private

    # Runs command through the shell, yielding each line of combined
    # stdout/stderr. Returns true when the command exits successfully.
    # NOTE(review): for a shell pipeline the reported status is normally that
    # of the last command only, so a pg_dump failure may go unnoticed - confirm.
    def run_command(command)
      Open3.popen2e(command) do |stdin, stdout, wait_thr|
        stdout.each do |line|
          yield line
        end
        wait_thr.value.success?
      end
    end

    # Version string reported by the local pg_restore binary.
    # Raises Error when the binary cannot be found.
    def pg_restore_version
      `pg_restore --version`.lines[0].chomp.split(" ")[-1].split(/[^\d.]/)[0]
    rescue Errno::ENOENT
      raise Error, "pg_restore not found"
    end

    # Shell command that dumps the source schema. Tables are restricted with
    # -t unless --all-schemas is set and no other filter is in play.
    def dump_command
      tables =
        if !opts[:all_schemas] || opts[:tables] || opts[:groups] || args[0] || opts[:exclude] || opts[:schemas]
          @tasks.map { |task| "-t #{Shellwords.escape(task.quoted_table)}" }
        else
          []
        end

      # escape the URL: it is interpolated into a shell command and may
      # contain characters such as & ; spaces or $( )
      "pg_dump -Fc --verbose --schema-only --no-owner --no-acl #{tables.join(" ")} -d #{Shellwords.escape(@source.url)}"
    end

    # Shell command that restores into the destination. --if-exists is only
    # passed when the local pg_restore is 9.4.0 or newer.
    def restore_command
      if_exists = Gem::Version.new(pg_restore_version) >= Gem::Version.new("9.4.0")
      # escape the URL for the same reason as in dump_command
      "pg_restore --verbose --no-owner --no-acl --clean #{if_exists ? "--if-exists" : nil} -d #{Shellwords.escape(@destination.url)}"
    end
  end
end
@@ -0,0 +1,162 @@
1
module PgSync
  # Entry point for a sync run: merges config into the options, validates the
  # source and destination, builds the tasks, then runs the schema and/or
  # table sync.
  class Sync
    include Utils

    # arguments - positional CLI arguments
    # options   - parsed CLI options; #perform fills in missing values from
    #             the config file, mutating this hash
    def initialize(arguments, options)
      @arguments = arguments
      @options = options
    end

    # Runs the sync. Raises Error on bad usage, a missing source or
    # destination, an unsafe destination, or unsupported option combinations.
    def perform
      started_at = Time.now

      args = @arguments
      opts = @options

      # only resolve commands from config, not CLI arguments
      [:to, :from].each do |opt|
        opts[opt] ||= resolve_source(config[opt.to_s])
      end

      # merge other config
      [:to_safe, :exclude, :schemas].each do |opt|
        opts[opt] ||= config[opt.to_s]
      end

      if args.size > 2
        raise Error, "Usage:\n pgsync [options]"
      end

      raise Error, "No source" unless source.exists?
      raise Error, "No destination" unless destination.exists?

      unless opts[:to_safe] || destination.local?
        raise Error, "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1"
      end

      if (opts[:preserve] || opts[:overwrite]) && destination.server_version_num < 90500
        raise Error, "Postgres 9.5+ is required for --preserve and --overwrite"
      end

      print_description("From", source)
      print_description("To", destination)

      resolver = TaskResolver.new(args: args, opts: opts, source: source, destination: destination, config: config, first_schema: first_schema)
      tasks =
        resolver.tasks.map do |task|
          Task.new(source: source, destination: destination, config: config, table: task[:table], opts: opts.merge(sql: task[:sql]))
        end

      if opts[:in_batches] && tasks.size > 1
        raise Error, "Cannot use --in-batches with multiple tables"
      end

      confirm_tables_exist(source, tasks, "source")

      if opts[:list]
        confirm_tables_exist(destination, tasks, "destination")
        tasks.each do |task|
          log task_name(task)
        end
      else
        if opts[:schema_first] || opts[:schema_only]
          SchemaSync.new(source: source, destination: destination, tasks: tasks, args: args, opts: opts).perform
        end

        unless opts[:schema_only]
          TableSync.new(source: source, destination: destination, tasks: tasks, opts: opts, resolver: resolver).perform
        end

        log_completed(started_at)
      end
    end

    private

    # Parsed config file contents, memoized. Returns {} when there is no
    # config file or the file is empty. Raises Error on a YAML syntax error
    # or when the chosen file does not exist.
    def config
      @config ||= begin
        file = config_file
        if file
          begin
            YAML.load_file(file) || {}
          rescue Psych::SyntaxError => e
            raise Error, e.message
          rescue Errno::ENOENT
            raise Error, "Config file not found: #{file}"
          end
        else
          {}
        end
      end
    end

    # Path of the config file to load, or nil when none applies.
    # An explicit --config is used as given; otherwise the db-specific or
    # default file name is looked up through the directory tree.
    def config_file
      if @options[:config]
        @options[:config]
      elsif @options[:db]
        file = db_config_file(@options[:db])
        search_tree(file) || file
      else
        search_tree(".pgsync.yml")
      end
    end

    # Looks for file in the current directory and then in each parent
    # directory up to and including the filesystem root.
    #
    # Returns the first path found, or nil when the file is in none of them.
    def search_tree(file)
      return file if File.exist?(file)

      path = Dir.pwd
      loop do
        absolute_file = File.join(path, file)
        return absolute_file if File.exist?(absolute_file)

        parent = File.dirname(path)
        # File.dirname returns its argument unchanged at the root ("/" or a
        # drive root), which ends the walk and prevents an infinite loop
        return nil if parent == path

        path = parent
      end
    end

    # Logs the database name, plus host and port when a host is set.
    def print_description(prefix, source)
      location = " on #{source.host}:#{source.port}" if source.host
      log "#{prefix}: #{source.dbname}#{location}"
    end

    # Logs the elapsed time since started_at, rounded to one decimal place.
    def log_completed(started_at)
      time = Time.now - started_at
      message = "Completed in #{time.round(1)}s"
      log colorize(message, :green)
    end

    # Memoized data source built from the :from option.
    def source
      @source ||= data_source(@options[:from])
    end

    # Memoized data source built from the :to option.
    def destination
      @destination ||= data_source(@options[:to])
    end

    # Builds a DataSource and registers a finalizer on this object that
    # closes it.
    def data_source(url)
      ds = DataSource.new(url)
      ObjectSpace.define_finalizer(self, self.class.finalize(ds))
      ds
    end

    # Expands every $(command) in source with the chomped output of running
    # that command. Returns a modified copy, or nil when source is nil.
    # Raises Error when a command exits with a non-zero status.
    def resolve_source(source)
      if source
        source = source.dup
        source.gsub!(/\$\([^)]+\)/) do |m|
          # strip the leading "$(" and trailing ")"
          command = m[2..-2]
          result = `#{command}`.chomp
          unless $?.success?
            raise Error, "Command exited with non-zero status:\n#{command}"
          end
          result
        end
      end
      source
    end

    # Finalizer body for data_source. `private` does not apply to `def self.`
    # methods, so this stays callable as self.class.finalize.
    def self.finalize(ds)
      # must use proc instead of stabby lambda
      proc { ds.close }
    end
  end
end