pgsync 0.5.1 → 0.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -0
- data/LICENSE.txt +1 -1
- data/README.md +84 -41
- data/config.yml +5 -4
- data/exe/pgsync +3 -11
- data/lib/pgsync.rb +8 -5
- data/lib/pgsync/client.rb +60 -332
- data/lib/pgsync/data_source.rb +78 -77
- data/lib/pgsync/init.rb +61 -0
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +162 -0
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +168 -208
- data/lib/pgsync/task.rb +315 -0
- data/lib/pgsync/task_resolver.rb +235 -0
- data/lib/pgsync/utils.rb +86 -0
- data/lib/pgsync/version.rb +1 -1
- metadata +11 -5
- data/lib/pgsync/table_list.rb +0 -107
data/lib/pgsync/data_source.rb
CHANGED
@@ -1,10 +1,11 @@
 module PgSync
   class DataSource
+    include Utils
+
     attr_reader :url

-    def initialize(
-      @url =
-      @timeout = timeout
+    def initialize(url)
+      @url = url
     end

     def exists?
@@ -30,8 +31,18 @@ module PgSync
     # gets visible tables
     def tables
       @tables ||= begin
-        query =
-
+        query = <<~SQL
+          SELECT
+            table_schema AS schema,
+            table_name AS table
+          FROM
+            information_schema.tables
+          WHERE
+            table_type = 'BASE TABLE' AND
+            table_schema NOT IN ('information_schema', 'pg_catalog')
+          ORDER BY 1, 2
+        SQL
+        execute(query).map { |row| Table.new(row["schema"], row["table"]) }
       end
     end

@@ -39,25 +50,21 @@ module PgSync
       table_set.include?(table)
     end

-    def columns(table)
-      query = "SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2"
-      execute(query, table.split(".", 2)).map { |row| row["column_name"] }
-    end
-
     def sequences(table, columns)
-      execute("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape("#{quote_ident_full(table)}")}, #{escape(f)}) AS #{quote_ident(f)}" }.join(", ")}")
+      execute("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape("#{quote_ident_full(table)}")}, #{escape(f)}) AS #{quote_ident(f)}" }.join(", ")}").first.values.compact
     end

     def max_id(table, primary_key, sql_clause = nil)
-      execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}")[
+      execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["max"].to_i
     end

     def min_id(table, primary_key, sql_clause = nil)
-      execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}")[
+      execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["min"].to_i
     end

+    # this value comes from pg_get_serial_sequence which is already quoted
     def last_value(seq)
-      execute("
+      execute("SELECT last_value FROM #{seq}").first["last_value"]
     end

     def truncate(table)
@@ -65,40 +72,57 @@ module PgSync
     end

     # https://stackoverflow.com/a/20537829
+    # TODO can simplify with array_position in Postgres 9.5+
     def primary_key(table)
-      query =
+      query = <<~SQL
         SELECT
           pg_attribute.attname,
-          format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
+          format_type(pg_attribute.atttypid, pg_attribute.atttypmod),
+          pg_attribute.attnum,
+          pg_index.indkey
         FROM
           pg_index, pg_class, pg_attribute, pg_namespace
         WHERE
-          pg_class.oid = $2::regclass AND
-          indrelid = pg_class.oid AND
           nspname = $1 AND
+          relname = $2 AND
+          indrelid = pg_class.oid AND
           pg_class.relnamespace = pg_namespace.oid AND
           pg_attribute.attrelid = pg_class.oid AND
           pg_attribute.attnum = any(pg_index.indkey) AND
           indisprimary
       SQL
-
-
+      rows = execute(query, [table.schema, table.name])
+      rows.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["attname"] }
+    end
+
+    def triggers(table)
+      query = <<~SQL
+        SELECT
+          tgname AS name,
+          tgisinternal AS internal,
+          tgenabled != 'D' AS enabled,
+          tgconstraint != 0 AS integrity
+        FROM
+          pg_trigger
+        WHERE
+          pg_trigger.tgrelid = $1::regclass
+      SQL
+      execute(query, [quote_ident_full(table)])
     end

     def conn
       @conn ||= begin
         begin
-          ENV["PGCONNECT_TIMEOUT"] ||=
+          ENV["PGCONNECT_TIMEOUT"] ||= "3"
           if @url =~ /\Apostgres(ql)?:\/\//
             config = @url
           else
             config = {dbname: @url}
           end
+          @concurrent_id = concurrent_id
           PG::Connection.new(config)
-        rescue PG::ConnectionBad => e
-          raise PgSync::Error, e.message
         rescue URI::InvalidURIError
-          raise
+          raise Error, "Invalid connection string. Make sure it works with `psql`"
         end
       end
     end
@@ -110,79 +134,56 @@ module PgSync
       end
     end

-
-
-
-    end
-
-    def restore_command
-      psql_version = `psql --version`.lines[0].chomp.split(" ")[-1].split(/[^\d.]/)[0]
-      if_exists = Gem::Version.new(psql_version) >= Gem::Version.new("9.4.0")
-      "pg_restore --verbose --no-owner --no-acl --clean #{if_exists ? "--if-exists" : nil} -d #{@url}"
-    end
-
-    def fully_resolve_tables(tables)
-      no_schema_tables = {}
-      search_path_index = Hash[search_path.map.with_index.to_a]
-      self.tables.group_by { |t| t.split(".", 2)[-1] }.each do |group, t2|
-        no_schema_tables[group] = t2.sort_by { |t| [search_path_index[t.split(".", 2)[0]] || 1000000, t] }[0]
-      end
-
-      Hash[tables.map { |k, v| [no_schema_tables[k] || k, v] }]
+    # reconnect for new thread or process
+    def reconnect_if_needed
+      reconnect if @concurrent_id != concurrent_id
     end

     def search_path
-      @search_path ||= execute("SELECT current_schemas(true)")[
+      @search_path ||= execute("SELECT unnest(current_schemas(true)) AS schema").map { |r| r["schema"] }
     end

-
-
-    def table_set
-      @table_set ||= Set.new(tables)
+    def server_version_num
+      @server_version_num ||= execute("SHOW server_version_num").first["server_version_num"].to_i
     end

-    def
-
+    def execute(query, params = [])
+      conn.exec_params(query, params).to_a
     end

-    def
-
+    def transaction
+      if conn.transaction_status == 0
+        # not currently in transaction
+        conn.transaction do
+          yield
+        end
+      else
+        yield
+      end
     end

-
-      conn.exec_params(query, params).to_a
-    end
+    private

-    def
-
+    def concurrent_id
+      [Process.pid, Thread.current.object_id]
     end

-    def
-
-
-      else
-        value
-      end
+    def reconnect
+      @conn.reset
+      @concurrent_id = concurrent_id
     end

-
-
-      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
+    def table_set
+      @table_set ||= Set.new(tables)
     end

-    def
-
-
-
-        command = m[2..-2]
-        result = `#{command}`.chomp
-        unless $?.success?
-          raise PgSync::Error, "Command exited with non-zero status:\n#{command}"
-        end
-        result
+    def conninfo
+      @conninfo ||= begin
+        unless conn.respond_to?(:conninfo_hash)
+          raise Error, "libpq is too old. Upgrade it and run `gem install pg`"
         end
+        conn.conninfo_hash
       end
-      source
     end
   end
 end
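The reworked primary_key query above orders composite keys by each column's position in pg_index.indkey rather than by attribute number. A minimal standalone sketch of that ordering step, using made-up rows so it runs without a database connection:

# Sketch of the composite primary key ordering used above: indkey lists
# column numbers in key order, while rows come back in arbitrary order,
# so each row's attnum is sorted by its position within indkey.
# The sample rows below are hypothetical.
rows = [
  {"attname" => "tenant_id", "attnum" => "4", "indkey" => "2 4"},
  {"attname" => "id",        "attnum" => "2", "indkey" => "2 4"}
]

ordered = rows.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }
              .map { |r| r["attname"] }

p ordered # => ["id", "tenant_id"]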
data/lib/pgsync/init.rb
ADDED
@@ -0,0 +1,61 @@
+module PgSync
+  class Init
+    include Utils
+
+    def initialize(arguments, options)
+      @arguments = arguments
+      @options = options
+    end
+
+    def perform
+      if @arguments.size > 1
+        raise Error, "Usage:\n pgsync --init [db]"
+      end
+
+      file =
+        if @options[:config]
+          @options[:config]
+        elsif @arguments.any?
+          db_config_file(@arguments.first)
+        elsif @options[:db]
+          db_config_file(@options[:db])
+        else
+          ".pgsync.yml"
+        end
+
+      if File.exist?(file)
+        raise Error, "#{file} exists."
+      else
+        exclude =
+          if rails?
+            <<~EOS
+              exclude:
+                - schema_migrations
+                - ar_internal_metadata
+            EOS
+          else
+            <<~EOS
+              # exclude:
+              #   - table1
+              #   - table2
+            EOS
+          end
+
+        # create file
+        contents = File.read(__dir__ + "/../../config.yml")
+        contents.sub!("$(some_command)", "$(heroku config:get DATABASE_URL)") if heroku?
+        File.write(file, contents % {exclude: exclude})
+
+        log "#{file} created. Add your database credentials."
+      end
+    end
+
+    def heroku?
+      `git remote -v 2>&1`.include?("git.heroku.com") rescue false
+    end
+
+    def rails?
+      File.exist?("bin/rails")
+    end
+  end
+end
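Init fills the %{exclude} placeholder in the bundled config.yml template with Ruby's format-string interpolation (contents % {exclude: exclude}). A minimal sketch of that interpolation, using a shortened, illustrative template rather than the gem's actual config.yml:

# Hypothetical, shortened template; only the %{exclude} interpolation
# mirrors what Init#perform does with the real config.yml.
template = <<~YAML
  from: $(some_command)
  to: postgres://localhost:5432/myapp_development

  %{exclude}
YAML

exclude = <<~EOS
  exclude:
    - schema_migrations
    - ar_internal_metadata
EOS

puts template % {exclude: exclude}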
data/lib/pgsync/schema_sync.rb
ADDED
@@ -0,0 +1,83 @@
+module PgSync
+  class SchemaSync
+    include Utils
+
+    attr_reader :args, :opts
+
+    def initialize(source:, destination:, tasks:, args:, opts:)
+      @source = source
+      @destination = destination
+      @tasks = tasks
+      @args = args
+      @opts = opts
+    end
+
+    def perform
+      if opts[:preserve]
+        raise Error, "Cannot use --preserve with --schema-first or --schema-only"
+      end
+
+      show_spinner = output.tty? && !opts[:debug]
+
+      if show_spinner
+        spinner = TTY::Spinner.new(":spinner Syncing schema", format: :dots)
+        spinner.auto_spin
+      end
+
+      # if spinner, capture lines to show on error
+      lines = []
+      success =
+        run_command("#{dump_command} | #{restore_command}") do |line|
+          if show_spinner
+            lines << line
+          else
+            log line
+          end
+        end
+
+      if show_spinner
+        if success
+          spinner.success
+        else
+          spinner.error
+          log lines.join
+        end
+      end
+
+      raise Error, "Schema sync returned non-zero exit code" unless success
+    end
+
+    private
+
+    def run_command(command)
+      Open3.popen2e(command) do |stdin, stdout, wait_thr|
+        stdout.each do |line|
+          yield line
+        end
+        wait_thr.value.success?
+      end
+    end
+
+    def pg_restore_version
+      `pg_restore --version`.lines[0].chomp.split(" ")[-1].split(/[^\d.]/)[0]
+    rescue Errno::ENOENT
+      raise Error, "pg_restore not found"
+    end
+
+    def dump_command
+      tables =
+        if !opts[:all_schemas] || opts[:tables] || opts[:groups] || args[0] || opts[:exclude] || opts[:schemas]
+          @tasks.map { |task| "-t #{Shellwords.escape(task.quoted_table)}" }
+        else
+          []
+        end
+
+      "pg_dump -Fc --verbose --schema-only --no-owner --no-acl #{tables.join(" ")} -d #{@source.url}"
+    end
+
+    def restore_command
+      if_exists = Gem::Version.new(pg_restore_version) >= Gem::Version.new("9.4.0")
+      "pg_restore --verbose --no-owner --no-acl --clean #{if_exists ? "--if-exists" : nil} -d #{@destination.url}"
+    end
+  end
+end
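SchemaSync pipes pg_dump straight into pg_restore and streams the combined output through Open3.popen2e, treating the pipeline's exit status as success or failure. A minimal sketch of that run_command pattern, with an echo pipeline standing in for the real pg_dump | pg_restore command so it runs without Postgres:

# Open3.popen2e merges stdout and stderr; each line is streamed to the
# caller and the wait thread's exit status decides success.
require "open3"

def run_command(command)
  Open3.popen2e(command) do |_stdin, stdout, wait_thr|
    stdout.each { |line| yield line }
    wait_thr.value.success?
  end
end

success = run_command("echo 'dumping schema' | cat") { |line| puts line }
puts success ? "ok" : "schema sync returned non-zero exit code"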
data/lib/pgsync/sync.rb
ADDED
@@ -0,0 +1,162 @@
+module PgSync
+  class Sync
+    include Utils
+
+    def initialize(arguments, options)
+      @arguments = arguments
+      @options = options
+    end
+
+    def perform
+      started_at = Time.now
+
+      args = @arguments
+      opts = @options
+
+      # only resolve commands from config, not CLI arguments
+      [:to, :from].each do |opt|
+        opts[opt] ||= resolve_source(config[opt.to_s])
+      end
+
+      # merge other config
+      [:to_safe, :exclude, :schemas].each do |opt|
+        opts[opt] ||= config[opt.to_s]
+      end
+
+      if args.size > 2
+        raise Error, "Usage:\n pgsync [options]"
+      end
+
+      raise Error, "No source" unless source.exists?
+      raise Error, "No destination" unless destination.exists?
+
+      unless opts[:to_safe] || destination.local?
+        raise Error, "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1"
+      end
+
+      if (opts[:preserve] || opts[:overwrite]) && destination.server_version_num < 90500
+        raise Error, "Postgres 9.5+ is required for --preserve and --overwrite"
+      end
+
+      print_description("From", source)
+      print_description("To", destination)
+
+      resolver = TaskResolver.new(args: args, opts: opts, source: source, destination: destination, config: config, first_schema: first_schema)
+      tasks =
+        resolver.tasks.map do |task|
+          Task.new(source: source, destination: destination, config: config, table: task[:table], opts: opts.merge(sql: task[:sql]))
+        end
+
+      if opts[:in_batches] && tasks.size > 1
+        raise Error, "Cannot use --in-batches with multiple tables"
+      end
+
+      confirm_tables_exist(source, tasks, "source")
+
+      if opts[:list]
+        confirm_tables_exist(destination, tasks, "destination")
+        tasks.each do |task|
+          log task_name(task)
+        end
+      else
+        if opts[:schema_first] || opts[:schema_only]
+          SchemaSync.new(source: source, destination: destination, tasks: tasks, args: args, opts: opts).perform
+        end
+
+        unless opts[:schema_only]
+          TableSync.new(source: source, destination: destination, tasks: tasks, opts: opts, resolver: resolver).perform
+        end
+
+        log_completed(started_at)
+      end
+    end
+
+    private
+
+    def config
+      @config ||= begin
+        file = config_file
+        if file
+          begin
+            YAML.load_file(file) || {}
+          rescue Psych::SyntaxError => e
+            raise Error, e.message
+          rescue Errno::ENOENT
+            raise Error, "Config file not found: #{file}"
+          end
+        else
+          {}
+        end
+      end
+    end
+
+    def config_file
+      if @options[:config]
+        @options[:config]
+      elsif @options[:db]
+        file = db_config_file(@options[:db])
+        search_tree(file) || file
+      else
+        search_tree(".pgsync.yml")
+      end
+    end
+
+    def search_tree(file)
+      return file if File.exist?(file)
+
+      path = Dir.pwd
+      # prevent infinite loop
+      20.times do
+        absolute_file = File.join(path, file)
+        break absolute_file if File.exist?(absolute_file)
+        path = File.dirname(path)
+        break if path == "/"
+      end
+    end
+
+    def print_description(prefix, source)
+      location = " on #{source.host}:#{source.port}" if source.host
+      log "#{prefix}: #{source.dbname}#{location}"
+    end
+
+    def log_completed(started_at)
+      time = Time.now - started_at
+      message = "Completed in #{time.round(1)}s"
+      log colorize(message, :green)
+    end
+
+    def source
+      @source ||= data_source(@options[:from])
+    end
+
+    def destination
+      @destination ||= data_source(@options[:to])
+    end
+
+    def data_source(url)
+      ds = DataSource.new(url)
+      ObjectSpace.define_finalizer(self, self.class.finalize(ds))
+      ds
+    end
+
+    def resolve_source(source)
+      if source
+        source = source.dup
+        source.gsub!(/\$\([^)]+\)/) do |m|
+          command = m[2..-2]
+          result = `#{command}`.chomp
+          unless $?.success?
+            raise Error, "Command exited with non-zero status:\n#{command}"
+          end
+          result
+        end
+      end
+      source
+    end
+
+    def self.finalize(ds)
+      # must use proc instead of stabby lambda
+      proc { ds.close }
+    end
+  end
+end
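Sync#resolve_source expands $(command) substitutions embedded in from/to config values before connecting, which is how entries like $(heroku config:get DATABASE_URL) work. A standalone sketch of the same substitution, with echo standing in for a real credential-fetching command:

# $(...) segments are replaced by the trimmed output of the shell command
# they wrap; a non-zero exit status raises instead of silently continuing.
def resolve_source(source)
  return source unless source
  source = source.dup
  source.gsub!(/\$\([^)]+\)/) do |m|
    command = m[2..-2]
    result = `#{command}`.chomp
    raise "Command exited with non-zero status:\n#{command}" unless $?.success?
    result
  end
  source
end

puts resolve_source("$(echo postgres://user@host:5432/source_db)")
# => postgres://user@host:5432/source_db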