pgsync 0.3.8 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pgsync might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 50eb1d7496a1a6025abd64cb19087781c87deed7
4
- data.tar.gz: 6f5c579ff553997d664bc142f91524b2263bf622
3
+ metadata.gz: f8d5362420e8f65878dcb39bfcb945db069d585f
4
+ data.tar.gz: a0d4d2b74ca25148f6ddcb2b4bdfa0bea7ef2c99
5
5
  SHA512:
6
- metadata.gz: c814c85dc2e5d4f9eab683954b0a9473d693e4fa0299b52c75e1525f380380630956d2c50bb0a3212e62eb053bfe4391a108bcb97b4ae51fc3e275b0f1258df7
7
- data.tar.gz: 482f1923f915f5123f334b92d0ce2a3444892ed314994aec73b5b8c551ead29dc365e21275b3ec257d48f5c1d4aa0b922db1bcb11f0ccea6478bfeae8ce631bc
6
+ metadata.gz: ed08a53660ea4973932176da8ea2bb1354f457c67dc9dd9cff832a67eb2b785d399f84a1840e21485579c0d595e81657303213d8831ae8ae72cba6cede713dd2
7
+ data.tar.gz: 4f56fdbec5b5b93b4ff6ffe781edba94076107203e5f620a5708c3487d8a1cd77e8388a06cf0c9ddbd403afefa06832dd648382dfb3dadde9fc857b83ddea089
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ # 0.3.9
2
+
3
+ - Better support for schemas
4
+ - Added `--schemas` option
5
+ - Added `--all-schemas` option
6
+ - Added `--schema-first` option
7
+ - Fixed issue with non-lowercase tables and partial syncs
8
+
1
9
  # 0.3.8
2
10
 
3
11
  - Added Windows support
data/README.md CHANGED
@@ -122,16 +122,24 @@ pgsync product:123
122
122
 
123
123
  ### Schema
124
124
 
125
- Sync schema
125
+ Sync schema before the data
126
126
 
127
127
  ```sh
128
- pgsync --schema-only
128
+ pgsync --schema-first
129
129
  ```
130
130
 
131
+ **Note:** This wipes out existing data
132
+
131
133
  Specify tables
132
134
 
133
135
  ```sh
134
- pgsync table1,table2 --schema-only
136
+ pgsync table1,table2 --schema-first
137
+ ```
138
+
139
+ Or just the schema
140
+
141
+ ```sh
142
+ pgsync --schema-only
135
143
  ```
136
144
 
137
145
  ## Sensitive Information
@@ -161,6 +169,7 @@ Options for replacement are:
161
169
  - statement
162
170
  - unique_email
163
171
  - unique_phone
172
+ - unique_secret
164
173
  - random_letter
165
174
  - random_int
166
175
  - random_date
@@ -246,7 +255,7 @@ To use master, run:
246
255
 
247
256
  ```sh
248
257
  gem install specific_install
249
- gem specific_install ankane/pgsync
258
+ gem specific_install https://github.com/ankane/pgsync.git
250
259
  ```
251
260
 
252
261
  ## Thanks
@@ -0,0 +1,280 @@
1
+ module PgSync
2
+ class Client
3
+ def initialize(args)
4
+ $stdout.sync = true
5
+ @exit = false
6
+ @arguments, @options = parse_args(args)
7
+ @mutex = windows? ? Mutex.new : MultiProcessing::Mutex.new
8
+ end
9
+
10
+ # TODO clean up this mess
11
+ def perform
12
+ return if @exit
13
+
14
+ args, opts = @arguments, @options
15
+ [:to, :from, :to_safe, :exclude].each do |opt|
16
+ opts[opt] ||= config[opt.to_s]
17
+ end
18
+ map_deprecations(args, opts)
19
+
20
+ if opts[:setup]
21
+ setup(db_config_file(args[0]) || config_file || ".pgsync.yml")
22
+ else
23
+ sync(args, opts)
24
+ end
25
+
26
+ true
27
+ end
28
+
29
+ protected
30
+
31
+ def sync(args, opts)
32
+ start_time = Time.now
33
+
34
+ if args.size > 2
35
+ raise PgSync::Error, "Usage:\n pgsync [options]"
36
+ end
37
+
38
+ source = DataSource.new(opts[:from])
39
+ raise PgSync::Error, "No source" unless source.exists?
40
+
41
+ destination = DataSource.new(opts[:to])
42
+ raise PgSync::Error, "No destination" unless destination.exists?
43
+
44
+ unless opts[:to_safe] || destination.local?
45
+ raise PgSync::Error, "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1"
46
+ end
47
+
48
+ print_description("From", source)
49
+ print_description("To", destination)
50
+
51
+ tables = nil
52
+ begin
53
+ tables = TableList.new(args, opts, source, config).tables
54
+ ensure
55
+ source.close
56
+ end
57
+
58
+ confirm_tables_exist(source, tables, "source")
59
+
60
+ if opts[:list]
61
+ confirm_tables_exist(destination, tables, "destination")
62
+
63
+ list_items =
64
+ if args[0] == "groups"
65
+ (config["groups"] || {}).keys
66
+ else
67
+ tables.keys
68
+ end
69
+
70
+ pretty_list list_items
71
+ else
72
+ if opts[:schema_first] || opts[:schema_only]
73
+ if opts[:preserve]
74
+ raise PgSync::Error, "Cannot use --preserve with --schema-first or --schema-only"
75
+ end
76
+
77
+ log "* Dumping schema"
78
+ sync_schema(source, destination, tables)
79
+ end
80
+
81
+ unless opts[:schema_only]
82
+ confirm_tables_exist(destination, tables, "destination")
83
+
84
+ in_parallel(tables) do |table, table_opts|
85
+ TableSync.new.sync_with_benchmark(@mutex, config, table, opts.merge(table_opts), source.url, destination.url)
86
+ end
87
+ end
88
+
89
+ log_completed(start_time)
90
+ end
91
+ end
92
+
93
# Verifies that every table named in +tables+ is present in the given
# data source, closing the data source's connection afterwards whether
# or not the check succeeds.
#
# @param destination [#table_exists?, #close] data source to check
# @param tables [Hash] table name => per-table options
# @param description [String] label used in the error message
# @raise [PgSync::Error] for the first table that is not found
def confirm_tables_exist(destination, tables, description)
  names = tables.keys
  missing = names.find { |name| !destination.table_exists?(name) }
  raise PgSync::Error, "Table does not exist in #{description}: #{missing}" if missing

  names
ensure
  destination.close
end
102
+
103
# Translates deprecated invocation styles into their current equivalents,
# updating +args+ and +opts+ in place and emitting one deprecation notice
# (via +deprecated+) per legacy form encountered.
#
# Legacy subcommands (`setup`, `schema`, `tables X`, `groups X`) are
# shifted off +args+ and recorded in +opts+; the legacy `--where` and
# `--limit` options are appended to +opts[:sql]+.
#
# @param args [Array<String>] positional command-line arguments
# @param opts [Hash] parsed options
def map_deprecations(args, opts)
  case args[0]
  when "setup"
    args.shift
    opts[:setup] = true
    deprecated "Use `pgsync --setup` instead"
  when "schema"
    args.shift
    opts[:schema_only] = true
    deprecated "Use `pgsync --schema-only` instead"
  when "tables", "groups"
    # the subcommand name doubles as the option key (:tables / :groups)
    key = args.shift.to_sym
    opts[key] = args.shift
    deprecated "Use `pgsync #{opts[key]}` instead"
  end

  { where: "WHERE", limit: "LIMIT" }.each do |key, keyword|
    next unless opts[key]

    clause = "#{keyword} #{opts[key]}"
    # build a new string rather than appending in place, so an existing
    # (possibly frozen or shared) :sql value is never mutated
    opts[:sql] = "#{opts[:sql]} #{clause}"
    deprecated "Use `\"#{clause}\"` instead"
  end
end
137
+
138
# Copies the schema for +tables+ by piping the source's dump command
# into the destination's restore command through the shell.
#
# @param source [#dump_command] provides the dump side of the pipeline
# @param destination [#restore_command] provides the restore side
# @param tables [Hash] table name => per-table options
# @return whatever Kernel#system returns for the pipeline
def sync_schema(source, destination, tables)
  pipeline = [source.dump_command(tables), destination.restore_command].join(" | ")
  system(pipeline)
end
143
+
144
+ def parse_args(args)
145
+ opts = Slop.parse(args) do |o|
146
+ o.banner = %{Usage:
147
+ pgsync [options]
148
+
149
+ Options:}
150
+ o.string "-d", "--db", "database"
151
+ o.string "-t", "--tables", "tables to sync"
152
+ o.string "-g", "--groups", "groups to sync"
153
+ o.string "--schemas", "schemas to sync"
154
+ o.string "--from", "source"
155
+ o.string "--to", "destination"
156
+ o.string "--where", "where", help: false
157
+ o.integer "--limit", "limit", help: false
158
+ o.string "--exclude", "exclude tables"
159
+ o.string "--config", "config file"
160
+ # TODO much better name for this option
161
+ o.boolean "--to-safe", "accept danger", default: false
162
+ o.boolean "--debug", "debug", default: false
163
+ o.boolean "--list", "list", default: false
164
+ o.boolean "--overwrite", "overwrite existing rows", default: false, help: false
165
+ o.boolean "--preserve", "preserve existing rows", default: false
166
+ o.boolean "--truncate", "truncate existing rows", default: false
167
+ o.boolean "--schema-first", "schema first", default: false
168
+ o.boolean "--schema-only", "schema only", default: false
169
+ o.boolean "--all-schemas", "all schemas", default: false
170
+ o.boolean "--no-rules", "do not apply data rules", default: false
171
+ o.boolean "--setup", "setup", default: false
172
+ o.boolean "--in-batches", "in batches", default: false, help: false
173
+ o.integer "--batch-size", "batch size", default: 10000, help: false
174
+ o.float "--sleep", "sleep", default: 0, help: false
175
+ o.on "-v", "--version", "print the version" do
176
+ log PgSync::VERSION
177
+ @exit = true
178
+ end
179
+ o.on "-h", "--help", "prints help" do
180
+ log o
181
+ @exit = true
182
+ end
183
+ end
184
+ [opts.arguments, opts.to_hash]
185
+ rescue Slop::Error => e
186
+ raise PgSync::Error, e.message
187
+ end
188
+
189
+ def config
190
+ @config ||= begin
191
+ if config_file
192
+ begin
193
+ YAML.load_file(config_file) || {}
194
+ rescue Psych::SyntaxError => e
195
+ raise PgSync::Error, e.message
196
+ end
197
+ else
198
+ {}
199
+ end
200
+ end
201
+ end
202
+
203
+ def setup(config_file)
204
+ if File.exist?(config_file)
205
+ raise PgSync::Error, "#{config_file} exists."
206
+ else
207
+ FileUtils.cp(File.dirname(__FILE__) + "/../../config.yml", config_file)
208
+ log "#{config_file} created. Add your database credentials."
209
+ end
210
+ end
211
+
212
+ def db_config_file(db)
213
+ return unless db
214
+ ".pgsync-#{db}.yml"
215
+ end
216
+
217
+ def print_description(prefix, source)
218
+ log "#{prefix}: #{source.uri.path.sub(/\A\//, '')} on #{source.uri.host}:#{source.uri.port}"
219
+ end
220
+
221
# Looks for +file+ in the current working directory and then in each
# ancestor directory, up to and including the filesystem root.
#
# @param file [String] file name (or relative path) to look for
# @return [String, nil] absolute path of the nearest match, or nil when
#   no directory on the way up contains it
def search_tree(file)
  path = Dir.pwd
  loop do
    candidate = File.join(path, file)
    return candidate if File.exist?(candidate)

    parent = File.dirname(path)
    # File.dirname returns its argument unchanged at the root ("/" or a
    # drive root such as "C:/"), so this ends the walk on every platform
    # and always yields nil rather than a leftover loop counter
    return nil if parent == path

    path = parent
  end
end
233
+
234
+ def config_file
235
+ return @config_file if instance_variable_defined?(:@config_file)
236
+
237
+ @config_file =
238
+ search_tree(
239
+ if @options[:db]
240
+ db_config_file(@options[:db])
241
+ else
242
+ @options[:config] || ".pgsync.yml"
243
+ end
244
+ )
245
+ end
246
+
247
+ def log(message = nil)
248
+ $stderr.puts message
249
+ end
250
+
251
+ def in_parallel(tables, &block)
252
+ if @options[:debug] || @options[:in_batches]
253
+ tables.each(&block)
254
+ else
255
+ options = {}
256
+ options[:in_threads] = 4 if windows?
257
+ Parallel.each(tables, options, &block)
258
+ end
259
+ end
260
+
261
+ def pretty_list(items)
262
+ items.each do |item|
263
+ log item
264
+ end
265
+ end
266
+
267
+ def deprecated(message)
268
+ log "[DEPRECATED] #{message}"
269
+ end
270
+
271
+ def log_completed(start_time)
272
+ time = Time.now - start_time
273
+ log "Completed in #{time.round(1)}s"
274
+ end
275
+
276
+ def windows?
277
+ Gem.win_platform?
278
+ end
279
+ end
280
+ end
@@ -0,0 +1,191 @@
1
+ module PgSync
2
+ class DataSource
3
+ attr_reader :url
4
+
5
+ def initialize(source)
6
+ @url = resolve_url(source)
7
+ end
8
+
9
+ def exists?
10
+ @url && @url.size > 0
11
+ end
12
+
13
+ def local?
14
+ %w(localhost 127.0.0.1).include?(uri.host)
15
+ end
16
+
17
+ def uri
18
+ @uri ||= begin
19
+ uri = URI.parse(@url)
20
+ uri.scheme ||= "postgres"
21
+ uri.host ||= "localhost"
22
+ uri.port ||= 5432
23
+ uri.path = "/#{uri.path}" if uri.path && uri.path[0] != "/"
24
+ uri
25
+ end
26
+ end
27
+
28
+ def schema
29
+ @schema ||= CGI.parse(uri.query.to_s)["schema"][0]
30
+ end
31
+
32
+ def tables
33
+ query = "SELECT schemaname, tablename FROM pg_catalog.pg_tables WHERE schemaname NOT IN ('information_schema', 'pg_catalog') ORDER BY 1, 2"
34
+ execute(query).map { |row| "#{row["schemaname"]}.#{row["tablename"]}" }
35
+ end
36
+
37
+ def table_exists?(table)
38
+ query = "SELECT 1 FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2"
39
+ execute(query, table.split(".", 2)).size > 0
40
+ end
41
+
42
+ def close
43
+ if @conn
44
+ conn.close
45
+ @conn = nil
46
+ end
47
+ end
48
+
49
+ def to_url
50
+ uri = self.uri.dup
51
+ uri.query = nil
52
+ uri.to_s
53
+ end
54
+
55
+ def columns(table)
56
+ query = "SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2"
57
+ execute(query, table.split(".", 2)).map { |row| row["column_name"] }
58
+ end
59
+
60
# Names of the sequences backing the given columns of +table+, looked up
# with pg_get_serial_sequence. Columns without a sequence are omitted.
#
# @param table [String] table name, optionally schema-qualified
# @param columns [Array<String>] column names to inspect
# @return [Array<String>] sequence names (empty when +columns+ is empty)
def sequences(table, columns)
  # an empty select list would be invalid SQL
  return [] if columns.empty?

  relation = escape(quote_ident_full(table))
  selects = columns.map do |column|
    # the alias is an identifier, so it must be quoted: reserved words
    # ("order") and mixed-case names ("Id") are otherwise rejected or folded
    "pg_get_serial_sequence(#{relation}, #{escape(column)}) AS #{quote_ident(column)}"
  end
  execute("SELECT #{selects.join(", ")}")[0].values.compact
end
63
+
64
+ def max_id(table, primary_key, sql_clause = nil)
65
+ execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}")[0]["max"].to_i
66
+ end
67
+
68
+ def min_id(table, primary_key, sql_clause = nil)
69
+ execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}")[0]["min"].to_i
70
+ end
71
+
72
+ def last_value(seq)
73
+ execute("select last_value from #{seq}")[0]["last_value"]
74
+ end
75
+
76
+ def truncate(table)
77
+ execute("TRUNCATE #{quote_ident_full(table)} CASCADE")
78
+ end
79
+
80
+ # http://stackoverflow.com/a/20537829
81
+ def primary_key(table)
82
+ query = <<-SQL
83
+ SELECT
84
+ pg_attribute.attname,
85
+ format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
86
+ FROM
87
+ pg_index, pg_class, pg_attribute, pg_namespace
88
+ WHERE
89
+ pg_class.oid = $2::regclass AND
90
+ indrelid = pg_class.oid AND
91
+ nspname = $1 AND
92
+ pg_class.relnamespace = pg_namespace.oid AND
93
+ pg_attribute.attrelid = pg_class.oid AND
94
+ pg_attribute.attnum = any(pg_index.indkey) AND
95
+ indisprimary
96
+ SQL
97
+ row = execute(query, [table.split(".", 2)[0], quote_ident_full(table)])[0]
98
+ row && row["attname"]
99
+ end
100
+
101
+ # borrowed from
102
+ # ActiveRecord::ConnectionAdapters::ConnectionSpecification::ConnectionUrlResolver
103
+ def conn
104
+ @conn ||= begin
105
+ begin
106
+ uri_parser = URI::Parser.new
107
+ config = {
108
+ host: uri.host,
109
+ port: uri.port,
110
+ dbname: uri.path.sub(/\A\//, ""),
111
+ user: uri.user,
112
+ password: uri.password,
113
+ connect_timeout: 3
114
+ }.reject { |_, value| value.to_s.empty? }
115
+ config.map { |key, value| config[key] = uri_parser.unescape(value) if value.is_a?(String) }
116
+ conn = PG::Connection.new(config)
117
+ rescue PG::ConnectionBad => e
118
+ log
119
+ raise PgSync::Error, e.message
120
+ end
121
+ end
122
+ end
123
+
124
# Builds the shell command that dumps the schema (no data, owners or
# ACLs) of the given tables from this data source in pg_dump's custom
# format.
#
# @param tables [Hash] table name => per-table options; only keys are used
# @return [String] shell-safe pg_dump command line
def dump_command(tables)
  table_flags = tables.keys.map { |t| "-t #{Shellwords.escape(quote_ident_full(t))}" }.join(" ")
  # the URL can carry credentials containing shell metacharacters, so it
  # is escaped just like the table names
  "pg_dump -Fc --verbose --schema-only --no-owner --no-acl #{table_flags} #{Shellwords.escape(to_url)}"
end
128
+
129
# Builds the shell command that restores a dump into this data source,
# dropping existing objects first (--clean). --if-exists is added only
# when the local psql reports version 9.4 or newer.
#
# @return [String] shell-safe pg_restore command line
def restore_command
  # Take the first dotted number on the first line of the output. Packaged
  # builds append text such as "(Ubuntu 14.5-0ubuntu0.22.04.1)" and
  # pre-releases look like "9.4beta2"; neither is a valid Gem::Version as a
  # whole token.
  version_string = `psql --version`.lines.first.to_s[/\d+(?:\.\d+)*/]
  # when the version cannot be determined, fall back to omitting the flag
  if_exists = !version_string.nil? && Gem::Version.new(version_string) >= Gem::Version.new("9.4.0")

  flags = %w[--verbose --no-owner --no-acl --clean]
  flags << "--if-exists" if if_exists
  "pg_restore #{flags.join(" ")} -d #{Shellwords.escape(to_url)}"
end
134
+
135
# Replaces unqualified table names in +tables+ with schema-qualified
# ones. When several schemas contain a table of the same name, the one
# whose schema appears earliest in the search path wins; schemas not on
# the search path sort last, ties broken by name. Keys that match no
# known bare table name are kept unchanged.
#
# @param tables [Enumerable] table name => options pairs (or bare names)
# @return [Hash] resolved name => original value
def fully_resolve_tables(tables)
  rank = {}
  search_path.each_with_index { |schema, position| rank[schema] = position }

  preferred = {}
  self.tables.group_by { |name| name.split(".", 2).last }.each do |bare, candidates|
    preferred[bare] = candidates.min_by do |name|
      [rank.fetch(name.split(".", 2).first, 1000000), name]
    end
  end

  tables.map { |key, value| [preferred[key] || key, value] }.to_h
end
144
+
145
+ def search_path
146
+ execute("SELECT current_schemas(true)")[0]["current_schemas"][1..-2].split(",")
147
+ end
148
+
149
+ private
150
+
151
+ def quote_ident_full(ident)
152
+ ident.split(".", 2).map { |v| quote_ident(v) }.join(".")
153
+ end
154
+
155
+ def execute(query, params = [])
156
+ conn.exec_params(query, params).to_a
157
+ end
158
+
159
+ def log(message = nil)
160
+ $stderr.puts message
161
+ end
162
+
163
+ def quote_ident(value)
164
+ PG::Connection.quote_ident(value)
165
+ end
166
+
167
+ def escape(value)
168
+ if value.is_a?(String)
169
+ "'#{quote_string(value)}'"
170
+ else
171
+ value
172
+ end
173
+ end
174
+
175
+ # activerecord
176
# Escapes a value for use inside a single-quoted SQL string literal by
# doubling every backslash and every single quote.
#
# @param s [String]
# @return [String] new escaped string
def quote_string(s)
  doubled_backslashes = s.gsub("\\") { "\\\\" }
  doubled_backslashes.gsub("'") { "''" }
end
179
+
180
# Resolves a configured source. A value of the form "$(command)" is
# replaced by the chomped standard output of running +command+; anything
# else (including nil) is returned unchanged.
#
# @param source [String, nil]
# @return [String, nil]
# @raise [PgSync::Error] when the command exits with a non-zero status
def resolve_url(source)
  return source unless source && source[0..1] == "$(" && source[-1] == ")"

  command = source[2..-2]
  output = `#{command}`.chomp
  raise PgSync::Error, "Command exited with non-zero status:\n#{command}" unless $?.success?

  output
end
190
+ end
191
+ end
@@ -0,0 +1,105 @@
1
+ module PgSync
2
+ class TableList
3
+ attr_reader :args, :opts, :source, :config
4
+
5
+ def initialize(args, options, source, config)
6
+ @args = args
7
+ @opts = options
8
+ @source = source
9
+ @config = config
10
+ end
11
+
12
+ def tables
13
+ tables = nil
14
+
15
+ if opts[:groups]
16
+ tables ||= Hash.new { |hash, key| hash[key] = {} }
17
+ specified_groups = to_arr(opts[:groups])
18
+ specified_groups.map do |tag|
19
+ group, id = tag.split(":", 2)
20
+ if (t = (config["groups"] || {})[group])
21
+ add_tables(tables, t, id, args[1])
22
+ else
23
+ raise PgSync::Error, "Group not found: #{group}"
24
+ end
25
+ end
26
+ end
27
+
28
+ if opts[:tables]
29
+ tables ||= Hash.new { |hash, key| hash[key] = {} }
30
+ to_arr(opts[:tables]).each do |tag|
31
+ table, id = tag.split(":", 2)
32
+ add_table(tables, table, id, args[1])
33
+ end
34
+ end
35
+
36
+ if args[0]
37
+ # could be a group, table, or mix
38
+ tables ||= Hash.new { |hash, key| hash[key] = {} }
39
+ specified_groups = to_arr(args[0])
40
+ specified_groups.map do |tag|
41
+ group, id = tag.split(":", 2)
42
+ if (t = (config["groups"] || {})[group])
43
+ add_tables(tables, t, id, args[1])
44
+ else
45
+ add_table(tables, group, id, args[1])
46
+ end
47
+ end
48
+ end
49
+
50
+ tables ||= begin
51
+ exclude = to_arr(opts[:exclude])
52
+ exclude = source.fully_resolve_tables(exclude).keys if exclude.any?
53
+
54
+ tabs = source.tables
55
+ unless opts[:all_schemas]
56
+ schemas = Set.new(opts[:schemas] ? to_arr(opts[:schemas]) : [source.schema || "public"])
57
+ tabs.select! { |t| schemas.include?(t.split(".", 2)[0]) }
58
+ end
59
+
60
+ Hash[(tabs - exclude).map { |k| [k, {}] }]
61
+ end
62
+
63
+ source.fully_resolve_tables(tables)
64
+ end
65
+
66
+ private
67
+
68
# Normalizes a list option into an array. Arrays pass through untouched;
# any other value is stringified and split on commas, except commas that
# sit inside a double-quoted segment.
# Regex source: http://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes
#
# @param value [Array, String, nil]
# @return [Array]
def to_arr(value)
  return value if value.is_a?(Array)

  value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/)
end
77
+
78
# Registers every table of a group. Each group entry is either a table
# name or a [table, sql] pair; an explicit +boom+ clause takes
# precedence over the entry's own sql.
#
# @param tables [Hash] accumulator of table name => options
# @param t [Array] group entries
# @param id [String, nil] value substituted into the sql clause
# @param boom [String, nil] sql clause overriding per-entry sql
def add_tables(tables, t, id, boom)
  t.each do |entry|
    name, entry_sql = entry.is_a?(Array) ? entry : [entry, nil]
    add_table(tables, name, id, boom || entry_sql)
  end
end
87
+
88
# Registers +table+ in +tables+. A name containing "*" is treated as a
# wildcard (matching any run of non-dot characters) and expanded against
# the source's table list, unless +wildcard+ says it was already
# expanded. When +boom+ is given it is stored as the table's :sql with
# "{id}" and "{1}" replaced by the cast +id+.
#
# @param tables [Hash] accumulator of table name => options
# @param table [String] table name or wildcard pattern
# @param id [String, nil] value substituted into the sql clause
# @param boom [String, nil] sql clause template
# @param wildcard [Boolean] true when called for an expanded match
def add_table(tables, table, id, boom, wildcard = false)
  if wildcard || !table.include?("*")
    tables[table] = {}
    if boom
      tables[table][:sql] = boom.gsub("{id}", cast(id)).gsub("{1}", cast(id))
    end
  else
    pattern = Regexp.new('\A' + Regexp.escape(table).gsub('\*','[^\.]*') + '\z')
    matches = source.tables.select { |name| pattern.match(name) }
    matches.each { |name| add_table(tables, name, id, boom, true) }
  end
end
100
+
101
# Stringifies +value+ and strips one double quote from the start and
# one from the end, when present.
#
# @param value [Object]
# @return [String]
def cast(value)
  text = value.to_s
  text.sub(/\A\"/, '').sub(/\"\z/, '')
end
104
+ end
105
+ end