pgsync 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.travis.yml +11 -0
- data/CHANGELOG.md +5 -0
- data/README.md +4 -2
- data/Rakefile +2 -1
- data/lib/pgsync.rb +34 -27
- data/lib/pgsync/version.rb +1 -1
- data/pgsync.gemspec +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68da22be015e5ff66ecc722c7e2cf19a91b77b3c
|
4
|
+
data.tar.gz: 76498a3252a89668dc9b1e9f65dd152dedc4a727
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71f0e8b384f7a07706e6ad796c407f212cd0eb1dfe04bebffb278e0208e26e755a3352ef1ffd8d7681d868818df30312297d9fd60a5373048524236cdf2d6238
|
7
|
+
data.tar.gz: cdae25c85d546e866cd1a8558ab1c76bd548ed0a8d41ff049644e2cf29661e5f565a803f69a848217f482163aa4c3e49bc1680098f45fcbdb125ae443be50040
|
data/.travis.yml
ADDED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# pgsync
|
2
2
|
|
3
|
-
Sync Postgres data
|
3
|
+
Sync Postgres data between databases. Designed for:
|
4
4
|
|
5
5
|
- **speed** - up to 4x faster than traditional tools on a 4-core machine
|
6
6
|
- **security** - built-in methods to prevent sensitive data from ever leaving the server
|
@@ -8,6 +8,8 @@ Sync Postgres data to your local machine. Designed for:
|
|
8
8
|
|
9
9
|
:tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
|
10
10
|
|
11
|
+
[![Build Status](https://travis-ci.org/ankane/pgsync.svg?branch=master)](https://travis-ci.org/ankane/pgsync)
|
12
|
+
|
11
13
|
## Installation
|
12
14
|
|
13
15
|
pgsync is a command line tool. To install, run:
|
@@ -109,7 +111,7 @@ groups:
|
|
109
111
|
products: "where id = {1}"
|
110
112
|
reviews: "where product_id = {1}"
|
111
113
|
coupons: "where product_id = {1} order by created_at desc limit 10"
|
112
|
-
stores: "where id in (select store_id from products where id = {1})
|
114
|
+
stores: "where id in (select store_id from products where id = {1})"
|
113
115
|
```
|
114
116
|
|
115
117
|
And run:
|
data/Rakefile
CHANGED
data/lib/pgsync.rb
CHANGED
@@ -9,6 +9,7 @@ require "multiprocessing"
|
|
9
9
|
require "fileutils"
|
10
10
|
require "tempfile"
|
11
11
|
require "cgi"
|
12
|
+
require "shellwords"
|
12
13
|
|
13
14
|
module URI
|
14
15
|
class POSTGRESQL < Generic
|
@@ -97,8 +98,8 @@ module PgSync
|
|
97
98
|
|
98
99
|
if opts[:schema_only]
|
99
100
|
log "* Dumping schema"
|
100
|
-
tables = tables.keys.map { |t| "-t #{t}" }.join(" ")
|
101
|
-
psql_version = Gem::Version.new(`psql --version`.split(" ").
|
101
|
+
tables = tables.keys.map { |t| "-t #{Shellwords.escape(quote_ident(t))}" }.join(" ")
|
102
|
+
psql_version = Gem::Version.new(`psql --version`.lines[0].chomp.split(" ")[-1].sub(/beta\d/, ""))
|
102
103
|
if_exists = psql_version >= Gem::Version.new("9.4.0")
|
103
104
|
dump_command = "pg_dump -Fc --verbose --schema-only --no-owner --no-acl #{tables} #{to_url(source_uri)}"
|
104
105
|
restore_command = "pg_restore --verbose --no-owner --no-acl --clean #{if_exists ? "--if-exists" : nil} -d #{to_url(destination_uri)}"
|
@@ -172,19 +173,23 @@ module PgSync
|
|
172
173
|
end
|
173
174
|
|
174
175
|
if shared_fields.any?
|
175
|
-
copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, bk| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, from_connection)} AS #{
|
176
|
-
fields = shared_fields.map { |f|
|
176
|
+
copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, bk| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, from_connection)} AS #{quote_ident(f)}" : "#{quote_ident(table)}.#{quote_ident(f)}" }.join(", ")
|
177
|
+
fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
|
177
178
|
|
178
179
|
seq_values = {}
|
179
180
|
shared_sequences.each do |seq|
|
180
181
|
seq_values[seq] = from_connection.exec("select last_value from #{seq}").to_a[0]["last_value"]
|
181
182
|
end
|
182
183
|
|
183
|
-
copy_to_command = "COPY (SELECT #{copy_fields} FROM #{table}#{sql_clause}) TO STDOUT"
|
184
|
+
copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident(table)}#{sql_clause}) TO STDOUT"
|
184
185
|
if opts[:in_batches]
|
186
|
+
abort "Cannot use --overwrite with --in-batches" if opts[:overwrite]
|
187
|
+
|
185
188
|
primary_key = self.primary_key(from_connection, table, from_schema)
|
186
189
|
abort "No primary key" unless primary_key
|
187
190
|
|
191
|
+
to_connection.exec("TRUNCATE #{quote_ident(table)} CASCADE") if opts[:truncate]
|
192
|
+
|
188
193
|
from_max_id = max_id(from_connection, table, primary_key, sql_clause)
|
189
194
|
to_max_id = max_id(to_connection, table, primary_key, sql_clause) + 1
|
190
195
|
|
@@ -200,14 +205,14 @@ module PgSync
|
|
200
205
|
batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
|
201
206
|
|
202
207
|
while starting_id <= from_max_id
|
203
|
-
where = "#{primary_key} >= #{starting_id} AND #{primary_key} < #{starting_id + batch_size}"
|
208
|
+
where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
|
204
209
|
log " #{i}/#{batch_count}: #{where}"
|
205
210
|
|
206
211
|
# TODO be smarter for advance sql clauses
|
207
212
|
batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
|
208
213
|
|
209
|
-
batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{table}#{batch_sql_clause}) TO STDOUT"
|
210
|
-
to_connection.copy_data "COPY #{table} (#{fields}) FROM STDIN" do
|
214
|
+
batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident(table)}#{batch_sql_clause}) TO STDOUT"
|
215
|
+
to_connection.copy_data "COPY #{quote_ident(table)} (#{fields}) FROM STDIN" do
|
211
216
|
from_connection.copy_data batch_copy_to_command do
|
212
217
|
while row = from_connection.get_copy_data
|
213
218
|
to_connection.put_copy_data(row)
|
@@ -238,10 +243,10 @@ module PgSync
|
|
238
243
|
|
239
244
|
to_connection.transaction do
|
240
245
|
# create a temp table
|
241
|
-
to_connection.exec("CREATE TABLE #{temp_table} AS SELECT * FROM #{table} WITH NO DATA")
|
246
|
+
to_connection.exec("CREATE TABLE #{quote_ident(temp_table)} AS SELECT * FROM #{quote_ident(table)} WITH NO DATA")
|
242
247
|
|
243
248
|
# load file
|
244
|
-
to_connection.copy_data "COPY #{temp_table} (#{fields}) FROM STDIN" do
|
249
|
+
to_connection.copy_data "COPY #{quote_ident(temp_table)} (#{fields}) FROM STDIN" do
|
245
250
|
file.each do |row|
|
246
251
|
to_connection.put_copy_data(row)
|
247
252
|
end
|
@@ -249,22 +254,22 @@ module PgSync
|
|
249
254
|
|
250
255
|
if opts[:preserve]
|
251
256
|
# insert into
|
252
|
-
to_connection.exec("INSERT INTO #{table} (SELECT * FROM #{temp_table} WHERE NOT EXISTS (SELECT 1 FROM #{table} WHERE #{table}.#{primary_key} = #{temp_table}.#{primary_key}))")
|
257
|
+
to_connection.exec("INSERT INTO #{quote_ident(table)} (SELECT * FROM #{quote_ident(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident(table)} WHERE #{quote_ident(table)}.#{primary_key} = #{quote_ident(temp_table)}.#{quote_ident(primary_key)}))")
|
253
258
|
else
|
254
|
-
to_connection.exec("DELETE FROM #{table} WHERE #{primary_key} IN (SELECT #{primary_key} FROM #{temp_table})")
|
255
|
-
to_connection.exec("INSERT INTO #{table} (SELECT * FROM #{temp_table})")
|
259
|
+
to_connection.exec("DELETE FROM #{quote_ident(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident(temp_table)})")
|
260
|
+
to_connection.exec("INSERT INTO #{quote_ident(table)} (SELECT * FROM #{quote_ident(temp_table)})")
|
256
261
|
end
|
257
262
|
|
258
263
|
# delete temp table
|
259
|
-
to_connection.exec("DROP TABLE #{temp_table}")
|
264
|
+
to_connection.exec("DROP TABLE #{quote_ident(temp_table)}")
|
260
265
|
end
|
261
266
|
ensure
|
262
267
|
file.close
|
263
268
|
file.unlink
|
264
269
|
end
|
265
270
|
else
|
266
|
-
to_connection.exec("TRUNCATE #{table} CASCADE")
|
267
|
-
to_connection.copy_data "COPY #{table} (#{fields}) FROM STDIN" do
|
271
|
+
to_connection.exec("TRUNCATE #{quote_ident(table)} CASCADE")
|
272
|
+
to_connection.copy_data "COPY #{quote_ident(table)} (#{fields}) FROM STDIN" do
|
268
273
|
from_connection.copy_data copy_to_command do
|
269
274
|
while row = from_connection.get_copy_data
|
270
275
|
to_connection.put_copy_data(row)
|
@@ -452,7 +457,7 @@ Options:}
|
|
452
457
|
else
|
453
458
|
strategies = {
|
454
459
|
"unique_email" => "'email' || #{table}.id || '@example.org'",
|
455
|
-
"untouched" =>
|
460
|
+
"untouched" => quote_ident(column),
|
456
461
|
"unique_phone" => "(#{table}.id + 1000000000)::text",
|
457
462
|
"random_int" => "(RAND() * 10)::int",
|
458
463
|
"random_date" => "'1970-01-01'",
|
@@ -471,7 +476,7 @@ Options:}
|
|
471
476
|
end
|
472
477
|
end
|
473
478
|
|
474
|
-
def
|
479
|
+
def quote_ident(value)
|
475
480
|
PG::Connection.quote_ident(value)
|
476
481
|
end
|
477
482
|
|
@@ -492,7 +497,9 @@ Options:}
|
|
492
497
|
if value.is_a?(Array)
|
493
498
|
value
|
494
499
|
else
|
495
|
-
|
500
|
+
# Split by commas, but don't use commas inside double quotes
|
501
|
+
# http://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes
|
502
|
+
value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/)
|
496
503
|
end
|
497
504
|
end
|
498
505
|
|
@@ -502,8 +509,8 @@ Options:}
|
|
502
509
|
uri.host ||= "localhost"
|
503
510
|
uri.port ||= 5432
|
504
511
|
uri.path = "/#{uri.path}" if uri.path && uri.path[0] != "/"
|
505
|
-
schema =
|
506
|
-
[uri, schema
|
512
|
+
schema = CGI.parse(uri.query.to_s)["schema"][0] || "public"
|
513
|
+
[uri, schema]
|
507
514
|
end
|
508
515
|
|
509
516
|
def print_uri(prefix, uri)
|
@@ -551,7 +558,7 @@ Options:}
|
|
551
558
|
end
|
552
559
|
|
553
560
|
def sequences(conn, table, columns)
|
554
|
-
conn.exec("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape(table)}, #{escape(f)}) AS #{f}" }.join(", ")}").to_a[0].values.compact
|
561
|
+
conn.exec("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape(quote_ident(table))}, #{escape(f)}) AS #{f}" }.join(", ")}").to_a[0].values.compact
|
555
562
|
end
|
556
563
|
|
557
564
|
def in_parallel(tables, &block)
|
@@ -582,8 +589,8 @@ Options:}
|
|
582
589
|
if table.include?("*") && !wildcard
|
583
590
|
regex = Regexp.new('\A' + Regexp.escape(table).gsub('\*','[^\.]*') + '\z')
|
584
591
|
t2 = with_connection(from_uri) { |conn| self.tables(conn, from_schema) }.select { |t| regex.match(t) }
|
585
|
-
t2.each do |
|
586
|
-
add_table(tables,
|
592
|
+
t2.each do |tab|
|
593
|
+
add_table(tables, tab, id, boom, from_uri, from_schema, true)
|
587
594
|
end
|
588
595
|
else
|
589
596
|
tables[table] = {}
|
@@ -643,15 +650,15 @@ Options:}
|
|
643
650
|
end
|
644
651
|
|
645
652
|
def max_id(conn, table, primary_key, sql_clause = nil)
|
646
|
-
conn.exec("SELECT MAX(#{
|
653
|
+
conn.exec("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident(table)}#{sql_clause}").to_a[0]["max"].to_i
|
647
654
|
end
|
648
655
|
|
649
656
|
def min_id(conn, table, primary_key, sql_clause = nil)
|
650
|
-
conn.exec("SELECT MIN(#{
|
657
|
+
conn.exec("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident(table)}#{sql_clause}").to_a[0]["min"].to_i
|
651
658
|
end
|
652
659
|
|
653
660
|
def cast(value)
|
654
|
-
value.to_s
|
661
|
+
value.to_s.gsub(/\A\"|\"\z/, '')
|
655
662
|
end
|
656
663
|
|
657
664
|
def deprecated(message)
|
data/lib/pgsync/version.rb
CHANGED
data/pgsync.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Andrew Kane"]
|
10
10
|
spec.email = ["andrew@chartkick.com"]
|
11
11
|
|
12
|
-
spec.summary = "
|
12
|
+
spec.summary = "Sync Postgres data between databases"
|
13
13
|
spec.homepage = "https://github.com/ankane/pgsync"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|
@@ -117,6 +117,7 @@ extensions: []
|
|
117
117
|
extra_rdoc_files: []
|
118
118
|
files:
|
119
119
|
- ".gitignore"
|
120
|
+
- ".travis.yml"
|
120
121
|
- CHANGELOG.md
|
121
122
|
- Gemfile
|
122
123
|
- LICENSE.txt
|
@@ -147,8 +148,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
148
|
version: '0'
|
148
149
|
requirements: []
|
149
150
|
rubyforge_project:
|
150
|
-
rubygems_version: 2.
|
151
|
+
rubygems_version: 2.6.11
|
151
152
|
signing_key:
|
152
153
|
specification_version: 4
|
153
|
-
summary:
|
154
|
+
summary: Sync Postgres data between databases
|
154
155
|
test_files: []
|