pgsync 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +6 -0
- data/README.md +7 -1
- data/lib/pgsync.rb +147 -55
- data/lib/pgsync/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 792aeac71e01e8b28c8f33eab3392f2afb8cf1ac
|
4
|
+
data.tar.gz: e47f1aed5ef45cdb3af86d2a52a93c16f332e079
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c6c37c246796179708f08abd3e43d09a3608545cc02ee4eab03cfeb3138b893e60be4ca8fceb6773ac295346fa194d6275aae660517c870be96b6d93cfb3358
|
7
|
+
data.tar.gz: 0fa9979a1a15d6a5e0da7298eed998f9a04a5067ed2be35d0b67d84ef5ffd2fa85b94cdf1d7f52d9ab42185ce476b57cac89b0f5a182eaacfd011a8ad8b12861
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -14,7 +14,7 @@ And in your project directory, run:
|
|
14
14
|
pgsync setup
|
15
15
|
```
|
16
16
|
|
17
|
-
This creates `.pgsync.yml` for you to customize. We recommend checking this into your version control (assuming it doesn’t contain sensitive information).
|
17
|
+
This creates `.pgsync.yml` for you to customize. We recommend checking this into your version control (assuming it doesn’t contain sensitive information). `pgsync` commands can be run from this directory or any subdirectory.
|
18
18
|
|
19
19
|
## How to Use
|
20
20
|
|
@@ -36,6 +36,12 @@ Fetch specific rows (truncates destination table first)
|
|
36
36
|
pgsync products --where "id < 100"
|
37
37
|
```
|
38
38
|
|
39
|
+
To preserve existing rows, use:
|
40
|
+
|
41
|
+
```sh
|
42
|
+
pgsync products --where "id < 100" --preserve
|
43
|
+
```
|
44
|
+
|
39
45
|
### Exclude Tables
|
40
46
|
|
41
47
|
```sh
|
data/lib/pgsync.rb
CHANGED
@@ -7,6 +7,14 @@ require "pg"
|
|
7
7
|
require "parallel"
|
8
8
|
require "multiprocessing"
|
9
9
|
require "fileutils"
|
10
|
+
require "tempfile"
|
11
|
+
|
12
|
+
module URI
|
13
|
+
class POSTGRESQL < Generic
|
14
|
+
DEFAULT_PORT = 5432
|
15
|
+
end
|
16
|
+
@@schemes["POSTGRESQL"] = @@schemes["POSTGRES"] = POSTGRESQL
|
17
|
+
end
|
10
18
|
|
11
19
|
module PgSync
|
12
20
|
class Error < StandardError; end
|
@@ -98,75 +106,127 @@ module PgSync
|
|
98
106
|
end
|
99
107
|
end
|
100
108
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
109
|
+
if opts[:list]
|
110
|
+
if args[0] == "groups"
|
111
|
+
pretty_list (config["groups"] || {}).keys
|
112
|
+
else
|
113
|
+
pretty_list tables
|
114
|
+
end
|
115
|
+
else
|
116
|
+
in_parallel(tables) do |table|
|
117
|
+
time =
|
118
|
+
benchmark do
|
119
|
+
with_connection(from_uri) do |from_connection|
|
120
|
+
with_connection(to_uri) do |to_connection|
|
121
|
+
bad_fields = config["data_rules"]
|
122
|
+
|
123
|
+
from_fields = columns(from_connection, table, "public")
|
124
|
+
to_fields = columns(to_connection, table, "public")
|
125
|
+
shared_fields = to_fields & from_fields
|
126
|
+
extra_fields = to_fields - from_fields
|
127
|
+
missing_fields = from_fields - to_fields
|
128
|
+
|
129
|
+
from_sequences = sequences(from_connection, table, shared_fields)
|
130
|
+
to_sequences = sequences(to_connection, table, shared_fields)
|
131
|
+
shared_sequences = to_sequences & from_sequences
|
132
|
+
extra_sequences = to_sequences - from_sequences
|
133
|
+
missing_sequences = from_sequences - to_sequences
|
134
|
+
|
135
|
+
where = opts[:where]
|
136
|
+
limit = opts[:limit]
|
137
|
+
sql_clause = String.new
|
138
|
+
|
139
|
+
@mutex.synchronize do
|
140
|
+
log "* Syncing #{table}"
|
141
|
+
if where
|
142
|
+
log " #{where}"
|
143
|
+
sql_clause << " WHERE #{opts[:where]}"
|
144
|
+
end
|
145
|
+
if limit
|
146
|
+
log " LIMIT #{limit}"
|
147
|
+
sql_clause << " LIMIT #{limit}"
|
148
|
+
end
|
149
|
+
log " Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
|
150
|
+
log " Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
|
151
|
+
log " Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
|
152
|
+
log " Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
|
132
153
|
|
133
|
-
|
134
|
-
|
154
|
+
if shared_fields.empty?
|
155
|
+
log " No fields to copy"
|
156
|
+
end
|
135
157
|
end
|
136
|
-
end
|
137
158
|
|
138
|
-
|
139
|
-
|
140
|
-
|
159
|
+
if shared_fields.any?
|
160
|
+
copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, bk| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], f, from_connection)} AS #{escape_identifier(f)}" : escape_identifier(f) }.join(", ")
|
161
|
+
fields = shared_fields.map { |f| escape_identifier(f) }.join(", ")
|
141
162
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
163
|
+
seq_values = {}
|
164
|
+
shared_sequences.each do |seq|
|
165
|
+
seq_values[seq] = from_connection.exec("select last_value from #{seq}").to_a[0]["last_value"]
|
166
|
+
end
|
146
167
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
168
|
+
copy_to_command = "COPY (SELECT #{copy_fields} FROM #{table}#{sql_clause}) TO STDOUT"
|
169
|
+
if opts[:preserve]
|
170
|
+
primary_key = self.primary_key(from_connection, table, "public")
|
171
|
+
abort "No primary key" unless primary_key
|
172
|
+
|
173
|
+
temp_table = "pgsync_#{rand(1_000_000_000)}"
|
174
|
+
file = Tempfile.new(temp_table)
|
175
|
+
begin
|
176
|
+
from_connection.copy_data copy_to_command do
|
177
|
+
while row = from_connection.get_copy_data
|
178
|
+
file.write(row)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
file.rewind
|
182
|
+
|
183
|
+
to_connection.transaction do
|
184
|
+
# create a temp table
|
185
|
+
to_connection.exec("CREATE TABLE #{temp_table} AS SELECT * FROM #{table} WITH NO DATA")
|
186
|
+
|
187
|
+
# load file
|
188
|
+
to_connection.copy_data "COPY #{temp_table} (#{fields}) FROM STDIN" do
|
189
|
+
file.each do |row|
|
190
|
+
to_connection.put_copy_data(row)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
# insert into
|
195
|
+
to_connection.exec("INSERT INTO #{table} (SELECT * FROM #{temp_table} WHERE NOT EXISTS (SELECT 1 FROM #{table} WHERE #{table}.#{primary_key} = #{temp_table}.#{primary_key}))")
|
196
|
+
|
197
|
+
# delete temp table
|
198
|
+
to_connection.exec("DROP TABLE #{temp_table}")
|
199
|
+
end
|
200
|
+
ensure
|
201
|
+
file.close
|
202
|
+
file.unlink
|
203
|
+
end
|
204
|
+
else
|
205
|
+
to_connection.exec("TRUNCATE #{table} CASCADE")
|
206
|
+
to_connection.copy_data "COPY #{table} (#{fields}) FROM STDIN" do
|
207
|
+
from_connection.copy_data copy_to_command do
|
208
|
+
while row = from_connection.get_copy_data
|
209
|
+
to_connection.put_copy_data(row)
|
210
|
+
end
|
211
|
+
end
|
152
212
|
end
|
153
213
|
end
|
154
|
-
|
155
|
-
|
156
|
-
|
214
|
+
seq_values.each do |seq, value|
|
215
|
+
to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
|
216
|
+
end
|
157
217
|
end
|
158
218
|
end
|
159
219
|
end
|
160
220
|
end
|
161
|
-
end
|
162
221
|
|
163
|
-
|
164
|
-
|
222
|
+
@mutex.synchronize do
|
223
|
+
log "* DONE #{table} (#{time.round(1)}s)"
|
224
|
+
end
|
165
225
|
end
|
166
|
-
end
|
167
226
|
|
168
|
-
|
169
|
-
|
227
|
+
time = Time.now - start_time
|
228
|
+
log "Completed in #{time.round(1)}s"
|
229
|
+
end
|
170
230
|
end
|
171
231
|
end
|
172
232
|
true
|
@@ -189,12 +249,15 @@ Options:}
|
|
189
249
|
o.string "--from", "source"
|
190
250
|
o.string "--to", "destination"
|
191
251
|
o.string "--where", "where"
|
252
|
+
o.integer "--limit", "limit"
|
192
253
|
o.string "--exclude", "exclude tables"
|
193
254
|
o.string "--config", "config file"
|
194
255
|
o.string "--db", "database"
|
195
256
|
# TODO much better name for this option
|
196
257
|
o.boolean "--to-safe", "accept danger", default: false
|
197
258
|
o.boolean "--debug", "debug", default: false
|
259
|
+
o.boolean "--list", "list", default: false
|
260
|
+
o.boolean "--preserve", "preserve", default: false
|
198
261
|
o.on "-v", "--version", "print the version" do
|
199
262
|
log PgSync::VERSION
|
200
263
|
@exit = true
|
@@ -294,6 +357,27 @@ Options:}
|
|
294
357
|
conn.exec_params(query, [schema, table]).to_a.size > 0
|
295
358
|
end
|
296
359
|
|
360
|
+
# http://stackoverflow.com/a/20537829
|
361
|
+
def primary_key(conn, table, schema)
|
362
|
+
query = <<-SQL
|
363
|
+
SELECT
|
364
|
+
pg_attribute.attname,
|
365
|
+
format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
|
366
|
+
FROM
|
367
|
+
pg_index, pg_class, pg_attribute, pg_namespace
|
368
|
+
WHERE
|
369
|
+
pg_class.oid = $2::regclass AND
|
370
|
+
indrelid = pg_class.oid AND
|
371
|
+
nspname = $1 AND
|
372
|
+
pg_class.relnamespace = pg_namespace.oid AND
|
373
|
+
pg_attribute.attrelid = pg_class.oid AND
|
374
|
+
pg_attribute.attnum = any(pg_index.indkey) AND
|
375
|
+
indisprimary
|
376
|
+
SQL
|
377
|
+
row = conn.exec_params(query, [schema, table]).to_a[0]
|
378
|
+
row && row["attname"]
|
379
|
+
end
|
380
|
+
|
297
381
|
# TODO better performance
|
298
382
|
def rule_match?(table, column, rule)
|
299
383
|
regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
|
@@ -359,8 +443,10 @@ Options:}
|
|
359
443
|
|
360
444
|
def parse_uri(url)
|
361
445
|
uri = URI.parse(url)
|
446
|
+
uri.scheme ||= "postgres"
|
362
447
|
uri.host ||= "localhost"
|
363
448
|
uri.port ||= 5432
|
449
|
+
uri.path = "/#{uri.path}" if uri.path && uri.path[0] != "/"
|
364
450
|
uri
|
365
451
|
end
|
366
452
|
|
@@ -419,5 +505,11 @@ Options:}
|
|
419
505
|
Parallel.each(tables, &block)
|
420
506
|
end
|
421
507
|
end
|
508
|
+
|
509
|
+
def pretty_list(items)
|
510
|
+
items.each do |item|
|
511
|
+
log item
|
512
|
+
end
|
513
|
+
end
|
422
514
|
end
|
423
515
|
end
|
data/lib/pgsync/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|