pgsync 0.3.8 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +13 -4
- data/lib/pgsync/client.rb +280 -0
- data/lib/pgsync/data_source.rb +191 -0
- data/lib/pgsync/table_list.rb +105 -0
- data/lib/pgsync/table_sync.rb +239 -0
- data/lib/pgsync/version.rb +1 -1
- data/lib/pgsync.rb +7 -659
- metadata +6 -2
data/lib/pgsync.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require "pgsync/version"
|
2
1
|
require "yaml"
|
3
2
|
require "slop"
|
4
3
|
require "uri"
|
@@ -10,8 +9,15 @@ require "fileutils"
|
|
10
9
|
require "tempfile"
|
11
10
|
require "cgi"
|
12
11
|
require "shellwords"
|
12
|
+
require "set"
|
13
13
|
require "thread" # windows only
|
14
14
|
|
15
|
+
require "pgsync/client"
|
16
|
+
require "pgsync/data_source"
|
17
|
+
require "pgsync/table_list"
|
18
|
+
require "pgsync/table_sync"
|
19
|
+
require "pgsync/version"
|
20
|
+
|
15
21
|
module URI
|
16
22
|
class POSTGRESQL < Generic
|
17
23
|
DEFAULT_PORT = 5432
|
@@ -21,662 +27,4 @@ end
|
|
21
27
|
|
22
28
|
module PgSync
|
23
29
|
class Error < StandardError; end
|
24
|
-
|
25
|
-
class Client
|
26
|
-
def initialize(args)
|
27
|
-
$stdout.sync = true
|
28
|
-
@exit = false
|
29
|
-
@arguments, @options = parse_args(args)
|
30
|
-
@mutex = windows? ? Mutex.new : MultiProcessing::Mutex.new
|
31
|
-
end
|
32
|
-
|
33
|
-
# TODO clean up this mess
|
34
|
-
def perform
|
35
|
-
return if @exit
|
36
|
-
|
37
|
-
start_time = Time.now
|
38
|
-
|
39
|
-
args, opts = @arguments, @options
|
40
|
-
[:to, :from, :to_safe, :exclude].each do |opt|
|
41
|
-
opts[opt] ||= config[opt.to_s]
|
42
|
-
end
|
43
|
-
command = args[0]
|
44
|
-
|
45
|
-
case command
|
46
|
-
when "setup"
|
47
|
-
args.shift
|
48
|
-
opts[:setup] = true
|
49
|
-
deprecated "Use `psync --setup` instead"
|
50
|
-
when "schema"
|
51
|
-
args.shift
|
52
|
-
opts[:schema_only] = true
|
53
|
-
deprecated "Use `psync --schema-only` instead"
|
54
|
-
when "tables"
|
55
|
-
args.shift
|
56
|
-
opts[:tables] = args.shift
|
57
|
-
deprecated "Use `pgsync #{opts[:tables]}` instead"
|
58
|
-
when "groups"
|
59
|
-
args.shift
|
60
|
-
opts[:groups] = args.shift
|
61
|
-
deprecated "Use `pgsync #{opts[:groups]}` instead"
|
62
|
-
end
|
63
|
-
|
64
|
-
if opts[:where]
|
65
|
-
opts[:sql] ||= String.new
|
66
|
-
opts[:sql] << " WHERE #{opts[:where]}"
|
67
|
-
deprecated "Use `\"WHERE #{opts[:where]}\"` instead"
|
68
|
-
end
|
69
|
-
|
70
|
-
if opts[:limit]
|
71
|
-
opts[:sql] ||= String.new
|
72
|
-
opts[:sql] << " LIMIT #{opts[:limit]}"
|
73
|
-
deprecated "Use `\"LIMIT #{opts[:limit]}\"` instead"
|
74
|
-
end
|
75
|
-
|
76
|
-
if opts[:setup]
|
77
|
-
setup(db_config_file(args[0]) || config_file || ".pgsync.yml")
|
78
|
-
else
|
79
|
-
if args.size > 2
|
80
|
-
abort "Usage:\n pgsync [options]"
|
81
|
-
end
|
82
|
-
|
83
|
-
source = parse_source(opts[:from])
|
84
|
-
abort "No source" unless source
|
85
|
-
source_uri, from_schema = parse_uri(source)
|
86
|
-
|
87
|
-
destination = parse_source(opts[:to])
|
88
|
-
abort "No destination" unless destination
|
89
|
-
destination_uri, to_schema = parse_uri(destination)
|
90
|
-
abort "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1" unless %(localhost 127.0.0.1).include?(destination_uri.host) || opts[:to_safe]
|
91
|
-
|
92
|
-
print_uri("From", source_uri)
|
93
|
-
print_uri("To", destination_uri)
|
94
|
-
|
95
|
-
from_uri = source_uri
|
96
|
-
to_uri = destination_uri
|
97
|
-
|
98
|
-
tables = table_list(args, opts, from_uri, from_schema)
|
99
|
-
|
100
|
-
if opts[:schema_only]
|
101
|
-
log "* Dumping schema"
|
102
|
-
tables = tables.keys.map { |t| "-t #{Shellwords.escape(quote_ident(t))}" }.join(" ")
|
103
|
-
psql_version = Gem::Version.new(`psql --version`.lines[0].chomp.split(" ")[-1].sub(/beta\d/, ""))
|
104
|
-
if_exists = psql_version >= Gem::Version.new("9.4.0")
|
105
|
-
dump_command = "pg_dump -Fc --verbose --schema-only --no-owner --no-acl #{tables} #{to_url(source_uri)}"
|
106
|
-
restore_command = "pg_restore --verbose --no-owner --no-acl --clean #{if_exists ? "--if-exists" : nil} -d #{to_url(destination_uri)}"
|
107
|
-
system("#{dump_command} | #{restore_command}")
|
108
|
-
|
109
|
-
log_completed(start_time)
|
110
|
-
else
|
111
|
-
with_connection(to_uri, timeout: 3) do |conn|
|
112
|
-
tables.keys.each do |table|
|
113
|
-
unless table_exists?(conn, table, to_schema)
|
114
|
-
abort "Table does not exist in destination: #{table}"
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
if opts[:list]
|
120
|
-
if args[0] == "groups"
|
121
|
-
pretty_list (config["groups"] || {}).keys
|
122
|
-
else
|
123
|
-
pretty_list tables.keys
|
124
|
-
end
|
125
|
-
else
|
126
|
-
in_parallel(tables) do |table, table_opts|
|
127
|
-
sync_table(table, opts.merge(table_opts), from_uri, to_uri, from_schema, to_schema)
|
128
|
-
end
|
129
|
-
|
130
|
-
log_completed(start_time)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
true
|
135
|
-
end
|
136
|
-
|
137
|
-
protected
|
138
|
-
|
139
|
-
def sync_table(table, opts, from_uri, to_uri, from_schema, to_schema)
|
140
|
-
time =
|
141
|
-
benchmark do
|
142
|
-
with_connection(from_uri) do |from_connection|
|
143
|
-
with_connection(to_uri) do |to_connection|
|
144
|
-
bad_fields = opts[:no_rules] ? [] : config["data_rules"]
|
145
|
-
|
146
|
-
from_fields = columns(from_connection, table, from_schema)
|
147
|
-
to_fields = columns(to_connection, table, to_schema)
|
148
|
-
shared_fields = to_fields & from_fields
|
149
|
-
extra_fields = to_fields - from_fields
|
150
|
-
missing_fields = from_fields - to_fields
|
151
|
-
|
152
|
-
from_sequences = sequences(from_connection, table, shared_fields)
|
153
|
-
to_sequences = sequences(to_connection, table, shared_fields)
|
154
|
-
shared_sequences = to_sequences & from_sequences
|
155
|
-
extra_sequences = to_sequences - from_sequences
|
156
|
-
missing_sequences = from_sequences - to_sequences
|
157
|
-
|
158
|
-
sql_clause = String.new
|
159
|
-
|
160
|
-
@mutex.synchronize do
|
161
|
-
log "* Syncing #{table}"
|
162
|
-
if opts[:sql]
|
163
|
-
log " #{opts[:sql]}"
|
164
|
-
sql_clause << " #{opts[:sql]}"
|
165
|
-
end
|
166
|
-
log " Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
|
167
|
-
log " Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
|
168
|
-
log " Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
|
169
|
-
log " Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
|
170
|
-
|
171
|
-
if shared_fields.empty?
|
172
|
-
log " No fields to copy"
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
if shared_fields.any?
|
177
|
-
copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, bk| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, from_connection)} AS #{quote_ident(f)}" : "#{quote_ident(table)}.#{quote_ident(f)}" }.join(", ")
|
178
|
-
fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
|
179
|
-
|
180
|
-
seq_values = {}
|
181
|
-
shared_sequences.each do |seq|
|
182
|
-
seq_values[seq] = from_connection.exec("select last_value from #{seq}").to_a[0]["last_value"]
|
183
|
-
end
|
184
|
-
|
185
|
-
copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident(table)}#{sql_clause}) TO STDOUT"
|
186
|
-
if opts[:in_batches]
|
187
|
-
abort "Cannot use --overwrite with --in-batches" if opts[:overwrite]
|
188
|
-
|
189
|
-
primary_key = self.primary_key(from_connection, table, from_schema)
|
190
|
-
abort "No primary key" unless primary_key
|
191
|
-
|
192
|
-
to_connection.exec("TRUNCATE #{quote_ident(table)} CASCADE") if opts[:truncate]
|
193
|
-
|
194
|
-
from_max_id = max_id(from_connection, table, primary_key)
|
195
|
-
to_max_id = max_id(to_connection, table, primary_key) + 1
|
196
|
-
|
197
|
-
if to_max_id == 1
|
198
|
-
from_min_id = min_id(from_connection, table, primary_key)
|
199
|
-
to_max_id = from_min_id if from_min_id > 0
|
200
|
-
end
|
201
|
-
|
202
|
-
starting_id = to_max_id
|
203
|
-
batch_size = opts[:batch_size]
|
204
|
-
|
205
|
-
i = 1
|
206
|
-
batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil
|
207
|
-
|
208
|
-
while starting_id <= from_max_id
|
209
|
-
where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}"
|
210
|
-
log " #{i}/#{batch_count}: #{where}"
|
211
|
-
|
212
|
-
# TODO be smarter for advance sql clauses
|
213
|
-
batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"
|
214
|
-
|
215
|
-
batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quote_ident(table)}#{batch_sql_clause}) TO STDOUT"
|
216
|
-
to_connection.copy_data "COPY #{quote_ident(table)} (#{fields}) FROM STDIN" do
|
217
|
-
from_connection.copy_data batch_copy_to_command do
|
218
|
-
while row = from_connection.get_copy_data
|
219
|
-
to_connection.put_copy_data(row)
|
220
|
-
end
|
221
|
-
end
|
222
|
-
end
|
223
|
-
|
224
|
-
starting_id += batch_size
|
225
|
-
i += 1
|
226
|
-
|
227
|
-
if opts[:sleep] && starting_id <= from_max_id
|
228
|
-
sleep(opts[:sleep])
|
229
|
-
end
|
230
|
-
end
|
231
|
-
elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
|
232
|
-
primary_key = self.primary_key(to_connection, table, to_schema)
|
233
|
-
abort "No primary key" unless primary_key
|
234
|
-
|
235
|
-
temp_table = "pgsync_#{rand(1_000_000_000)}"
|
236
|
-
file = Tempfile.new(temp_table)
|
237
|
-
begin
|
238
|
-
from_connection.copy_data copy_to_command do
|
239
|
-
while row = from_connection.get_copy_data
|
240
|
-
file.write(row)
|
241
|
-
end
|
242
|
-
end
|
243
|
-
file.rewind
|
244
|
-
|
245
|
-
to_connection.transaction do
|
246
|
-
# create a temp table
|
247
|
-
to_connection.exec("CREATE TABLE #{quote_ident(temp_table)} AS SELECT * FROM #{quote_ident(table)} WITH NO DATA")
|
248
|
-
|
249
|
-
# load file
|
250
|
-
to_connection.copy_data "COPY #{quote_ident(temp_table)} (#{fields}) FROM STDIN" do
|
251
|
-
file.each do |row|
|
252
|
-
to_connection.put_copy_data(row)
|
253
|
-
end
|
254
|
-
end
|
255
|
-
|
256
|
-
if opts[:preserve]
|
257
|
-
# insert into
|
258
|
-
to_connection.exec("INSERT INTO #{quote_ident(table)} (SELECT * FROM #{quote_ident(temp_table)} WHERE NOT EXISTS (SELECT 1 FROM #{quote_ident(table)} WHERE #{quote_ident(table)}.#{primary_key} = #{quote_ident(temp_table)}.#{quote_ident(primary_key)}))")
|
259
|
-
else
|
260
|
-
to_connection.exec("DELETE FROM #{quote_ident(table)} WHERE #{quote_ident(primary_key)} IN (SELECT #{quote_ident(primary_key)} FROM #{quote_ident(temp_table)})")
|
261
|
-
to_connection.exec("INSERT INTO #{quote_ident(table)} (SELECT * FROM #{quote_ident(temp_table)})")
|
262
|
-
end
|
263
|
-
|
264
|
-
# delete temp table
|
265
|
-
to_connection.exec("DROP TABLE #{quote_ident(temp_table)}")
|
266
|
-
end
|
267
|
-
ensure
|
268
|
-
file.close
|
269
|
-
file.unlink
|
270
|
-
end
|
271
|
-
else
|
272
|
-
to_connection.exec("TRUNCATE #{quote_ident(table)} CASCADE")
|
273
|
-
to_connection.copy_data "COPY #{quote_ident(table)} (#{fields}) FROM STDIN" do
|
274
|
-
from_connection.copy_data copy_to_command do
|
275
|
-
while row = from_connection.get_copy_data
|
276
|
-
to_connection.put_copy_data(row)
|
277
|
-
end
|
278
|
-
end
|
279
|
-
end
|
280
|
-
end
|
281
|
-
seq_values.each do |seq, value|
|
282
|
-
to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
|
283
|
-
end
|
284
|
-
end
|
285
|
-
end
|
286
|
-
end
|
287
|
-
end
|
288
|
-
|
289
|
-
@mutex.synchronize do
|
290
|
-
log "* DONE #{table} (#{time.round(1)}s)"
|
291
|
-
end
|
292
|
-
end
|
293
|
-
|
294
|
-
def parse_args(args)
|
295
|
-
opts = Slop.parse(args) do |o|
|
296
|
-
o.banner = %{Usage:
|
297
|
-
pgsync [options]
|
298
|
-
|
299
|
-
Options:}
|
300
|
-
o.string "-t", "--tables", "tables"
|
301
|
-
o.string "-g", "--groups", "groups"
|
302
|
-
o.string "-d", "--db", "database"
|
303
|
-
o.string "--from", "source"
|
304
|
-
o.string "--to", "destination"
|
305
|
-
o.string "--where", "where", help: false
|
306
|
-
o.integer "--limit", "limit", help: false
|
307
|
-
o.string "--exclude", "exclude tables"
|
308
|
-
o.string "--config", "config file"
|
309
|
-
# TODO much better name for this option
|
310
|
-
o.boolean "--to-safe", "accept danger", default: false
|
311
|
-
o.boolean "--debug", "debug", default: false
|
312
|
-
o.boolean "--list", "list", default: false
|
313
|
-
o.boolean "--overwrite", "overwrite existing rows", default: false, help: false
|
314
|
-
o.boolean "--preserve", "preserve existing rows", default: false
|
315
|
-
o.boolean "--truncate", "truncate existing rows", default: false
|
316
|
-
o.boolean "--schema-only", "schema only", default: false
|
317
|
-
o.boolean "--no-rules", "do not apply data rules", default: false
|
318
|
-
o.boolean "--setup", "setup", default: false
|
319
|
-
o.boolean "--in-batches", "in batches", default: false, help: false
|
320
|
-
o.integer "--batch-size", "batch size", default: 10000, help: false
|
321
|
-
o.float "--sleep", "sleep", default: 0, help: false
|
322
|
-
o.on "-v", "--version", "print the version" do
|
323
|
-
log PgSync::VERSION
|
324
|
-
@exit = true
|
325
|
-
end
|
326
|
-
o.on "-h", "--help", "prints help" do
|
327
|
-
log o
|
328
|
-
@exit = true
|
329
|
-
end
|
330
|
-
end
|
331
|
-
[opts.arguments, opts.to_hash]
|
332
|
-
rescue Slop::Error => e
|
333
|
-
abort e.message
|
334
|
-
end
|
335
|
-
|
336
|
-
def config
|
337
|
-
@config ||= begin
|
338
|
-
if config_file
|
339
|
-
begin
|
340
|
-
YAML.load_file(config_file) || {}
|
341
|
-
rescue Psych::SyntaxError => e
|
342
|
-
raise PgSync::Error, e.message
|
343
|
-
end
|
344
|
-
else
|
345
|
-
{}
|
346
|
-
end
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
def parse_source(source)
|
351
|
-
if source && source[0..1] == "$(" && source[-1] == ")"
|
352
|
-
command = source[2..-2]
|
353
|
-
# log "Running #{command}"
|
354
|
-
source = `#{command}`.chomp
|
355
|
-
unless $?.success?
|
356
|
-
abort "Command exited with non-zero status:\n#{command}"
|
357
|
-
end
|
358
|
-
end
|
359
|
-
source
|
360
|
-
end
|
361
|
-
|
362
|
-
def setup(config_file)
|
363
|
-
if File.exist?(config_file)
|
364
|
-
abort "#{config_file} exists."
|
365
|
-
else
|
366
|
-
FileUtils.cp(File.dirname(__FILE__) + "/../config.yml", config_file)
|
367
|
-
log "#{config_file} created. Add your database credentials."
|
368
|
-
end
|
369
|
-
end
|
370
|
-
|
371
|
-
def db_config_file(db)
|
372
|
-
return unless db
|
373
|
-
".pgsync-#{db}.yml"
|
374
|
-
end
|
375
|
-
|
376
|
-
# borrowed from
|
377
|
-
# ActiveRecord::ConnectionAdapters::ConnectionSpecification::ConnectionUrlResolver
|
378
|
-
def with_connection(uri, timeout: 0)
|
379
|
-
uri_parser = URI::Parser.new
|
380
|
-
config = {
|
381
|
-
host: uri.host,
|
382
|
-
port: uri.port,
|
383
|
-
dbname: uri.path.sub(/\A\//, ""),
|
384
|
-
user: uri.user,
|
385
|
-
password: uri.password,
|
386
|
-
connect_timeout: timeout
|
387
|
-
}.reject { |_, value| value.to_s.empty? }
|
388
|
-
config.map { |key, value| config[key] = uri_parser.unescape(value) if value.is_a?(String) }
|
389
|
-
conn = PG::Connection.new(config)
|
390
|
-
begin
|
391
|
-
yield conn
|
392
|
-
ensure
|
393
|
-
conn.close
|
394
|
-
end
|
395
|
-
rescue PG::ConnectionBad => e
|
396
|
-
log
|
397
|
-
abort e.message
|
398
|
-
end
|
399
|
-
|
400
|
-
def benchmark
|
401
|
-
start_time = Time.now
|
402
|
-
yield
|
403
|
-
Time.now - start_time
|
404
|
-
end
|
405
|
-
|
406
|
-
def tables(conn, schema)
|
407
|
-
query = "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = $1 ORDER BY tablename ASC"
|
408
|
-
conn.exec_params(query, [schema]).to_a.map { |row| row["tablename"] }
|
409
|
-
end
|
410
|
-
|
411
|
-
def columns(conn, table, schema)
|
412
|
-
query = "SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2"
|
413
|
-
conn.exec_params(query, [schema, table]).to_a.map { |row| row["column_name"] }
|
414
|
-
end
|
415
|
-
|
416
|
-
def table_exists?(conn, table, schema)
|
417
|
-
query = "SELECT 1 FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2"
|
418
|
-
conn.exec_params(query, [schema, table]).to_a.size > 0
|
419
|
-
end
|
420
|
-
|
421
|
-
# http://stackoverflow.com/a/20537829
|
422
|
-
def primary_key(conn, table, schema)
|
423
|
-
query = <<-SQL
|
424
|
-
SELECT
|
425
|
-
pg_attribute.attname,
|
426
|
-
format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
|
427
|
-
FROM
|
428
|
-
pg_index, pg_class, pg_attribute, pg_namespace
|
429
|
-
WHERE
|
430
|
-
pg_class.oid = $2::regclass AND
|
431
|
-
indrelid = pg_class.oid AND
|
432
|
-
nspname = $1 AND
|
433
|
-
pg_class.relnamespace = pg_namespace.oid AND
|
434
|
-
pg_attribute.attrelid = pg_class.oid AND
|
435
|
-
pg_attribute.attnum = any(pg_index.indkey) AND
|
436
|
-
indisprimary
|
437
|
-
SQL
|
438
|
-
row = conn.exec_params(query, [schema, table]).to_a[0]
|
439
|
-
row && row["attname"]
|
440
|
-
end
|
441
|
-
|
442
|
-
# TODO better performance
|
443
|
-
def rule_match?(table, column, rule)
|
444
|
-
regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
|
445
|
-
regex.match(column) || regex.match("#{table}.#{column}")
|
446
|
-
end
|
447
|
-
|
448
|
-
# TODO wildcard rules
|
449
|
-
def apply_strategy(rule, table, column, conn)
|
450
|
-
if rule.is_a?(Hash)
|
451
|
-
if rule.key?("value")
|
452
|
-
escape(rule["value"])
|
453
|
-
elsif rule.key?("statement")
|
454
|
-
rule["statement"]
|
455
|
-
else
|
456
|
-
abort "Unknown rule #{rule.inspect} for column #{column}"
|
457
|
-
end
|
458
|
-
else
|
459
|
-
strategies = {
|
460
|
-
"unique_email" => "'email' || #{table}.id || '@example.org'",
|
461
|
-
"untouched" => quote_ident(column),
|
462
|
-
"unique_phone" => "(#{table}.id + 1000000000)::text",
|
463
|
-
"random_int" => "(RAND() * 10)::int",
|
464
|
-
"random_date" => "'1970-01-01'",
|
465
|
-
"random_time" => "NOW()",
|
466
|
-
"unique_secret" => "'secret' || #{table}.id",
|
467
|
-
"random_ip" => "'127.0.0.1'",
|
468
|
-
"random_letter" => "'A'",
|
469
|
-
"random_string" => "right(md5(random()::text),10)",
|
470
|
-
"random_number" => "(RANDOM() * 1000000)::int",
|
471
|
-
"null" => "NULL",
|
472
|
-
nil => "NULL"
|
473
|
-
}
|
474
|
-
if strategies[rule]
|
475
|
-
strategies[rule]
|
476
|
-
else
|
477
|
-
abort "Unknown rule #{rule} for column #{column}"
|
478
|
-
end
|
479
|
-
end
|
480
|
-
end
|
481
|
-
|
482
|
-
def quote_ident(value)
|
483
|
-
PG::Connection.quote_ident(value)
|
484
|
-
end
|
485
|
-
|
486
|
-
def escape(value)
|
487
|
-
if value.is_a?(String)
|
488
|
-
"'#{quote_string(value)}'"
|
489
|
-
else
|
490
|
-
value
|
491
|
-
end
|
492
|
-
end
|
493
|
-
|
494
|
-
# activerecord
|
495
|
-
def quote_string(s)
|
496
|
-
s.gsub(/\\/, '\&\&').gsub(/'/, "''")
|
497
|
-
end
|
498
|
-
|
499
|
-
def to_arr(value)
|
500
|
-
if value.is_a?(Array)
|
501
|
-
value
|
502
|
-
else
|
503
|
-
# Split by commas, but don't use commas inside double quotes
|
504
|
-
# http://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes
|
505
|
-
value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/)
|
506
|
-
end
|
507
|
-
end
|
508
|
-
|
509
|
-
def parse_uri(url)
|
510
|
-
uri = URI.parse(url)
|
511
|
-
uri.scheme ||= "postgres"
|
512
|
-
uri.host ||= "localhost"
|
513
|
-
uri.port ||= 5432
|
514
|
-
uri.path = "/#{uri.path}" if uri.path && uri.path[0] != "/"
|
515
|
-
schema = CGI.parse(uri.query.to_s)["schema"][0] || "public"
|
516
|
-
[uri, schema]
|
517
|
-
end
|
518
|
-
|
519
|
-
def print_uri(prefix, uri)
|
520
|
-
log "#{prefix}: #{uri.path.sub(/\A\//, '')} on #{uri.host}:#{uri.port}"
|
521
|
-
end
|
522
|
-
|
523
|
-
def to_url(uri)
|
524
|
-
uri = uri.dup
|
525
|
-
uri.query = nil
|
526
|
-
uri.to_s
|
527
|
-
end
|
528
|
-
|
529
|
-
def search_tree(file)
|
530
|
-
path = Dir.pwd
|
531
|
-
# prevent infinite loop
|
532
|
-
20.times do
|
533
|
-
absolute_file = File.join(path, file)
|
534
|
-
if File.exist?(absolute_file)
|
535
|
-
break absolute_file
|
536
|
-
end
|
537
|
-
path = File.dirname(path)
|
538
|
-
break if path == "/"
|
539
|
-
end
|
540
|
-
end
|
541
|
-
|
542
|
-
def config_file
|
543
|
-
return @config_file if instance_variable_defined?(:@config_file)
|
544
|
-
|
545
|
-
@config_file =
|
546
|
-
search_tree(
|
547
|
-
if @options[:db]
|
548
|
-
db_config_file(@options[:db])
|
549
|
-
else
|
550
|
-
@options[:config] || ".pgsync.yml"
|
551
|
-
end
|
552
|
-
)
|
553
|
-
end
|
554
|
-
|
555
|
-
def abort(message)
|
556
|
-
raise PgSync::Error, message
|
557
|
-
end
|
558
|
-
|
559
|
-
def log(message = nil)
|
560
|
-
$stderr.puts message
|
561
|
-
end
|
562
|
-
|
563
|
-
def sequences(conn, table, columns)
|
564
|
-
conn.exec("SELECT #{columns.map { |f| "pg_get_serial_sequence(#{escape(quote_ident(table))}, #{escape(f)}) AS #{f}" }.join(", ")}").to_a[0].values.compact
|
565
|
-
end
|
566
|
-
|
567
|
-
def in_parallel(tables, &block)
|
568
|
-
if @options[:debug] || @options[:in_batches]
|
569
|
-
tables.each(&block)
|
570
|
-
else
|
571
|
-
options = {}
|
572
|
-
options[:in_threads] = 4 if windows?
|
573
|
-
Parallel.each(tables, options, &block)
|
574
|
-
end
|
575
|
-
end
|
576
|
-
|
577
|
-
def pretty_list(items)
|
578
|
-
items.each do |item|
|
579
|
-
log item
|
580
|
-
end
|
581
|
-
end
|
582
|
-
|
583
|
-
def add_tables(tables, t, id, boom, from_uri, from_schema)
|
584
|
-
t.each do |table|
|
585
|
-
sql = nil
|
586
|
-
if table.is_a?(Array)
|
587
|
-
table, sql = table
|
588
|
-
end
|
589
|
-
add_table(tables, table, id, boom || sql, from_uri, from_schema)
|
590
|
-
end
|
591
|
-
end
|
592
|
-
|
593
|
-
def add_table(tables, table, id, boom, from_uri, from_schema, wildcard = false)
|
594
|
-
if table.include?("*") && !wildcard
|
595
|
-
regex = Regexp.new('\A' + Regexp.escape(table).gsub('\*','[^\.]*') + '\z')
|
596
|
-
t2 = with_connection(from_uri) { |conn| self.tables(conn, from_schema) }.select { |t| regex.match(t) }
|
597
|
-
t2.each do |tab|
|
598
|
-
add_table(tables, tab, id, boom, from_uri, from_schema, true)
|
599
|
-
end
|
600
|
-
else
|
601
|
-
tables[table] = {}
|
602
|
-
tables[table][:sql] = boom.gsub("{id}", cast(id)).gsub("{1}", cast(id)) if boom
|
603
|
-
end
|
604
|
-
end
|
605
|
-
|
606
|
-
def table_list(args, opts, from_uri, from_schema)
|
607
|
-
tables = nil
|
608
|
-
|
609
|
-
if opts[:groups]
|
610
|
-
tables ||= Hash.new { |hash, key| hash[key] = {} }
|
611
|
-
specified_groups = to_arr(opts[:groups])
|
612
|
-
specified_groups.map do |tag|
|
613
|
-
group, id = tag.split(":", 2)
|
614
|
-
if (t = (config["groups"] || {})[group])
|
615
|
-
add_tables(tables, t, id, args[1], from_uri, from_schema)
|
616
|
-
else
|
617
|
-
abort "Group not found: #{group}"
|
618
|
-
end
|
619
|
-
end
|
620
|
-
end
|
621
|
-
|
622
|
-
if opts[:tables]
|
623
|
-
tables ||= Hash.new { |hash, key| hash[key] = {} }
|
624
|
-
to_arr(opts[:tables]).each do |tag|
|
625
|
-
table, id = tag.split(":", 2)
|
626
|
-
add_table(tables, table, id, args[1], from_uri, from_schema)
|
627
|
-
end
|
628
|
-
end
|
629
|
-
|
630
|
-
if args[0]
|
631
|
-
# could be a group, table, or mix
|
632
|
-
tables ||= Hash.new { |hash, key| hash[key] = {} }
|
633
|
-
specified_groups = to_arr(args[0])
|
634
|
-
specified_groups.map do |tag|
|
635
|
-
group, id = tag.split(":", 2)
|
636
|
-
if (t = (config["groups"] || {})[group])
|
637
|
-
add_tables(tables, t, id, args[1], from_uri, from_schema)
|
638
|
-
else
|
639
|
-
add_table(tables, group, id, args[1], from_uri, from_schema)
|
640
|
-
end
|
641
|
-
end
|
642
|
-
end
|
643
|
-
|
644
|
-
with_connection(from_uri, timeout: 3) do |conn|
|
645
|
-
tables ||= Hash[(self.tables(conn, from_schema) - to_arr(opts[:exclude])).map { |k| [k, {}] }]
|
646
|
-
|
647
|
-
tables.keys.each do |table|
|
648
|
-
unless table_exists?(conn, table, from_schema)
|
649
|
-
abort "Table does not exist in source: #{table}"
|
650
|
-
end
|
651
|
-
end
|
652
|
-
end
|
653
|
-
|
654
|
-
tables
|
655
|
-
end
|
656
|
-
|
657
|
-
def max_id(conn, table, primary_key, sql_clause = nil)
|
658
|
-
conn.exec("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident(table)}#{sql_clause}").to_a[0]["max"].to_i
|
659
|
-
end
|
660
|
-
|
661
|
-
def min_id(conn, table, primary_key, sql_clause = nil)
|
662
|
-
conn.exec("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident(table)}#{sql_clause}").to_a[0]["min"].to_i
|
663
|
-
end
|
664
|
-
|
665
|
-
def cast(value)
|
666
|
-
value.to_s.gsub(/\A\"|\"\z/, '')
|
667
|
-
end
|
668
|
-
|
669
|
-
def deprecated(message)
|
670
|
-
log "[DEPRECATED] #{message}"
|
671
|
-
end
|
672
|
-
|
673
|
-
def log_completed(start_time)
|
674
|
-
time = Time.now - start_time
|
675
|
-
log "Completed in #{time.round(1)}s"
|
676
|
-
end
|
677
|
-
|
678
|
-
def windows?
|
679
|
-
Gem.win_platform?
|
680
|
-
end
|
681
|
-
end
|
682
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.8
|
4
|
+
version: 0.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: slop
|
@@ -126,6 +126,10 @@ files:
|
|
126
126
|
- config.yml
|
127
127
|
- exe/pgsync
|
128
128
|
- lib/pgsync.rb
|
129
|
+
- lib/pgsync/client.rb
|
130
|
+
- lib/pgsync/data_source.rb
|
131
|
+
- lib/pgsync/table_list.rb
|
132
|
+
- lib/pgsync/table_sync.rb
|
129
133
|
- lib/pgsync/version.rb
|
130
134
|
- pgsync.gemspec
|
131
135
|
homepage: https://github.com/ankane/pgsync
|