pgsync 0.2.3 → 0.2.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d815251b4114bab3d2f124185c3f4d0a6d7ea01b
-  data.tar.gz: 46838871758b47ed72b72de2544e8dca5c4d0448
+  metadata.gz: 792aeac71e01e8b28c8f33eab3392f2afb8cf1ac
+  data.tar.gz: e47f1aed5ef45cdb3af86d2a52a93c16f332e079
 SHA512:
-  metadata.gz: 19599ac80d68278b56ebe96d6ed1b6d5065df06d3384bb900d4876974d250048c8ca1bf9e0ba13fb3a7af4a6981c4f7a132d7d47c52636dab0180ec55995b7ca
-  data.tar.gz: 19e780c6b2006d3c9b96149bfa73b27e7b366f46b160cb0a6ad9f309530f75b36ffa484c956c7de1c3658623c376ef0e2589d4be42807160a618603ebeb6e196
+  metadata.gz: 4c6c37c246796179708f08abd3e43d09a3608545cc02ee4eab03cfeb3138b893e60be4ca8fceb6773ac295346fa194d6275aae660517c870be96b6d93cfb3358
+  data.tar.gz: 0fa9979a1a15d6a5e0da7298eed998f9a04a5067ed2be35d0b67d84ef5ffd2fa85b94cdf1d7f52d9ab42185ce476b57cac89b0f5a182eaacfd011a8ad8b12861
data/.gitignore CHANGED
@@ -7,3 +7,4 @@
 /pkg/
 /spec/reports/
 /tmp/
+/.pgsync.yml
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+# 0.2.4
+
+- Added `--preserve` option
+- Added `--list` option for groups and tables
+- Added `--limit` option
+
 # 0.2.3
 
 - Fixed `no PostgreSQL user name specified in startup packet`
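
A rough sketch of how the new flags might be combined on the command line, pieced together from the README change and the option parsing further down in this diff (table and group names are illustrative):

```sh
# list the groups defined in .pgsync.yml, or the tables that would be synced
pgsync groups --list
pgsync --list

# copy at most 1,000 matching rows per table and keep rows already in the destination
pgsync products --where "id < 100" --limit 1000 --preserve
```
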
data/README.md CHANGED
@@ -14,7 +14,7 @@ And in your project directory, run:
 pgsync setup
 ```
 
-This creates `.pgsync.yml` for you to customize. We recommend checking this into your version control (assuming it doesn’t contain sensitive information).
+This creates `.pgsync.yml` for you to customize. We recommend checking this into your version control (assuming it doesn’t contain sensitive information). `pgsync` commands can be run from this directory or any subdirectory.
 
 ## How to Use
 
@@ -36,6 +36,12 @@ Fetch specific rows (truncates destination table first)
 pgsync products --where "id < 100"
 ```
 
+To preserve existing rows, use:
+
+```sh
+pgsync products --where "id < 100" --preserve
+```
+
 ### Exclude Tables
 
 ```sh
data/lib/pgsync.rb CHANGED
@@ -7,6 +7,14 @@ require "pg"
 require "parallel"
 require "multiprocessing"
 require "fileutils"
+require "tempfile"
+
+module URI
+  class POSTGRESQL < Generic
+    DEFAULT_PORT = 5432
+  end
+  @@schemes["POSTGRESQL"] = @@schemes["POSTGRES"] = POSTGRESQL
+end
 
 module PgSync
   class Error < StandardError; end
@@ -98,75 +106,127 @@ module PgSync
             end
           end

-          in_parallel(tables) do |table|
-            time =
-              benchmark do
-                with_connection(from_uri) do |from_connection|
-                  with_connection(to_uri) do |to_connection|
-                    bad_fields = config["data_rules"]
-
-                    from_fields = columns(from_connection, table, "public")
-                    to_fields = columns(to_connection, table, "public")
-                    shared_fields = to_fields & from_fields
-                    extra_fields = to_fields - from_fields
-                    missing_fields = from_fields - to_fields
-
-                    from_sequences = sequences(from_connection, table, shared_fields)
-                    to_sequences = sequences(to_connection, table, shared_fields)
-                    shared_sequences = to_sequences & from_sequences
-                    extra_sequences = to_sequences - from_sequences
-                    missing_sequences = from_sequences - to_sequences
-
-                    where = opts[:where]
-
-                    @mutex.synchronize do
-                      log "* Syncing #{table}"
-                      if where
-                        log " #{where}"
-                        where = " WHERE #{opts[:where]}"
-                      end
-                      log " Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
-                      log " Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
-                      log " Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
-                      log " Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?
+          if opts[:list]
+            if args[0] == "groups"
+              pretty_list (config["groups"] || {}).keys
+            else
+              pretty_list tables
+            end
+          else
+            in_parallel(tables) do |table|
+              time =
+                benchmark do
+                  with_connection(from_uri) do |from_connection|
+                    with_connection(to_uri) do |to_connection|
+                      bad_fields = config["data_rules"]
+
+                      from_fields = columns(from_connection, table, "public")
+                      to_fields = columns(to_connection, table, "public")
+                      shared_fields = to_fields & from_fields
+                      extra_fields = to_fields - from_fields
+                      missing_fields = from_fields - to_fields
+
+                      from_sequences = sequences(from_connection, table, shared_fields)
+                      to_sequences = sequences(to_connection, table, shared_fields)
+                      shared_sequences = to_sequences & from_sequences
+                      extra_sequences = to_sequences - from_sequences
+                      missing_sequences = from_sequences - to_sequences
+
+                      where = opts[:where]
+                      limit = opts[:limit]
+                      sql_clause = String.new
+
+                      @mutex.synchronize do
+                        log "* Syncing #{table}"
+                        if where
+                          log " #{where}"
+                          sql_clause << " WHERE #{opts[:where]}"
+                        end
+                        if limit
+                          log " LIMIT #{limit}"
+                          sql_clause << " LIMIT #{limit}"
+                        end
+                        log " Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
+                        log " Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
+                        log " Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
+                        log " Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?

-                      if shared_fields.empty?
-                        log " No fields to copy"
+                        if shared_fields.empty?
+                          log " No fields to copy"
+                        end
                       end
-                    end

-                    if shared_fields.any?
-                      copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, bk| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], f, from_connection)} AS #{escape_identifier(f)}" : escape_identifier(f) }.join(", ")
-                      fields = shared_fields.map { |f| escape_identifier(f) }.join(", ")
+                      if shared_fields.any?
+                        copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, bk| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], f, from_connection)} AS #{escape_identifier(f)}" : escape_identifier(f) }.join(", ")
+                        fields = shared_fields.map { |f| escape_identifier(f) }.join(", ")

-                      seq_values = {}
-                      shared_sequences.each do |seq|
-                        seq_values[seq] = from_connection.exec("select last_value from #{seq}").to_a[0]["last_value"]
-                      end
+                        seq_values = {}
+                        shared_sequences.each do |seq|
+                          seq_values[seq] = from_connection.exec("select last_value from #{seq}").to_a[0]["last_value"]
+                        end

-                      to_connection.exec("TRUNCATE #{table} CASCADE")
-                      to_connection.copy_data "COPY #{table} (#{fields}) FROM STDIN" do
-                        from_connection.copy_data "COPY (SELECT #{copy_fields} FROM #{table}#{where}) TO STDOUT" do
-                          while row = from_connection.get_copy_data
-                            to_connection.put_copy_data(row)
+                        copy_to_command = "COPY (SELECT #{copy_fields} FROM #{table}#{sql_clause}) TO STDOUT"
+                        if opts[:preserve]
+                          primary_key = self.primary_key(from_connection, table, "public")
+                          abort "No primary key" unless primary_key
+
+                          temp_table = "pgsync_#{rand(1_000_000_000)}"
+                          file = Tempfile.new(temp_table)
+                          begin
+                            from_connection.copy_data copy_to_command do
+                              while row = from_connection.get_copy_data
+                                file.write(row)
+                              end
+                            end
+                            file.rewind
+
+                            to_connection.transaction do
+                              # create a temp table
+                              to_connection.exec("CREATE TABLE #{temp_table} AS SELECT * FROM #{table} WITH NO DATA")
+
+                              # load file
+                              to_connection.copy_data "COPY #{temp_table} (#{fields}) FROM STDIN" do
+                                file.each do |row|
+                                  to_connection.put_copy_data(row)
+                                end
+                              end
+
+                              # insert into
+                              to_connection.exec("INSERT INTO #{table} (SELECT * FROM #{temp_table} WHERE NOT EXISTS (SELECT 1 FROM #{table} WHERE #{table}.#{primary_key} = #{temp_table}.#{primary_key}))")
+
+                              # delete temp table
+                              to_connection.exec("DROP TABLE #{temp_table}")
+                            end
+                          ensure
+                            file.close
+                            file.unlink
+                          end
+                        else
+                          to_connection.exec("TRUNCATE #{table} CASCADE")
+                          to_connection.copy_data "COPY #{table} (#{fields}) FROM STDIN" do
+                            from_connection.copy_data copy_to_command do
+                              while row = from_connection.get_copy_data
+                                to_connection.put_copy_data(row)
+                              end
+                            end
                           end
                         end
-                      end
-                      seq_values.each do |seq, value|
-                        to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
+                        seq_values.each do |seq, value|
+                          to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
+                        end
                       end
                     end
                   end
                 end
-              end

-            @mutex.synchronize do
-              log "* DONE #{table} (#{time.round(1)}s)"
+              @mutex.synchronize do
+                log "* DONE #{table} (#{time.round(1)}s)"
+              end
             end
-          end

-          time = Time.now - start_time
-          log "Completed in #{time.round(1)}s"
+            time = Time.now - start_time
+            log "Completed in #{time.round(1)}s"
+          end
         end
       end
       true
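
In caller terms, the branch above distinguishes two modes; a minimal sketch (the `--where` example mirrors the README change earlier):

```sh
# default: the destination table is truncated, then repopulated via COPY
pgsync products --where "id < 100"

# --preserve: rows are staged in a temp table and inserted only when the destination
# has no row with the same primary key (the INSERT ... WHERE NOT EXISTS above)
pgsync products --where "id < 100" --preserve
```
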
@@ -189,12 +249,15 @@ Options:}
         o.string "--from", "source"
         o.string "--to", "destination"
         o.string "--where", "where"
+        o.integer "--limit", "limit"
         o.string "--exclude", "exclude tables"
         o.string "--config", "config file"
         o.string "--db", "database"
         # TODO much better name for this option
         o.boolean "--to-safe", "accept danger", default: false
         o.boolean "--debug", "debug", default: false
+        o.boolean "--list", "list", default: false
+        o.boolean "--preserve", "preserve", default: false
         o.on "-v", "--version", "print the version" do
           log PgSync::VERSION
           @exit = true
@@ -294,6 +357,27 @@ Options:}
       conn.exec_params(query, [schema, table]).to_a.size > 0
     end
 
+    # http://stackoverflow.com/a/20537829
+    def primary_key(conn, table, schema)
+      query = <<-SQL
+        SELECT
+          pg_attribute.attname,
+          format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
+        FROM
+          pg_index, pg_class, pg_attribute, pg_namespace
+        WHERE
+          pg_class.oid = $2::regclass AND
+          indrelid = pg_class.oid AND
+          nspname = $1 AND
+          pg_class.relnamespace = pg_namespace.oid AND
+          pg_attribute.attrelid = pg_class.oid AND
+          pg_attribute.attnum = any(pg_index.indkey) AND
+          indisprimary
+      SQL
+      row = conn.exec_params(query, [schema, table]).to_a[0]
+      row && row["attname"]
+    end
+
     # TODO better performance
     def rule_match?(table, column, rule)
       regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
@@ -359,8 +443,10 @@ Options:}
 
     def parse_uri(url)
       uri = URI.parse(url)
+      uri.scheme ||= "postgres"
       uri.host ||= "localhost"
       uri.port ||= 5432
+      uri.path = "/#{uri.path}" if uri.path && uri.path[0] != "/"
       uri
     end
 
@@ -419,5 +505,11 @@ Options:}
         Parallel.each(tables, &block)
       end
     end
+
+    def pretty_list(items)
+      items.each do |item|
+        log item
+      end
+    end
   end
 end
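
If the `parse_uri` tweak above is read correctly, a bare database name and a full `postgres://` URL should now normalize to the same connection settings; a guess at two equivalent invocations (database names are placeholders):

```sh
pgsync --from myapp_production --to myapp_development
pgsync --from postgres://localhost:5432/myapp_production --to postgres://localhost:5432/myapp_development
```
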
data/lib/pgsync/version.rb CHANGED
@@ -1,3 +1,3 @@
 module PgSync
-  VERSION = "0.2.3"
+  VERSION = "0.2.4"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pgsync
 version: !ruby/object:Gem::Version
-  version: 0.2.3
+  version: 0.2.4
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-03-31 00:00:00.000000000 Z
+date: 2016-04-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: slop