pgsync 0.3.8 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pgsync might be problematic. Click here for more details.

@@ -0,0 +1,239 @@
1
module PgSync
  # Copies the rows of one table from a source database to a destination
  # database using PostgreSQL COPY, optionally rewriting column values
  # according to the configured data rules.
  class TableSync
    # Runs #sync for +table+ and logs the elapsed time once it finishes.
    #
    # Takes the same arguments as #sync. The log line is written while holding
    # +mutex+, like the other log lines emitted from #sync.
    def sync_with_benchmark(mutex, config, table, opts, source_url, destination_url)
      time =
        benchmark do
          sync(mutex, config, table, opts, source_url, destination_url)
        end

      mutex.synchronize do
        log "* DONE #{table} (#{time.round(1)}s)"
      end
    end

    # Syncs +table+ from the database at +source_url+ to the one at
    # +destination_url+.
    #
    # mutex  - object responding to #synchronize; held around log output
    # config - Hash; only config["data_rules"] is read here
    # table  - table name String, optionally schema-qualified ("schema.table")
    # opts   - Hash of options read here: :no_rules, :sql, :in_batches,
    #          :batch_size, :sleep, :overwrite, :preserve, :truncate
    #
    # One of three strategies is used:
    # * opts[:in_batches]: append rows in primary-key ranges of
    #   opts[:batch_size], starting after the destination's current max id.
    # * not opts[:truncate] and (opts[:overwrite], opts[:preserve] or an
    #   opts[:sql] clause): load the rows into a temporary table, then merge
    #   them into the destination by primary key.
    # * otherwise: truncate the destination table and stream every row.
    #
    # Afterwards each sequence present on both sides is set to the value read
    # from the source before the copy started.
    #
    # Raises PgSync::Error when --overwrite is combined with --in-batches, or
    # when a strategy needs a primary key and none is found.
    def sync(mutex, config, table, opts, source_url, destination_url)
      source = DataSource.new(source_url)
      destination = DataSource.new(destination_url)

      from_connection = source.conn
      to_connection = destination.conn

      begin
        bad_fields = opts[:no_rules] ? [] : config["data_rules"]

        from_fields = source.columns(table)
        to_fields = destination.columns(table)
        shared_fields = to_fields & from_fields
        extra_fields = to_fields - from_fields
        missing_fields = from_fields - to_fields

        from_sequences = source.sequences(table, shared_fields)
        to_sequences = destination.sequences(table, shared_fields)
        shared_sequences = to_sequences & from_sequences
        extra_sequences = to_sequences - from_sequences
        missing_sequences = from_sequences - to_sequences

        sql_clause = String.new

        mutex.synchronize do
          log "* Syncing #{table}"
          if opts[:sql]
            log " #{opts[:sql]}"
            sql_clause << " #{opts[:sql]}"
          end
          log " Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
          log " Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
          log " Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
          log " Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?

          if shared_fields.empty?
            log " No fields to copy"
          end
        end

        if shared_fields.any?
          quoted_table = quote_ident_full(table)
          # data_rules may be a Hash (pattern => rule), nil, or [] when rules are disabled
          rules = bad_fields.to_a
          copy_fields = shared_fields.map { |f| select_expression(table, f, rules) }.join(", ")
          fields = shared_fields.map { |f| quote_ident(f) }.join(", ")
          copy_from_command = "COPY #{quoted_table} (#{fields}) FROM STDIN"

          # read the source sequence values before any rows are copied
          seq_values = {}
          shared_sequences.each do |seq|
            seq_values[seq] = source.last_value(seq)
          end

          copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
          if opts[:in_batches]
            raise PgSync::Error, "Cannot use --overwrite with --in-batches" if opts[:overwrite]

            primary_key = source.primary_key(table)
            raise PgSync::Error, "No primary key" unless primary_key

            destination.truncate(table) if opts[:truncate]

            from_max_id = source.max_id(table, primary_key)
            to_max_id = destination.max_id(table, primary_key) + 1

            # nothing in the destination yet: start at the source's first id
            if to_max_id == 1
              from_min_id = source.min_id(table, primary_key)
              to_max_id = from_min_id if from_min_id > 0
            end

            starting_id = to_max_id
            batch_size = opts[:batch_size]
            quoted_key = quote_ident(primary_key)

            i = 1
            batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil

            while starting_id <= from_max_id
              where = "#{quoted_key} >= #{starting_id} AND #{quoted_key} < #{starting_id + batch_size}"
              # hold the mutex so progress lines do not interleave with other tables' output
              mutex.synchronize do
                log " #{i}/#{batch_count}: #{where}"
              end

              # TODO be smarter for advance sql clauses
              batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"

              batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
              stream_copy(from_connection, to_connection, batch_copy_to_command, copy_from_command)

              starting_id += batch_size
              i += 1

              # pause between batches, but not after the last one
              if opts[:sleep] && starting_id <= from_max_id
                sleep(opts[:sleep])
              end
            end
          elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
            primary_key = destination.primary_key(table)
            raise PgSync::Error, "No primary key" unless primary_key

            temp_table = "pgsync_#{rand(1_000_000_000)}"
            quoted_temp_table = quote_ident_full(temp_table)
            quoted_key = quote_ident(primary_key)
            file = Tempfile.new(temp_table)
            begin
              # spool the source rows to disk first
              from_connection.copy_data copy_to_command do
                while (row = from_connection.get_copy_data)
                  file.write(row)
                end
              end
              file.rewind

              # create a temp table
              to_connection.exec("CREATE TEMPORARY TABLE #{quoted_temp_table} AS SELECT * FROM #{quoted_table} WITH NO DATA")

              # load file
              to_connection.copy_data "COPY #{quoted_temp_table} (#{fields}) FROM STDIN" do
                file.each do |row|
                  to_connection.put_copy_data(row)
                end
              end

              if opts[:preserve]
                # insert only the rows whose primary key is not in the destination yet
                to_connection.exec("INSERT INTO #{quoted_table} (SELECT * FROM #{quoted_temp_table} WHERE NOT EXISTS (SELECT 1 FROM #{quoted_table} WHERE #{quoted_table}.#{quoted_key} = #{quoted_temp_table}.#{quoted_key}))")
              else
                # replace matching rows: delete and re-insert inside one transaction
                to_connection.transaction do
                  to_connection.exec("DELETE FROM #{quoted_table} WHERE #{quoted_key} IN (SELECT #{quoted_key} FROM #{quoted_temp_table})")
                  to_connection.exec("INSERT INTO #{quoted_table} (SELECT * FROM #{quoted_temp_table})")
                end
              end
            ensure
              file.close
              file.unlink
            end
          else
            destination.truncate(table)
            stream_copy(from_connection, to_connection, copy_to_command, copy_from_command)
          end

          seq_values.each do |seq, value|
            to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
          end
        end
      ensure
        # close the destination even if closing the source raises
        begin
          source.close
        ensure
          destination.close
        end
      end
    end

    private

    # Builds the select-list entry for +field+: the rewritten expression of
    # the first rule in +rules+ (an Array of [pattern, rule] pairs) whose
    # pattern matches, aliased to the column name, or the table-qualified
    # column when no rule matches.
    def select_expression(table, field, rules)
      matched = rules.find { |pattern, _rule| rule_match?(table, field, pattern) }
      if matched
        # matched[1] may legitimately be nil, which apply_strategy maps to NULL
        "#{apply_strategy(matched[1], table, field)} AS #{quote_ident(field)}"
      else
        "#{quote_ident_full(table)}.#{quote_ident(field)}"
      end
    end

    # Pipes every row produced by +copy_to_command+ on +from_connection+ into
    # +copy_from_command+ on +to_connection+.
    def stream_copy(from_connection, to_connection, copy_to_command, copy_from_command)
      to_connection.copy_data copy_from_command do
        from_connection.copy_data copy_to_command do
          while (row = from_connection.get_copy_data)
            to_connection.put_copy_data(row)
          end
        end
      end
    end

    # Returns a truthy value when +rule+ (a pattern in which "*" matches any
    # run of characters other than ".") matches the whole of "column",
    # "table.column" with any schema prefix stripped from +table+, or
    # "<table as given>.column".
    #
    # TODO better performance
    def rule_match?(table, column, rule)
      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
      regex.match(column) || regex.match("#{table.split(".", 2)[-1]}.#{column}") || regex.match("#{table}.#{column}")
    end

    # Returns the SQL expression that replaces +column+ for the given +rule+.
    #
    # A Hash rule supplies either a literal under "value" (escaped here) or a
    # raw SQL expression under "statement". Any other rule is looked up by
    # name in the table of built-in strategies; nil maps to NULL.
    #
    # Raises PgSync::Error for an unrecognised rule.
    #
    # TODO wildcard rules
    def apply_strategy(rule, table, column)
      if rule.is_a?(Hash)
        if rule.key?("value")
          escape(rule["value"])
        elsif rule.key?("statement")
          rule["statement"]
        else
          raise PgSync::Error, "Unknown rule #{rule.inspect} for column #{column}"
        end
      else
        # quote the table so the reference matches the quoted FROM clause
        quoted_table = quote_ident_full(table)
        strategies = {
          "unique_email" => "'email' || #{quoted_table}.id || '@example.org'",
          "untouched" => quote_ident(column),
          "unique_phone" => "(#{quoted_table}.id + 1000000000)::text",
          # PostgreSQL's random function is RANDOM(); there is no RAND()
          "random_int" => "(RANDOM() * 10)::int",
          "random_date" => "'1970-01-01'",
          "random_time" => "NOW()",
          "unique_secret" => "'secret' || #{quoted_table}.id",
          "random_ip" => "'127.0.0.1'",
          "random_letter" => "'A'",
          "random_string" => "right(md5(random()::text),10)",
          "random_number" => "(RANDOM() * 1000000)::int",
          "null" => "NULL",
          nil => "NULL"
        }
        strategies.fetch(rule) do
          raise PgSync::Error, "Unknown rule #{rule} for column #{column}"
        end
      end
    end

    # Yields and returns the elapsed time in seconds as a Float. Uses the
    # monotonic clock so system clock adjustments cannot skew the result.
    def benchmark
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      yield
      Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    end

    # Writes +message+ to standard error.
    def log(message = nil)
      $stderr.puts message
    end

    # Quotes each dot-separated part of +ident+ (e.g. "schema.table") separately.
    def quote_ident_full(ident)
      ident.split(".").map { |v| quote_ident(v) }.join(".")
    end

    # Quotes a single identifier via PG::Connection.quote_ident.
    def quote_ident(value)
      PG::Connection.quote_ident(value)
    end

    # Returns +value+ as a single-quoted SQL string literal when it is a
    # String; any other value is returned unchanged.
    def escape(value)
      if value.is_a?(String)
        "'#{quote_string(value)}'"
      else
        value
      end
    end

    # Doubles backslashes and single quotes in +s+.
    #
    # activerecord
    def quote_string(s)
      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module PgSync
2
- VERSION = "0.3.8"
2
+ VERSION = "0.3.9"
3
3
  end