pgsync 0.3.8 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +13 -4
- data/lib/pgsync/client.rb +280 -0
- data/lib/pgsync/data_source.rb +191 -0
- data/lib/pgsync/table_list.rb +105 -0
- data/lib/pgsync/table_sync.rb +239 -0
- data/lib/pgsync/version.rb +1 -1
- data/lib/pgsync.rb +7 -659
- metadata +6 -2
@@ -0,0 +1,239 @@
|
|
1
|
+
module PgSync
  # Copies the rows of one table from a source database to a destination
  # database using PostgreSQL COPY, optionally rewriting column values
  # according to the "data_rules" section of the config.
  class TableSync
    # Runs #sync for +table+ and logs the elapsed time once it finishes.
    #
    # Parameters are passed straight through to #sync; +mutex+ is also used
    # to serialize the final log line.
    def sync_with_benchmark(mutex, config, table, opts, source_url, destination_url)
      time =
        benchmark do
          sync(mutex, config, table, opts, source_url, destination_url)
        end

      mutex.synchronize do
        log "* DONE #{table} (#{time.round(1)}s)"
      end
    end

    # Syncs a single table from the source to the destination.
    #
    # mutex           - Mutex held while writing log output.
    # config          - Hash; only config["data_rules"] is read here.
    # table           - table name, optionally schema-qualified ("schema.table").
    # opts            - Hash of options read here: :no_rules, :sql, :in_batches,
    #                   :batch_size, :sleep, :overwrite, :preserve, :truncate.
    # source_url      - passed to DataSource.new for the source side.
    # destination_url - passed to DataSource.new for the destination side.
    #
    # Only columns present on both sides are copied. One of three copy modes
    # is used: batched by primary key (:in_batches), merge through a temporary
    # table (:overwrite, :preserve or a custom :sql clause, unless :truncate),
    # or truncate-and-copy. Afterwards shared sequences on the destination are
    # set to the values read from the source.
    #
    # Raises PgSync::Error when --overwrite is combined with --in-batches or
    # when a required primary key is missing.
    def sync(mutex, config, table, opts, source_url, destination_url)
      source = DataSource.new(source_url)
      destination = DataSource.new(destination_url)

      from_connection = source.conn
      to_connection = destination.conn

      begin
        bad_fields = opts[:no_rules] ? [] : config["data_rules"]

        from_fields = source.columns(table)
        to_fields = destination.columns(table)
        shared_fields = to_fields & from_fields
        extra_fields = to_fields - from_fields
        missing_fields = from_fields - to_fields

        from_sequences = source.sequences(table, shared_fields)
        to_sequences = destination.sequences(table, shared_fields)
        shared_sequences = to_sequences & from_sequences
        extra_sequences = to_sequences - from_sequences
        missing_sequences = from_sequences - to_sequences

        # String.new gives a mutable buffer that is appended to below.
        sql_clause = String.new

        mutex.synchronize do
          log "* Syncing #{table}"
          if opts[:sql]
            log " #{opts[:sql]}"
            sql_clause << " #{opts[:sql]}"
          end
          log " Extra columns: #{extra_fields.join(", ")}" if extra_fields.any?
          log " Missing columns: #{missing_fields.join(", ")}" if missing_fields.any?
          log " Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any?
          log " Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any?

          if shared_fields.empty?
            log " No fields to copy"
          end
        end

        if shared_fields.any?
          quoted_table = quote_ident_full(table)
          copy_fields = select_list(table, shared_fields, bad_fields.to_a)
          fields = shared_fields.map { |f| quote_ident(f) }.join(", ")

          # Read sequence values before copying; they are applied at the end.
          seq_values = {}
          shared_sequences.each do |seq|
            seq_values[seq] = source.last_value(seq)
          end

          copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT"
          copy_from_command = "COPY #{quoted_table} (#{fields}) FROM STDIN"

          if opts[:in_batches]
            raise PgSync::Error, "Cannot use --overwrite with --in-batches" if opts[:overwrite]

            primary_key = source.primary_key(table)
            raise PgSync::Error, "No primary key" unless primary_key

            destination.truncate(table) if opts[:truncate]

            from_max_id = source.max_id(table, primary_key)
            to_max_id = destination.max_id(table, primary_key) + 1

            # to_max_id == 1 means the destination max id was 0; start at the
            # source's smallest id instead of 1 when that id is positive.
            if to_max_id == 1
              from_min_id = source.min_id(table, primary_key)
              to_max_id = from_min_id if from_min_id > 0
            end

            starting_id = to_max_id
            batch_size = opts[:batch_size]
            quoted_key = quote_ident(primary_key)

            i = 1
            batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil

            while starting_id <= from_max_id
              where = "#{quoted_key} >= #{starting_id} AND #{quoted_key} < #{starting_id + batch_size}"
              # Hold the mutex like every other log call in this method.
              mutex.synchronize do
                log " #{i}/#{batch_count}: #{where}"
              end

              # TODO be smarter for advance sql clauses
              batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}"

              batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT"
              stream_copy(from_connection, to_connection, batch_copy_to_command, copy_from_command)

              starting_id += batch_size
              i += 1

              # Pause between batches, but not after the last one.
              if opts[:sleep] && starting_id <= from_max_id
                sleep(opts[:sleep])
              end
            end
          elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?)
            primary_key = destination.primary_key(table)
            raise PgSync::Error, "No primary key" unless primary_key

            quoted_key = quote_ident(primary_key)
            temp_table = "pgsync_#{rand(1_000_000_000)}"
            quoted_temp = quote_ident_full(temp_table)
            file = Tempfile.new(temp_table)
            begin
              # Spool the source rows to a local file first.
              from_connection.copy_data copy_to_command do
                while (row = from_connection.get_copy_data)
                  file.write(row)
                end
              end
              file.rewind

              # create a temp table
              to_connection.exec("CREATE TEMPORARY TABLE #{quoted_temp} AS SELECT * FROM #{quoted_table} WITH NO DATA")

              # load file
              to_connection.copy_data "COPY #{quoted_temp} (#{fields}) FROM STDIN" do
                file.each do |row|
                  to_connection.put_copy_data(row)
                end
              end

              if opts[:preserve]
                # insert only rows whose primary key is not already present
                to_connection.exec("INSERT INTO #{quoted_table} (SELECT * FROM #{quoted_temp} WHERE NOT EXISTS (SELECT 1 FROM #{quoted_table} WHERE #{quoted_table}.#{quoted_key} = #{quoted_temp}.#{quoted_key}))")
              else
                # replace matching rows atomically
                to_connection.transaction do
                  to_connection.exec("DELETE FROM #{quoted_table} WHERE #{quoted_key} IN (SELECT #{quoted_key} FROM #{quoted_temp})")
                  to_connection.exec("INSERT INTO #{quoted_table} (SELECT * FROM #{quoted_temp})")
                end
              end
            ensure
              file.close
              file.unlink
            end
          else
            destination.truncate(table)
            stream_copy(from_connection, to_connection, copy_to_command, copy_from_command)
          end

          seq_values.each do |seq, value|
            to_connection.exec("SELECT setval(#{escape(seq)}, #{escape(value)})")
          end
        end
      ensure
        # Close the destination even if closing the source raises.
        begin
          source.close
        ensure
          destination.close
        end
      end
    end

    private

    # Builds the SELECT list for the COPY query: each shared column is either
    # replaced by its data-rule expression (aliased to the column name) or
    # selected as-is, qualified with the quoted table name.
    #
    # rules - Array of [pattern, rule] pairs; the first matching pattern wins.
    def select_list(table, fields, rules)
      quoted_table = quote_ident_full(table)
      expressions =
        fields.map do |field|
          # Keep the whole pair: a matching rule may legitimately be nil.
          matched = rules.find { |pattern, _rule| rule_match?(table, field, pattern) }
          if matched
            "#{apply_strategy(matched[1], table, field)} AS #{quote_ident(field)}"
          else
            "#{quoted_table}.#{quote_ident(field)}"
          end
        end
      expressions.join(", ")
    end

    # Streams rows from a COPY ... TO STDOUT on the source connection directly
    # into a COPY ... FROM STDIN on the destination connection.
    def stream_copy(from_connection, to_connection, copy_to_command, copy_from_command)
      to_connection.copy_data copy_from_command do
        from_connection.copy_data copy_to_command do
          while (row = from_connection.get_copy_data)
            to_connection.put_copy_data(row)
          end
        end
      end
    end

    # Returns a truthy value when +rule+ (a pattern in which "*" matches any
    # run of characters other than ".") matches the column name, the
    # "table.column" name without schema, or the full "table.column" name.
    #
    # TODO better performance
    def rule_match?(table, column, rule)
      regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*','[^\.]*') + '\z')
      unqualified_table = table.split(".", 2)[-1]
      regex.match(column) || regex.match("#{unqualified_table}.#{column}") || regex.match("#{table}.#{column}")
    end

    # Returns the SQL expression that replaces +column+ for the given rule.
    #
    # rule - a Hash with a "value" key (literal, escaped) or a "statement" key
    #        (raw SQL used verbatim), or the name of a built-in strategy
    #        (nil is treated like "null").
    #
    # Raises PgSync::Error for an unknown rule.
    #
    # TODO wildcard rules
    def apply_strategy(rule, table, column)
      if rule.is_a?(Hash)
        if rule.key?("value")
          escape(rule["value"])
        elsif rule.key?("statement")
          rule["statement"]
        else
          raise PgSync::Error, "Unknown rule #{rule.inspect} for column #{column}"
        end
      else
        # A case dispatch builds only the requested expression. The id-based
        # strategies quote the table so they match the quoted FROM clause.
        case rule
        when "unique_email" then "'email' || #{quote_ident_full(table)}.id || '@example.org'"
        when "untouched" then quote_ident(column)
        when "unique_phone" then "(#{quote_ident_full(table)}.id + 1000000000)::text"
        when "random_int" then "(RANDOM() * 10)::int" # PostgreSQL has random(), not rand()
        when "random_date" then "'1970-01-01'"
        when "random_time" then "NOW()"
        when "unique_secret" then "'secret' || #{quote_ident_full(table)}.id"
        when "random_ip" then "'127.0.0.1'"
        when "random_letter" then "'A'"
        when "random_string" then "right(md5(random()::text),10)"
        when "random_number" then "(RANDOM() * 1000000)::int"
        when "null", nil then "NULL"
        else
          raise PgSync::Error, "Unknown rule #{rule} for column #{column}"
        end
      end
    end

    # Yields and returns the elapsed time in seconds as a Float. Uses the
    # monotonic clock so system clock adjustments cannot skew the result.
    def benchmark
      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      yield
      Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    end

    # Writes +message+ to standard error.
    def log(message = nil)
      $stderr.puts message
    end

    # Quotes each dot-separated part of +ident+ (e.g. "schema.table").
    def quote_ident_full(ident)
      ident.split(".").map { |v| quote_ident(v) }.join(".")
    end

    # Quotes a single SQL identifier via PG::Connection.quote_ident.
    def quote_ident(value)
      PG::Connection.quote_ident(value)
    end

    # Converts a Ruby value to SQL literal text: nil becomes NULL, Strings are
    # single-quoted and escaped, anything else is returned unchanged.
    def escape(value)
      case value
      when nil then "NULL"
      when String then "'#{quote_string(value)}'"
      else value
      end
    end

    # Doubles backslashes and single quotes in +s+.
    # activerecord
    def quote_string(s)
      s.gsub(/\\/, '\&\&').gsub(/'/, "''")
    end
  end
end
|
data/lib/pgsync/version.rb
CHANGED