rubyrep 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +137 -0
- data/README.txt +37 -0
- data/Rakefile +30 -0
- data/bin/rubyrep +8 -0
- data/config/hoe.rb +72 -0
- data/config/mysql_config.rb +25 -0
- data/config/postgres_config.rb +21 -0
- data/config/proxied_test_config.rb +14 -0
- data/config/redmine_config.rb +17 -0
- data/config/rep_config.rb +20 -0
- data/config/requirements.rb +32 -0
- data/config/test_config.rb +20 -0
- data/lib/rubyrep/base_runner.rb +195 -0
- data/lib/rubyrep/command_runner.rb +144 -0
- data/lib/rubyrep/committers/buffered_committer.rb +140 -0
- data/lib/rubyrep/committers/committers.rb +146 -0
- data/lib/rubyrep/configuration.rb +240 -0
- data/lib/rubyrep/connection_extenders/connection_extenders.rb +133 -0
- data/lib/rubyrep/connection_extenders/jdbc_extender.rb +284 -0
- data/lib/rubyrep/connection_extenders/mysql_extender.rb +168 -0
- data/lib/rubyrep/connection_extenders/postgresql_extender.rb +261 -0
- data/lib/rubyrep/database_proxy.rb +52 -0
- data/lib/rubyrep/direct_table_scan.rb +75 -0
- data/lib/rubyrep/generate_runner.rb +105 -0
- data/lib/rubyrep/initializer.rb +39 -0
- data/lib/rubyrep/logged_change.rb +326 -0
- data/lib/rubyrep/proxied_table_scan.rb +171 -0
- data/lib/rubyrep/proxy_block_cursor.rb +145 -0
- data/lib/rubyrep/proxy_connection.rb +318 -0
- data/lib/rubyrep/proxy_cursor.rb +44 -0
- data/lib/rubyrep/proxy_row_cursor.rb +43 -0
- data/lib/rubyrep/proxy_runner.rb +89 -0
- data/lib/rubyrep/replication_difference.rb +91 -0
- data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
- data/lib/rubyrep/replication_extenders/postgresql_replication.rb +204 -0
- data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
- data/lib/rubyrep/replication_helper.rb +104 -0
- data/lib/rubyrep/replication_initializer.rb +307 -0
- data/lib/rubyrep/replication_run.rb +48 -0
- data/lib/rubyrep/replication_runner.rb +138 -0
- data/lib/rubyrep/replicators/replicators.rb +37 -0
- data/lib/rubyrep/replicators/two_way_replicator.rb +334 -0
- data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
- data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
- data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
- data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
- data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
- data/lib/rubyrep/scan_runner.rb +25 -0
- data/lib/rubyrep/session.rb +177 -0
- data/lib/rubyrep/sync_helper.rb +111 -0
- data/lib/rubyrep/sync_runner.rb +31 -0
- data/lib/rubyrep/syncers/syncers.rb +112 -0
- data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
- data/lib/rubyrep/table_scan.rb +54 -0
- data/lib/rubyrep/table_scan_helper.rb +38 -0
- data/lib/rubyrep/table_sorter.rb +70 -0
- data/lib/rubyrep/table_spec_resolver.rb +136 -0
- data/lib/rubyrep/table_sync.rb +68 -0
- data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
- data/lib/rubyrep/type_casting_cursor.rb +31 -0
- data/lib/rubyrep/uninstall_runner.rb +92 -0
- data/lib/rubyrep/version.rb +9 -0
- data/lib/rubyrep.rb +68 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/sims/performance/big_rep_spec.rb +100 -0
- data/sims/performance/big_scan_spec.rb +57 -0
- data/sims/performance/big_sync_spec.rb +141 -0
- data/sims/performance/performance.rake +228 -0
- data/sims/sim_helper.rb +24 -0
- data/spec/base_runner_spec.rb +218 -0
- data/spec/buffered_committer_spec.rb +271 -0
- data/spec/command_runner_spec.rb +145 -0
- data/spec/committers_spec.rb +174 -0
- data/spec/configuration_spec.rb +198 -0
- data/spec/connection_extender_interface_spec.rb +138 -0
- data/spec/connection_extenders_registration_spec.rb +129 -0
- data/spec/database_proxy_spec.rb +48 -0
- data/spec/database_rake_spec.rb +40 -0
- data/spec/db_specific_connection_extenders_spec.rb +34 -0
- data/spec/db_specific_replication_extenders_spec.rb +38 -0
- data/spec/direct_table_scan_spec.rb +61 -0
- data/spec/generate_runner_spec.rb +84 -0
- data/spec/initializer_spec.rb +46 -0
- data/spec/logged_change_spec.rb +480 -0
- data/spec/postgresql_replication_spec.rb +48 -0
- data/spec/postgresql_support_spec.rb +57 -0
- data/spec/progress_bar_spec.rb +77 -0
- data/spec/proxied_table_scan_spec.rb +151 -0
- data/spec/proxy_block_cursor_spec.rb +197 -0
- data/spec/proxy_connection_spec.rb +399 -0
- data/spec/proxy_cursor_spec.rb +56 -0
- data/spec/proxy_row_cursor_spec.rb +66 -0
- data/spec/proxy_runner_spec.rb +70 -0
- data/spec/replication_difference_spec.rb +160 -0
- data/spec/replication_extender_interface_spec.rb +365 -0
- data/spec/replication_extenders_spec.rb +32 -0
- data/spec/replication_helper_spec.rb +121 -0
- data/spec/replication_initializer_spec.rb +477 -0
- data/spec/replication_run_spec.rb +166 -0
- data/spec/replication_runner_spec.rb +213 -0
- data/spec/replicators_spec.rb +31 -0
- data/spec/rubyrep_spec.rb +8 -0
- data/spec/scan_detail_reporter_spec.rb +119 -0
- data/spec/scan_progress_printers_spec.rb +68 -0
- data/spec/scan_report_printers_spec.rb +67 -0
- data/spec/scan_runner_spec.rb +50 -0
- data/spec/scan_summary_reporter_spec.rb +61 -0
- data/spec/session_spec.rb +212 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +295 -0
- data/spec/sync_helper_spec.rb +157 -0
- data/spec/sync_runner_spec.rb +78 -0
- data/spec/syncers_spec.rb +171 -0
- data/spec/table_scan_helper_spec.rb +29 -0
- data/spec/table_scan_spec.rb +49 -0
- data/spec/table_sorter_spec.rb +31 -0
- data/spec/table_spec_resolver_spec.rb +102 -0
- data/spec/table_sync_spec.rb +84 -0
- data/spec/trigger_mode_switcher_spec.rb +83 -0
- data/spec/two_way_replicator_spec.rb +551 -0
- data/spec/two_way_syncer_spec.rb +256 -0
- data/spec/type_casting_cursor_spec.rb +50 -0
- data/spec/uninstall_runner_spec.rb +86 -0
- data/tasks/database.rake +439 -0
- data/tasks/deployment.rake +29 -0
- data/tasks/environment.rake +9 -0
- data/tasks/java.rake +37 -0
- data/tasks/redmine_test.rake +47 -0
- data/tasks/rspec.rake +68 -0
- data/tasks/rubyrep.tailor +18 -0
- data/tasks/stats.rake +19 -0
- data/tasks/task_helper.rb +20 -0
- data.tar.gz.sig +0 -0
- metadata +243 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,326 @@
|
|
1
|
+
module RR

  # Reopens Session to give it access to per-database change-log caches.
  class Session

    # Returns the +LoggedChangeLoader+ of the specified database.
    # Loaders are created lazily and memoized per database side.
    # * database: either :+left+ or :+right+
    def change_loader(database)
      @change_loaders ||= {}
      unless change_loader = @change_loaders[database]
        change_loader = @change_loaders[database] = LoggedChangeLoader.new(self, database)
      end
      change_loader
    end

    # Forces an update of the change log cache
    # (bypasses the loaders' expiry check on both databases).
    def reload_changes
      change_loader(:left).update :forced => true
      change_loader(:right).update :forced => true
    end

  end

  # Caches the entries in the change log table
  class LoggedChangeLoader

    # The current +Session+.
    attr_accessor :session

    # The current +ProxyConnection+.
    attr_accessor :connection

    # Index to the next unprocessed change in the +change_array+.
    attr_accessor :current_index

    # ID of the last cached change log record.
    attr_accessor :current_id

    # Array with all cached changes.
    # Processed change log records are replaced with +nil+.
    attr_accessor :change_array

    # Tree (hash) structure for fast access to all cached changes.
    # First level of tree:
    # * key: table name
    # * value: 2nd level tree
    # 2nd level tree:
    # * key: the change_key value of the according change log records.
    # * value:
    #   The according change log record (column_name => value hash).
    #   Additional entry of each change log hash:
    #   * key: 'array_index'
    #   * value: index to the change log record in +change_array+
    attr_accessor :change_tree

    # Date of last update of the cache
    attr_accessor :last_updated

    # Initializes / resets the cache.
    def init_cache
      self.change_tree = {}
      self.change_array = []
      self.current_index = 0
    end
    private :init_cache

    # Create a new change log record cache.
    # * +session+: The current +Session+
    # * +database+: Either :+left+ or :+right+
    def initialize(session, database)
      self.session = session
      self.connection = session.send(database)

      init_cache
      # -1 ensures the very first #update reads all existing change log records.
      self.current_id = -1
      # NOTE(review): 1.year.ago requires ActiveSupport's Time extensions —
      # assumed to be loaded elsewhere in the gem; verify before changing.
      self.last_updated = 1.year.ago
    end

    # Updates the cache.
    # Options is a hash determining when the update is actually executed:
    # * :+expire_time+: cache is older than the given number of seconds
    # * :+forced+: if +true+ update the cache even if not yet expired
    def update(options = {:forced => false, :expire_time => 1})
      return unless options[:forced] or Time.now - self.last_updated >= options[:expire_time]

      self.last_updated = Time.now

      # Reads only records newer than the last cached id (current_id is
      # excluded via :exclude_starting_row).
      cursor = connection.select_cursor(
        :table => change_log_table,
        :from => {'id' => current_id},
        :exclude_starting_row => true,
        :type_cast => true
      )
      while cursor.next?
        change = cursor.next_row
        self.current_id = change['id']
        self.change_array << change
        change['array_index'] = self.change_array.size - 1

        # Insert the record into the two-level lookup tree as well.
        table_change_tree = change_tree[change['change_table']] ||= {}
        key_changes = table_change_tree[change['change_key']] ||= []
        key_changes << change
      end
      cursor.clear
    end

    # Returns the creation time of the oldest unprocessed change log record.
    # Returns +nil+ if there are no unprocessed changes.
    def oldest_change_time
      change = oldest_change
      change['change_time'] if change
    end

    # Returns the oldest unprocessed change log record (column_name => value hash).
    def oldest_change
      update
      oldest_change = nil
      unless change_array.empty?
        # Skip over entries that #load already consumed (they are nil-ed out).
        while (oldest_change = change_array[self.current_index]) == nil
          self.current_index += 1
        end
      end
      oldest_change
    end

    # Returns the specified change log record (column_name => value hash) and
    # removes it from the cache AND from the change log table in the database.
    # Returns +nil+ if no matching record exists.
    # * +change_table+: the name of the table that was changed
    # * +change_key+: the change key of the modified record
    def load(change_table, change_key)
      update
      change = nil
      table_change_tree = change_tree[change_table]
      if table_change_tree
        key_changes = table_change_tree[change_key]
        if key_changes
          # get change object and delete from key_changes
          change = key_changes.shift

          # delete change from change_array
          change_array[change['array_index']] = nil

          # delete change from database
          connection.execute "delete from #{change_log_table} where id = #{change['id']}"

          # delete key_changes if empty
          if key_changes.empty?
            table_change_tree.delete change_key
          end

          # delete table_change_tree if empty
          if table_change_tree.empty?
            change_tree.delete change_table
          end

          # reset everything if no more changes remain
          if change_tree.empty?
            init_cache
          end
        end
      end
      change
    end

    # Returns the name of the change log table
    # (built from the configured :rep_prefix option).
    def change_log_table
      @change_log_table ||= "#{session.configuration.options[:rep_prefix]}_pending_changes"
    end
    private :change_log_table
  end

  # Describes a single logged record change.
  #
  # Note:
  # The change loading functionality depends on the current database session
  # being executed in an open database transaction.
  # Also at the end of change processing the transaction must be committed.
  class LoggedChange

    # The current Session
    attr_accessor :session

    # The database which was changed. Either :+left+ or :+right+.
    attr_accessor :database

    # The name of the changed table
    attr_accessor :table

    # When the first change to the record happened
    attr_accessor :first_changed_at

    # When the last change to the record happened
    attr_accessor :last_changed_at

    # Type of the change. Either :+insert+, :+update+ or :+delete+.
    attr_accessor :type

    # A column_name => value hash identifying the changed record.
    # For updates this is the original primary key (see #load).
    attr_accessor :key

    # Only used for updates: a column_name => value hash of the resulting
    # (new) primary key of the updated record
    # (per #load, it is built from the final key after following
    # change_new_key chains).
    attr_accessor :new_key

    # Creates a new LoggedChange instance.
    # * +session+: the current Session
    # * +database+: either :+left+ or :+right+
    def initialize(session, database)
      self.session = session
      self.database = database
      self.type = :no_change
    end

    # A hash describing how the change state morphs based on newly found change
    # records.
    # * key: String consisting of 2 letters
    #   * first letter: the current change type (N: nothing, I: insert, U: update, D: delete)
    #   * second letter: the new change type as read from the change log table
    # * value:
    #   The resulting change type.
    # [1]: such cases shouldn't happen. but just in case, choose the most
    # sensible solution.
    TYPE_CHANGES = {
      'NI' => 'I',
      'NU' => 'U',
      'ND' => 'D',
      'II' => 'I', # [1]
      'IU' => 'I',
      'ID' => 'N',
      'UI' => 'U', # [1]
      'UU' => 'U',
      'UD' => 'D',
      'DI' => 'U',
      'DU' => 'U', # [1]
      'DD' => 'D', # [1]
    }

    # A hash translating the short 1-letter types to the according symbols
    SHORT_TYPES = {
      'I' => :insert,
      'U' => :update,
      'D' => :delete,
      'N' => :no_change
    }
    # A hash translating the symbol types to the according 1-letter types
    LONG_TYPES = SHORT_TYPES.invert

    # Returns the configured key separator
    def key_sep
      @key_sep ||= session.configuration.options[:key_sep]
    end

    # Returns a column_name => value hash based on the provided +raw_key+ string
    # (which is a string in the format as read directly from the change log table).
    def key_to_hash(raw_key)
      result = {}
      #raw_key.split(key_sep).each_slice(2) {|a| result[a[0]] = a[1]}
      raw_key.split(key_sep).each_slice(2) {|field_name, value| result[field_name] = value}
      result
    end

    # Loads the change as per #table and #key. Works if the LoggedChange instance
    # is totally new or was already loaded before.
    # Consumes (and thereby deletes) all matching change log records, merging
    # them into a single resulting change via TYPE_CHANGES.
    def load
      current_type = LONG_TYPES[type]

      # Start from the most recently known key if the change was loaded before.
      org_key = new_key || key
      # change to key string as can be found in change log table
      org_key = session.send(database).primary_key_names(table).map do |key_name|
        "#{key_name}#{key_sep}#{org_key[key_name]}"
      end.join(key_sep)
      current_key = org_key

      while change = session.change_loader(database).load(table, current_key)

        new_type = change['change_type']
        current_type = TYPE_CHANGES["#{current_type}#{new_type}"]

        self.first_changed_at ||= change['change_time']
        self.last_changed_at = change['change_time']

        # Follow key changes: an update may have moved the record to a new key.
        if change['change_type'] == 'U' and change['change_new_key'] != current_key
          current_key = change['change_new_key']
        end
      end

      self.type = SHORT_TYPES[current_type]
      self.new_key = nil
      if type == :update
        # key keeps the original primary key; new_key receives the final one.
        self.key ||= key_to_hash(org_key)
        self.new_key = key_to_hash(current_key)
      else
        self.key = key_to_hash(current_key)
      end
    end

    # Loads the change with the specified key for the named +table+.
    # * +table+: name of the table
    # * +key+: a column_name => value hash for all primary key columns of the table
    def load_specified(table, key)
      self.table = table
      self.key = key
      load
    end

    # Returns the time of the oldest change. Returns +nil+ if there are no
    # changes left.
    def oldest_change_time
      session.change_loader(database).oldest_change_time
    end

    # Loads the oldest available change.
    # Keeps consuming changes until one with an effective type other than
    # :no_change is found (or no changes remain).
    def load_oldest
      begin
        change = session.change_loader(database).oldest_change
        break unless change
        self.key = key_to_hash(change['change_key'])
        self.table = change['change_table']
        load
      end until type != :no_change
    end

    # Prevents session from going into YAML output.
    # NOTE(review): the string comparison assumes instance_variables returns
    # strings (Ruby 1.8); on Ruby 1.9+ it returns symbols — verify target Ruby.
    def to_yaml_properties
      instance_variables.sort.reject {|var_name| var_name == '@session'}
    end

  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
module RR

  # Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
  # Doesn't have any reporting functionality by itself.
  # Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
  # Usage:
  #   1. Create a new ProxiedTableScan object and hand it all necessary information
  #   2. Call ProxiedTableScan#run to do the actual comparison
  #   3. The block handed to ProxiedTableScan#run receives all differences
  class ProxiedTableScan < TableScan

    # Returns the block size (number of rows per checksum block) to use for
    # table scanning, taken from the :proxy_block_size option of +left_table+.
    def block_size
      @block_size ||= session.configuration.options_for_table(left_table)[:proxy_block_size]
    end

    # Creates a new ProxiedTableScan instance.
    # * session: a Session object representing the current database session
    # * left_table: name of the table in the left database
    # * right_table: name of the table in the right database. If not given, same like left_table
    # Raises if +session+ is not a proxied session (this scanner needs the proxy).
    def initialize(session, left_table, right_table = nil)
      raise "#{self.class.name} only works with proxied sessions" unless session.proxied?

      super
    end

    # Compares the rows scanned by the two block cursors.
    # +left_block_cursor+ and +right_block_cursor+ are the according ProxyBlockCursor objects.
    # Yields all identified differences with
    # * diff_type
    # * row
    # #run describes the yield parameters in detail.
    def compare_blocks(left_block_cursor, right_block_cursor)
      left_cursor = right_cursor = nil

      left_row_checksums = left_block_cursor.row_checksums
      right_row_checksums = right_block_cursor.row_checksums

      # phase 1: identify the different rows and store their primary keys
      left_diff_rows = []
      left_diff_checksums = []
      right_diff_rows = []
      right_diff_checksums = []
      i = k = 0
      while i < left_row_checksums.size or k < right_row_checksums.size
        left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
        right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
        rank = rank_rows left_keys, right_keys
        case rank
        when -1 # row only exists on the left side
          left_diff_rows << left_keys
          left_diff_checksums << left_row_checksums[i][:checksum]
          i += 1
        when 1 # row only exists on the right side
          right_diff_rows << right_keys
          right_diff_checksums << right_row_checksums[k][:checksum]
          k += 1
        when 0 # same key on both sides; remember only if checksums differ
          if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
            left_diff_rows << left_keys
            left_diff_checksums << left_row_checksums[i][:checksum]
            right_diff_rows << right_keys
            right_diff_checksums << right_row_checksums[k][:checksum]
          end
          i += 1
          k += 1
        end
      end

      # retrieve possibly existing cached rows from the block cursors
      left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
      right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

      # builds arrays of row keys for rows that were not included in the cache
      left_uncached_rows = []
      left_diff_rows.each_with_index do |row, i|
        left_uncached_rows << row unless left_row_cache[left_diff_checksums[i]]
      end
      right_uncached_rows = []
      right_diff_rows.each_with_index do |row, i|
        right_uncached_rows << row unless right_row_cache[right_diff_checksums[i]]
      end

      # phase 2: read all different rows and yield them
      unless left_uncached_rows.empty?
        left_cursor = session.left.create_cursor \
          ProxyRowCursor, left_table, :row_keys => left_uncached_rows
      end
      unless right_uncached_rows.empty?
        right_cursor = session.right.create_cursor \
          ProxyRowCursor, right_table, :row_keys => right_uncached_rows
      end
      i = k = 0
      while i < left_diff_rows.size or k < right_diff_rows.size
        rank = rank_rows left_diff_rows[i], right_diff_rows[k]
        case rank
        when -1
          if left_row_cache.include? left_diff_checksums[i]
            row = Marshal.load(left_row_cache[left_diff_checksums[i]])
          else
            row = left_cursor.next_row
          end
          yield :left, row
          i += 1
        when 1
          if right_row_cache.include? right_diff_checksums[k]
            row = Marshal.load(right_row_cache[right_diff_checksums[k]])
          else
            row = right_cursor.next_row
          end
          yield :right, row
          k += 1
        when 0
          if left_row_cache.include? left_diff_checksums[i]
            left_row = Marshal.load(left_row_cache[left_diff_checksums[i]])
          else
            left_row = left_cursor.next_row
          end
          if right_row_cache.include? right_diff_checksums[k]
            right_row = Marshal.load(right_row_cache[right_diff_checksums[k]])
          else
            # Bug fix: was `row = right_cursor.next_row`, which left right_row
            # nil and yielded [left_row, nil] for uncached conflicting rows.
            right_row = right_cursor.next_row
          end
          yield :conflict, [left_row, right_row]
          i += 1
          k += 1
        end
      end
    ensure
      # Always release proxy-side cursors, even if a comparison step raised.
      session.left.destroy_cursor left_cursor if left_cursor
      session.right.destroy_cursor right_cursor if right_cursor
    end

    # Runs the table scan.
    # Calls the block for every found difference.
    # Differences are yielded with 2 parameters
    # * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
    # * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
    def run(&blck)
      left_cursor = right_cursor = nil
      left_cursor = session.left.create_cursor ProxyBlockCursor, left_table,
        :row_buffer_size => scan_options[:row_buffer_size]
      right_cursor = session.right.create_cursor ProxyBlockCursor, right_table,
        :row_buffer_size => scan_options[:row_buffer_size]
      update_progress 0 # ensures progress bar is printed even if there are no records
      while left_cursor.next?
        # checksum one block on the left, then the matching key range on the right
        left_to, left_checksum, left_progress =
          left_cursor.checksum :proxy_block_size => block_size
        _ , right_checksum, right_progress =
          right_cursor.checksum :max_row => left_to
        combined_progress = left_progress + right_progress
        if left_checksum != right_checksum
          # block checksums differ ==> compare the blocks row by row
          compare_blocks left_cursor, right_cursor do |type, row|
            steps = type == :conflict ? 2 : 1
            update_progress steps
            combined_progress -= steps
            yield type, row
          end
        end
        update_progress combined_progress
      end
      # remaining right-only rows (right table is longer than the left one)
      while right_cursor.next?
        update_progress 1
        yield :right, right_cursor.next_row
      end
    ensure
      session.left.destroy_cursor left_cursor if left_cursor
      session.right.destroy_cursor right_cursor if right_cursor
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
$LOAD_PATH.unshift File.dirname(__FILE__) + '/..'

require 'digest/sha1'

require 'rubyrep'

module RR

  # Scans a table block-wise, calculating a per-block checksum as well as a
  # checksum for every single row that is encountered.
  class ProxyBlockCursor < ProxyCursor

    include TableScanHelper

    # The Digest object accumulating the checksum of the current block.
    attr_accessor :digest

    # nil if the last run of the checksum method left no unprocessed row.
    # Otherwise the left over row of that checksum run.
    attr_accessor :last_row

    # Array of checksums, one entry per encountered row.
    # Each entry is a Hash with these keys:
    # * +:row_keys+: a primary key => value hash identifying the row
    # * +:checksum+: the checksum of that row
    attr_accessor :row_checksums

    # Upper limit (in bytes) for the total size of cached row dumps.
    attr_accessor :max_row_cache_size

    # Number of bytes of row data that have been cached so far.
    attr_accessor :current_row_cache_size

    # Hash of cached rows: row checksum => marshalled row dump.
    attr_accessor :row_cache

    # Creates a new cursor.
    # * session: the current proxy session
    # * table: table_name
    def initialize(session, table)
      # this size should be sufficient as long as table doesn't contain blobs
      self.max_row_cache_size = 1000000
      super
    end

    # Returns true if there are still unprocessed rows
    # (either a buffered left-over row or more rows in the underlying cursor).
    def next?
      !last_row.nil? || cursor.next?
    end

    # Returns the next row, preferring a buffered left-over row if present.
    def next_row
      buffered = self.last_row
      if buffered
        self.last_row = nil
        buffered
      else
        cursor.next_row
      end
    end

    # Returns a hash of row checksum => row dump pairs for those of the given
    # +checksums+ that are present in the row cache.
    def retrieve_row_cache(checksums)
      dumps = {}
      checksums.each do |checksum|
        next unless row_cache.include?(checksum)
        dumps[checksum] = row_cache[checksum]
      end
      dumps
    end

    # Folds the given +row+ into the block checksum, records its individual
    # row checksum and (space permitting) caches its marshalled dump.
    def update_checksum(row)
      dump = Marshal.dump(row)

      # record the per-row checksum together with the row's primary key values
      key_columns = row.reject { |column, _| !primary_key_names.include?(column) }
      row_digest = Digest::SHA1.hexdigest(dump)
      self.row_checksums << {:row_keys => key_columns, :checksum => row_digest}

      # cache the row dump unless the cache size limit would be exceeded
      if current_row_cache_size + dump.size < max_row_cache_size
        self.current_row_cache_size += dump.size
        row_cache[row_digest] = dump
      end

      # fold the dump into the running block checksum
      self.digest << dump
    end

    # Reinitializes the block digest, the row checksum list and the row cache.
    def reset_checksum
      self.digest = Digest::SHA1.new
      self.row_checksums = []
      self.row_cache = {}
      self.current_row_cache_size = 0
    end

    # Returns the checksum of the current block.
    def current_checksum
      self.digest.hexdigest
    end

    # Calculates the checksum from the current row up to the row specified by +options+.
    # +options+ is a hash including either
    # * :+proxy_block_size+: the number of rows to scan
    # * :+max_row+: a row hash of primary key columns specifying the maximum record to scan
    # Returns multiple values:
    # * primary key columns of the last row that was read (or +nil+)
    # * the block checksum
    # * the number of processed records
    def checksum(options = {})
      reset_checksum
      final_row = nil
      processed = 0

      if options.include?(:proxy_block_size)
        limit = options[:proxy_block_size]
        raise ":proxy_block_size must be greater than 0" unless limit > 0
        while processed < limit && next?
          current = next_row
          update_checksum(current)
          processed += 1
          final_row = current
        end
      elsif options.include?(:max_row)
        boundary = options[:max_row]
        while next?
          current = next_row
          if rank_rows(current, boundary) > 0
            # current > boundary ==> buffer the row for the next run and stop
            self.last_row = current
            break
          end
          processed += 1
          update_checksum(current)
          final_row = current
        end
      else
        raise "options must include either :proxy_block_size or :max_row"
      end

      final_keys = final_row.reject { |column, _| !primary_key_names.include?(column) } if final_row
      return final_keys, current_checksum, processed
    end
  end
end
|