rubyrep 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +137 -0
- data/README.txt +37 -0
- data/Rakefile +30 -0
- data/bin/rubyrep +8 -0
- data/config/hoe.rb +72 -0
- data/config/mysql_config.rb +25 -0
- data/config/postgres_config.rb +21 -0
- data/config/proxied_test_config.rb +14 -0
- data/config/redmine_config.rb +17 -0
- data/config/rep_config.rb +20 -0
- data/config/requirements.rb +32 -0
- data/config/test_config.rb +20 -0
- data/lib/rubyrep/base_runner.rb +195 -0
- data/lib/rubyrep/command_runner.rb +144 -0
- data/lib/rubyrep/committers/buffered_committer.rb +140 -0
- data/lib/rubyrep/committers/committers.rb +146 -0
- data/lib/rubyrep/configuration.rb +240 -0
- data/lib/rubyrep/connection_extenders/connection_extenders.rb +133 -0
- data/lib/rubyrep/connection_extenders/jdbc_extender.rb +284 -0
- data/lib/rubyrep/connection_extenders/mysql_extender.rb +168 -0
- data/lib/rubyrep/connection_extenders/postgresql_extender.rb +261 -0
- data/lib/rubyrep/database_proxy.rb +52 -0
- data/lib/rubyrep/direct_table_scan.rb +75 -0
- data/lib/rubyrep/generate_runner.rb +105 -0
- data/lib/rubyrep/initializer.rb +39 -0
- data/lib/rubyrep/logged_change.rb +326 -0
- data/lib/rubyrep/proxied_table_scan.rb +171 -0
- data/lib/rubyrep/proxy_block_cursor.rb +145 -0
- data/lib/rubyrep/proxy_connection.rb +318 -0
- data/lib/rubyrep/proxy_cursor.rb +44 -0
- data/lib/rubyrep/proxy_row_cursor.rb +43 -0
- data/lib/rubyrep/proxy_runner.rb +89 -0
- data/lib/rubyrep/replication_difference.rb +91 -0
- data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
- data/lib/rubyrep/replication_extenders/postgresql_replication.rb +204 -0
- data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
- data/lib/rubyrep/replication_helper.rb +104 -0
- data/lib/rubyrep/replication_initializer.rb +307 -0
- data/lib/rubyrep/replication_run.rb +48 -0
- data/lib/rubyrep/replication_runner.rb +138 -0
- data/lib/rubyrep/replicators/replicators.rb +37 -0
- data/lib/rubyrep/replicators/two_way_replicator.rb +334 -0
- data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
- data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
- data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
- data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
- data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
- data/lib/rubyrep/scan_runner.rb +25 -0
- data/lib/rubyrep/session.rb +177 -0
- data/lib/rubyrep/sync_helper.rb +111 -0
- data/lib/rubyrep/sync_runner.rb +31 -0
- data/lib/rubyrep/syncers/syncers.rb +112 -0
- data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
- data/lib/rubyrep/table_scan.rb +54 -0
- data/lib/rubyrep/table_scan_helper.rb +38 -0
- data/lib/rubyrep/table_sorter.rb +70 -0
- data/lib/rubyrep/table_spec_resolver.rb +136 -0
- data/lib/rubyrep/table_sync.rb +68 -0
- data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
- data/lib/rubyrep/type_casting_cursor.rb +31 -0
- data/lib/rubyrep/uninstall_runner.rb +92 -0
- data/lib/rubyrep/version.rb +9 -0
- data/lib/rubyrep.rb +68 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/sims/performance/big_rep_spec.rb +100 -0
- data/sims/performance/big_scan_spec.rb +57 -0
- data/sims/performance/big_sync_spec.rb +141 -0
- data/sims/performance/performance.rake +228 -0
- data/sims/sim_helper.rb +24 -0
- data/spec/base_runner_spec.rb +218 -0
- data/spec/buffered_committer_spec.rb +271 -0
- data/spec/command_runner_spec.rb +145 -0
- data/spec/committers_spec.rb +174 -0
- data/spec/configuration_spec.rb +198 -0
- data/spec/connection_extender_interface_spec.rb +138 -0
- data/spec/connection_extenders_registration_spec.rb +129 -0
- data/spec/database_proxy_spec.rb +48 -0
- data/spec/database_rake_spec.rb +40 -0
- data/spec/db_specific_connection_extenders_spec.rb +34 -0
- data/spec/db_specific_replication_extenders_spec.rb +38 -0
- data/spec/direct_table_scan_spec.rb +61 -0
- data/spec/generate_runner_spec.rb +84 -0
- data/spec/initializer_spec.rb +46 -0
- data/spec/logged_change_spec.rb +480 -0
- data/spec/postgresql_replication_spec.rb +48 -0
- data/spec/postgresql_support_spec.rb +57 -0
- data/spec/progress_bar_spec.rb +77 -0
- data/spec/proxied_table_scan_spec.rb +151 -0
- data/spec/proxy_block_cursor_spec.rb +197 -0
- data/spec/proxy_connection_spec.rb +399 -0
- data/spec/proxy_cursor_spec.rb +56 -0
- data/spec/proxy_row_cursor_spec.rb +66 -0
- data/spec/proxy_runner_spec.rb +70 -0
- data/spec/replication_difference_spec.rb +160 -0
- data/spec/replication_extender_interface_spec.rb +365 -0
- data/spec/replication_extenders_spec.rb +32 -0
- data/spec/replication_helper_spec.rb +121 -0
- data/spec/replication_initializer_spec.rb +477 -0
- data/spec/replication_run_spec.rb +166 -0
- data/spec/replication_runner_spec.rb +213 -0
- data/spec/replicators_spec.rb +31 -0
- data/spec/rubyrep_spec.rb +8 -0
- data/spec/scan_detail_reporter_spec.rb +119 -0
- data/spec/scan_progress_printers_spec.rb +68 -0
- data/spec/scan_report_printers_spec.rb +67 -0
- data/spec/scan_runner_spec.rb +50 -0
- data/spec/scan_summary_reporter_spec.rb +61 -0
- data/spec/session_spec.rb +212 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +295 -0
- data/spec/sync_helper_spec.rb +157 -0
- data/spec/sync_runner_spec.rb +78 -0
- data/spec/syncers_spec.rb +171 -0
- data/spec/table_scan_helper_spec.rb +29 -0
- data/spec/table_scan_spec.rb +49 -0
- data/spec/table_sorter_spec.rb +31 -0
- data/spec/table_spec_resolver_spec.rb +102 -0
- data/spec/table_sync_spec.rb +84 -0
- data/spec/trigger_mode_switcher_spec.rb +83 -0
- data/spec/two_way_replicator_spec.rb +551 -0
- data/spec/two_way_syncer_spec.rb +256 -0
- data/spec/type_casting_cursor_spec.rb +50 -0
- data/spec/uninstall_runner_spec.rb +86 -0
- data/tasks/database.rake +439 -0
- data/tasks/deployment.rake +29 -0
- data/tasks/environment.rake +9 -0
- data/tasks/java.rake +37 -0
- data/tasks/redmine_test.rake +47 -0
- data/tasks/rspec.rake +68 -0
- data/tasks/rubyrep.tailor +18 -0
- data/tasks/stats.rake +19 -0
- data/tasks/task_helper.rb +20 -0
- data.tar.gz.sig +0 -0
- metadata +243 -0
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
module RR
|
|
2
|
+
|
|
3
|
+
class Session
|
|
4
|
+
|
|
5
|
+
# Returns the +LoggedChangeLoader+ of the specified database.
|
|
6
|
+
# * database: either :+left+ or :+right+
|
|
7
|
+
def change_loader(database)
  # One loader per database, created on first request and reused afterwards.
  @change_loaders ||= {}
  @change_loaders[database] ||= LoggedChangeLoader.new(self, database)
end
|
|
14
|
+
|
|
15
|
+
# Forces an update of the change log cache
|
|
16
|
+
def reload_changes
  # Refresh left first, then right; the result of the right update is returned.
  [:left, :right].map do |database|
    change_loader(database).update :forced => true
  end.last
end
|
|
20
|
+
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Caches the entries in the change log table
|
|
24
|
+
class LoggedChangeLoader
|
|
25
|
+
|
|
26
|
+
# The current +Session+.
|
|
27
|
+
attr_accessor :session
|
|
28
|
+
|
|
29
|
+
# The current +ProxyConnection+.
|
|
30
|
+
attr_accessor :connection
|
|
31
|
+
|
|
32
|
+
# Index to the next unprocessed change in the +change_array+.
|
|
33
|
+
attr_accessor :current_index
|
|
34
|
+
|
|
35
|
+
# ID of the last cached change log record.
|
|
36
|
+
attr_accessor :current_id
|
|
37
|
+
|
|
38
|
+
# Array with all cached changes.
|
|
39
|
+
# Processed change log records are replaced with +nil+.
|
|
40
|
+
attr_accessor :change_array
|
|
41
|
+
|
|
42
|
+
# Tree (hash) structure for fast access to all cached changes.
|
|
43
|
+
# First level of tree:
|
|
44
|
+
# * key: table name
|
|
45
|
+
# * value: 2nd level tree
|
|
46
|
+
# 2nd level tree:
|
|
47
|
+
# * key: the change_key value of the according change log records.
|
|
48
|
+
# * value:
|
|
49
|
+
# The according change log record (column_name => value hash).
|
|
50
|
+
# Additional entry of each change log hash:
|
|
51
|
+
# * key: 'array_index'
|
|
52
|
+
# * value: index to the change log record in +change_array+
|
|
53
|
+
attr_accessor :change_tree
|
|
54
|
+
|
|
55
|
+
# Date of last update of the cache
|
|
56
|
+
attr_accessor :last_updated
|
|
57
|
+
|
|
58
|
+
# Initializes / resets the cache.
|
|
59
|
+
def init_cache
  # Empty change list, empty lookup tree, read position back at the start.
  self.change_array = []
  self.change_tree = {}
  self.current_index = 0
end
|
|
64
|
+
private :init_cache
|
|
65
|
+
|
|
66
|
+
# Create a new change log record cache.
|
|
67
|
+
# * +session+: The current +Session+
|
|
68
|
+
# * +database+: Either :+left+ or :+right+
|
|
69
|
+
def initialize(session, database)
  self.session = session
  # +database+ is also the name of the Session method returning the connection
  self.connection = session.send(database)

  # No change log record has been read yet, and the cache counts as stale
  # so that the first #update actually hits the database.
  self.current_id = -1
  self.last_updated = 1.year.ago
  init_cache
end
|
|
77
|
+
|
|
78
|
+
# Updates the cache.
|
|
79
|
+
# Options is a hash determining when the update is actually executed:
|
|
80
|
+
# * :+expire_time+: cache is older than the given number of seconds
|
|
81
|
+
# * :+forced+: if +true+ update the cache even if not yet expired
|
|
82
|
+
def update(options = {:forced => false, :expire_time => 1})
  # Fill in defaults so that a partial options hash (e.g. only :forced or
  # only :expire_time) does not end up comparing against nil.
  options = {:forced => false, :expire_time => 1}.merge(options)
  return unless options[:forced] || Time.now - last_updated >= options[:expire_time]

  self.last_updated = Time.now

  # Read every change log record with an id above the last one cached.
  cursor = connection.select_cursor(
    :table => change_log_table,
    :from => {'id' => current_id},
    :exclude_starting_row => true,
    :type_cast => true
  )
  begin
    while cursor.next?
      change = cursor.next_row
      self.current_id = change['id']

      # Append to the sequential list and remember the position in the record
      change_array << change
      change['array_index'] = change_array.size - 1

      # Register in the table name => change key => [changes] lookup tree
      table_change_tree = change_tree[change['change_table']] ||= {}
      key_changes = table_change_tree[change['change_key']] ||= []
      key_changes << change
    end
  ensure
    # Release the cursor even if reading a row failed
    cleared = cursor.clear
  end
  cleared
end
|
|
105
|
+
|
|
106
|
+
# Returns the creation time of the oldest unprocessed change log record.
|
|
107
|
+
def oldest_change_time
  # nil when no unprocessed change is left
  record = oldest_change
  record ? record['change_time'] : nil
end
|
|
111
|
+
|
|
112
|
+
# Returns the oldest unprocessed change log record (column_name => value hash).
|
|
113
|
+
def oldest_change
  update

  # Slots of already processed changes are nil; step over them. The scan is
  # bounded by the array size so that it terminates (returning nil) even if
  # only processed slots remain.
  while current_index < change_array.size
    change = change_array[current_index]
    return change unless change.nil?
    self.current_index += 1
  end
  nil
end
|
|
123
|
+
|
|
124
|
+
# Returns the specified change log record (column_name => value hash).
|
|
125
|
+
# * +change_table+: the name of the table that was changed
|
|
126
|
+
# * +change_key+: the change key of the modified record
|
|
127
|
+
def load(change_table, change_key)
  update

  table_changes = change_tree[change_table]
  return nil unless table_changes
  pending = table_changes[change_key]
  return nil unless pending

  # take the oldest change for this key out of the tree ...
  change = pending.shift

  # ... blank its slot in the sequential list ...
  change_array[change['array_index']] = nil

  # ... and remove it from the change log table
  connection.execute "delete from #{change_log_table} where id = #{change['id']}"

  # prune branches that became empty; start over once nothing is cached
  table_changes.delete change_key if pending.empty?
  change_tree.delete change_table if table_changes.empty?
  init_cache if change_tree.empty?

  change
end
|
|
161
|
+
|
|
162
|
+
# Returns the name of the change log table
|
|
163
|
+
def change_log_table
  return @change_log_table if @change_log_table

  # table name = configured replication prefix + fixed suffix
  prefix = session.configuration.options[:rep_prefix]
  @change_log_table = "#{prefix}_pending_changes"
end
|
|
166
|
+
private :change_log_table
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Describes a single logged record change.
|
|
170
|
+
#
|
|
171
|
+
# Note:
|
|
172
|
+
# The change loading functionality depends on the current database session
|
|
173
|
+
# being executed in an open database transaction.
|
|
174
|
+
# Also at the end of change processing the transaction must be committed.
|
|
175
|
+
class LoggedChange
|
|
176
|
+
|
|
177
|
+
# The current Session
|
|
178
|
+
attr_accessor :session
|
|
179
|
+
|
|
180
|
+
# The database which was changed. Either :+left+ or :+right+.
|
|
181
|
+
attr_accessor :database
|
|
182
|
+
|
|
183
|
+
# The name of the changed table
|
|
184
|
+
attr_accessor :table
|
|
185
|
+
|
|
186
|
+
# When the first change to the record happened
|
|
187
|
+
attr_accessor :first_changed_at
|
|
188
|
+
|
|
189
|
+
# When the last change to the record happened
|
|
190
|
+
attr_accessor :last_changed_at
|
|
191
|
+
|
|
192
|
+
# Type of the change. Either :+insert+, :+update+ or :+delete+.
|
|
193
|
+
attr_accessor :type
|
|
194
|
+
|
|
195
|
+
# A column_name => value hash identifying the changed record
|
|
196
|
+
attr_accessor :key
|
|
197
|
+
|
|
198
|
+
# Only used for updates: a column_name => value hash of the new primary
|
|
199
|
+
# key of the updated record
|
|
200
|
+
attr_accessor :new_key
|
|
201
|
+
|
|
202
|
+
# Creates a new LoggedChange instance.
|
|
203
|
+
# * +session+: the current Session
|
|
204
|
+
# * +database+: either :+left+ or :+right+
|
|
205
|
+
def initialize(session, database)
  # a new instance starts out without any detected change
  self.type = :no_change
  self.database = database
  self.session = session
end
|
|
210
|
+
|
|
211
|
+
# A hash describing how the change state morphs based on newly found change
|
|
212
|
+
# records.
|
|
213
|
+
# * key: String consisting of 2 letters
|
|
214
|
+
# * first letter: describes current type change (nothing, insert, update, delete)
|
|
215
|
+
# * second letter: the new change type as read of the change log table
|
|
216
|
+
# * value:
|
|
217
|
+
# The resulting change type.
|
|
218
|
+
# [1]: such cases shouldn't happen. but just in case, choose the most
|
|
219
|
+
# sensible solution.
|
|
220
|
+
# Lookup table: "<current type><newly read type>" => resulting type.
# Frozen because it is shared, read-only data.
TYPE_CHANGES = {
  'NI' => 'I',
  'NU' => 'U',
  'ND' => 'D',
  'II' => 'I', # [1]
  'IU' => 'I',
  'ID' => 'N',
  'UI' => 'U', # [1]
  'UU' => 'U',
  'UD' => 'D',
  'DI' => 'U',
  'DU' => 'U', # [1]
  'DD' => 'D'  # [1]
}.freeze
|
|
234
|
+
|
|
235
|
+
# A hash translating the short 1-letter types to the according symbols
|
|
236
|
+
SHORT_TYPES = {
  'I' => :insert,
  'U' => :update,
  'D' => :delete,
  'N' => :no_change
}.freeze

# A hash translating the symbol types to the according 1-letter types
LONG_TYPES = SHORT_TYPES.invert.freeze
|
|
244
|
+
|
|
245
|
+
# Returns the configured key separator
|
|
246
|
+
def key_sep
  # looked up in the session configuration once, then remembered
  @key_sep = session.configuration.options[:key_sep] unless @key_sep
  @key_sep
end
|
|
249
|
+
|
|
250
|
+
# Returns a column_name => value hash based on the provided +raw_key+ string
|
|
251
|
+
# (which is a string in the format as read directly from the change log table).
|
|
252
|
+
def key_to_hash(raw_key)
  # The raw key alternates column names and values, joined by the separator.
  parts = raw_key.split(key_sep)
  result = {}
  0.step(parts.size - 1, 2) do |position|
    # a trailing name without a value maps to nil
    result[parts[position]] = parts[position + 1]
  end
  result
end
|
|
258
|
+
|
|
259
|
+
# Loads the change as per #table and #key. Works if the LoggedChange instance
|
|
260
|
+
# is totally new or was already loaded before.
|
|
261
|
+
def load
  short_type = LONG_TYPES[type]

  # Start from the newest known key of the record and serialize it the way
  # keys are stored in the change log table.
  start_values = new_key || key
  key_names = session.send(database).primary_key_names(table)
  org_key = key_names.map { |name| "#{name}#{key_sep}#{start_values[name]}" }.join(key_sep)
  current_key = org_key

  # Consume all logged changes for the record, following primary key updates.
  while change = session.change_loader(database).load(table, current_key)
    logged_type = change['change_type']
    short_type = TYPE_CHANGES["#{short_type}#{logged_type}"]

    change_time = change['change_time']
    self.first_changed_at ||= change_time
    self.last_changed_at = change_time

    renamed_key = change['change_new_key']
    current_key = renamed_key if logged_type == 'U' && renamed_key != current_key
  end

  self.type = SHORT_TYPES[short_type]
  self.new_key = nil
  if type == :update
    # keep an already known original key; the final key becomes the new key
    self.key ||= key_to_hash(org_key)
    self.new_key = key_to_hash(current_key)
  else
    self.key = key_to_hash(current_key)
  end
end
|
|
293
|
+
|
|
294
|
+
# Loads the change with the specified key for the named +table+.
|
|
295
|
+
# * +table+: name of the table
|
|
296
|
+
# * +key+: a column_name => value hash for all primary key columns of the table
|
|
297
|
+
def load_specified(table, key)
  # remember which record is wanted, then let #load do the work
  self.key = key
  self.table = table
  load
end
|
|
302
|
+
|
|
303
|
+
# Returns the time of the oldest change. Returns +nil+ if there are no
|
|
304
|
+
# changes left.
|
|
305
|
+
def oldest_change_time
  # delegate to the change loader of this change's database
  loader = session.change_loader(database)
  loader.oldest_change_time
end
|
|
308
|
+
|
|
309
|
+
# Loads the oldest available change
|
|
310
|
+
def load_oldest
  while true
    change = session.change_loader(database).oldest_change
    break unless change

    # point this instance at the record of the oldest change and load it
    self.key = key_to_hash(change['change_key'])
    self.table = change['change_table']
    load

    # changes that cancel each other out yield :no_change; try the next one
    break if type != :no_change
  end
end
|
|
319
|
+
|
|
320
|
+
# Prevents session from going into YAML output
|
|
321
|
+
def to_yaml_properties
  # instance_variables yields Strings on Ruby 1.8 but Symbols on 1.9+;
  # compare by name so that @session is excluded on both.
  instance_variables.sort.reject { |var_name| var_name.to_s == '@session' }
end
|
|
324
|
+
|
|
325
|
+
end
|
|
326
|
+
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
module RR
|
|
2
|
+
|
|
3
|
+
# Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
|
|
4
|
+
# Doesn't have any reporting functionality by itself.
|
|
5
|
+
# Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
|
|
6
|
+
# Usage:
|
|
7
|
+
# 1. Create a new ProxiedTableScan object and hand it all necessary information
|
|
8
|
+
# 2. Call ProxiedTableScan#run to do the actual comparison
|
|
9
|
+
# 3. The block handed to ProxiedTableScan#run receives all differences
|
|
10
|
+
class ProxiedTableScan < TableScan
|
|
11
|
+
|
|
12
|
+
# returns block size to use for table scanning
|
|
13
|
+
def block_size
  return @block_size if @block_size

  # the block size is configured per table (looked up via the left table)
  table_options = session.configuration.options_for_table(left_table)
  @block_size = table_options[:proxy_block_size]
end
|
|
16
|
+
|
|
17
|
+
# Creates a new ProxiedTableScan instance
|
|
18
|
+
# * session: a Session object representing the current database session
|
|
19
|
+
# * left_table: name of the table in the left database
|
|
20
|
+
# * right_table: name of the table in the right database. If not given, same like left_table
|
|
21
|
+
def initialize(session, left_table, right_table = nil)
  # refuse direct (non proxied) sessions before doing anything else
  unless session.proxied?
    raise "#{self.class.name} only works with proxied sessions"
  end

  # bare super forwards all received arguments to the superclass
  super
end
|
|
26
|
+
|
|
27
|
+
# Compares the specified left and right rows.
|
|
28
|
+
# +left_block_cursor+ and +right_block_cursor+ represent the according ProxyBlockCursor objects.
|
|
29
|
+
# Yields all identified differences with
|
|
30
|
+
# * diff_type
|
|
31
|
+
# * row
|
|
32
|
+
# #run describes the yield parameters in detail.
|
|
33
|
+
def compare_blocks(left_block_cursor, right_block_cursor)
  left_cursor = right_cursor = nil

  left_row_checksums = left_block_cursor.row_checksums
  right_row_checksums = right_block_cursor.row_checksums

  # phase 1: walk both checksum lists in parallel and collect the primary
  # keys and checksums of all rows that exist on one side only or differ
  left_diff_rows = []
  left_diff_checksums = []
  right_diff_rows = []
  right_diff_checksums = []
  i = k = 0
  while i < left_row_checksums.size or k < right_row_checksums.size
    # nil keys signal that the according side is exhausted
    left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
    right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
    case rank_rows(left_keys, right_keys)
    when -1
      left_diff_rows << left_keys
      left_diff_checksums << left_row_checksums[i][:checksum]
      i += 1
    when 1
      right_diff_rows << right_keys
      right_diff_checksums << right_row_checksums[k][:checksum]
      k += 1
    when 0
      if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
        left_diff_rows << left_keys
        left_diff_checksums << left_row_checksums[i][:checksum]
        right_diff_rows << right_keys
        right_diff_checksums << right_row_checksums[k][:checksum]
      end
      i += 1
      k += 1
    end
  end

  # retrieve possibly existing cached rows from the block cursors
  left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
  right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

  # build arrays of row keys for rows that were not included in the caches
  left_uncached_rows = []
  left_diff_rows.each_with_index do |row_keys, index|
    left_uncached_rows << row_keys unless left_row_cache[left_diff_checksums[index]]
  end
  right_uncached_rows = []
  right_diff_rows.each_with_index do |row_keys, index|
    right_uncached_rows << row_keys unless right_row_cache[right_diff_checksums[index]]
  end

  # phase 2: read all different rows and yield them
  unless left_uncached_rows.empty?
    left_cursor = session.left.create_cursor \
      ProxyRowCursor, left_table, :row_keys => left_uncached_rows
  end
  unless right_uncached_rows.empty?
    right_cursor = session.right.create_cursor \
      ProxyRowCursor, right_table, :row_keys => right_uncached_rows
  end

  # Returns the row belonging to +checksum+: restored from the row cache if
  # it is in there, otherwise read as the next row of the given row cursor.
  # NOTE(review): Marshal.load is applied to dumps handed back by the block
  # cursor; presumably the proxy side is trusted -- confirm.
  fetch_row = lambda do |row_cache, checksum, cursor|
    if row_cache.include? checksum
      Marshal.load(row_cache[checksum])
    else
      cursor.next_row
    end
  end

  i = k = 0
  while i < left_diff_rows.size or k < right_diff_rows.size
    case rank_rows(left_diff_rows[i], right_diff_rows[k])
    when -1
      yield :left, fetch_row.call(left_row_cache, left_diff_checksums[i], left_cursor)
      i += 1
    when 1
      yield :right, fetch_row.call(right_row_cache, right_diff_checksums[k], right_cursor)
      k += 1
    when 0
      left_row = fetch_row.call(left_row_cache, left_diff_checksums[i], left_cursor)
      # the uncached right row used to be stored in a different variable,
      # so conflicts were reported with a missing or stale right row
      right_row = fetch_row.call(right_row_cache, right_diff_checksums[k], right_cursor)
      yield :conflict, [left_row, right_row]
      i += 1
      k += 1
    end
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
|
|
133
|
+
|
|
134
|
+
# Runs the table scan.
|
|
135
|
+
# Calls the block for every found difference.
|
|
136
|
+
# Differences are yielded with 2 parameters
|
|
137
|
+
# * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
|
|
138
|
+
# * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
|
|
139
|
+
def run(&blck)
  left_cursor = nil
  right_cursor = nil

  left_cursor = session.left.create_cursor(
    ProxyBlockCursor, left_table,
    :row_buffer_size => scan_options[:row_buffer_size]
  )
  right_cursor = session.right.create_cursor(
    ProxyBlockCursor, right_table,
    :row_buffer_size => scan_options[:row_buffer_size]
  )

  update_progress 0 # ensures progress bar is printed even if there are no records

  while left_cursor.next?
    # checksum the next left block, then the right rows up to the same key
    left_to, left_checksum, left_progress =
      left_cursor.checksum(:proxy_block_size => block_size)
    _skipped, right_checksum, right_progress =
      right_cursor.checksum(:max_row => left_to)
    unreported_progress = left_progress + right_progress

    unless left_checksum == right_checksum
      compare_blocks(left_cursor, right_cursor) do |type, row|
        # a conflict accounts for one row on each side
        steps = 1
        steps = 2 if type == :conflict
        update_progress steps
        unreported_progress -= steps
        yield type, row
      end
    end

    # report the rows of this block that did not show up as differences
    update_progress unreported_progress
  end

  # whatever is left in the right table has no counterpart on the left
  while right_cursor.next?
    update_progress 1
    yield :right, right_cursor.next_row
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
$LOAD_PATH.unshift File.dirname(__FILE__) + '/..'
|
|
2
|
+
|
|
3
|
+
require 'digest/sha1'
|
|
4
|
+
|
|
5
|
+
require 'rubyrep'
|
|
6
|
+
|
|
7
|
+
module RR
|
|
8
|
+
|
|
9
|
+
# This class is used to scan a table in blocks.
|
|
10
|
+
# Calculates the checksums of the scanned blocks.
|
|
11
|
+
class ProxyBlockCursor < ProxyCursor
|
|
12
|
+
|
|
13
|
+
include TableScanHelper
|
|
14
|
+
|
|
15
|
+
# The current Digest
|
|
16
|
+
attr_accessor :digest
|
|
17
|
+
|
|
18
|
+
# nil if the last run of the checksum method left no unprocessed row.
|
|
19
|
+
# Otherwise the left over row of that checksum run
|
|
20
|
+
attr_accessor :last_row
|
|
21
|
+
|
|
22
|
+
# Returns an array of checksums for each encountered row.
|
|
23
|
+
# Each array element is a Hash with the following elements:
|
|
24
|
+
# * +:row_keys+: A primary key => value hash identifying the row
|
|
25
|
+
# * +:checksum+: the checksum for this row
|
|
26
|
+
attr_accessor :row_checksums
|
|
27
|
+
|
|
28
|
+
# The maximum total size (in bytes) up to which rows will be cached
|
|
29
|
+
attr_accessor :max_row_cache_size
|
|
30
|
+
|
|
31
|
+
# A byte counter of how many bytes of row data have already been cached
|
|
32
|
+
attr_accessor :current_row_cache_size
|
|
33
|
+
|
|
34
|
+
# A hash of cached rows consisting of row checksum => row dump pairs.
|
|
35
|
+
attr_accessor :row_cache
|
|
36
|
+
|
|
37
|
+
# Creates a new cursor
|
|
38
|
+
# * session: the current proxy session
|
|
39
|
+
# * table: table_name
|
|
40
|
+
def initialize(session, table)
  # 1 MB of cached row dumps; should be sufficient as long as the table
  # doesn't contain blobs
  self.max_row_cache_size = 1_000_000

  # bare super forwards +session+ and +table+ to the superclass
  super
end
|
|
44
|
+
|
|
45
|
+
# Returns true if the current cursor has unprocessed rows
|
|
46
|
+
def next?
  # a row left over from the last checksum run still counts as unprocessed
  return true unless last_row.nil?
  cursor.next?
end
|
|
49
|
+
|
|
50
|
+
# Returns the cursor's next row
|
|
51
|
+
def next_row
  left_over = last_row
  return cursor.next_row unless left_over

  # hand out the row left over by the last checksum run exactly once
  self.last_row = nil
  left_over
end
|
|
59
|
+
|
|
60
|
+
# Returns a hash of row checksum => row dump pairs for the +checksums+
|
|
61
|
+
# in the provided array
|
|
62
|
+
def retrieve_row_cache(checksums)
  # collect only those of the requested checksums that are actually cached
  checksums.inject({}) do |row_dumps, checksum|
    row_dumps[checksum] = row_cache[checksum] if row_cache.include?(checksum)
    row_dumps
  end
end
|
|
69
|
+
|
|
70
|
+
# Updates block / row checksums and row cache with the given +row+.
|
|
71
|
+
def update_checksum(row)
  serialized = Marshal.dump(row)
  row_digest = Digest::SHA1.hexdigest(serialized)

  # remember the primary key of the row together with its own checksum
  pk_values = row.reject { |column, _| !primary_key_names.include?(column) }
  row_checksums << {:row_keys => pk_values, :checksum => row_digest}

  # keep the dump around as long as the cache stays below its size limit
  grown_cache_size = current_row_cache_size + serialized.size
  if grown_cache_size < max_row_cache_size
    self.current_row_cache_size = grown_cache_size
    row_cache[row_digest] = serialized
  end

  # fold the row into the checksum of the whole block
  digest << serialized
end
|
|
88
|
+
|
|
89
|
+
# Reinitializes the row checksum array and the total checksum
|
|
90
|
+
def reset_checksum
  # drop the cached row dumps of the previous block ...
  self.row_cache = {}
  self.current_row_cache_size = 0

  # ... and start fresh per-row and block checksums
  self.row_checksums = []
  self.digest = Digest::SHA1.new
end
|
|
96
|
+
|
|
97
|
+
# Returns the current checksum
|
|
98
|
+
def current_checksum
  # hex representation of everything fed into the digest so far
  block_digest = digest
  block_digest.hexdigest
end
|
|
101
|
+
|
|
102
|
+
# Calculates the checksum from the current row up to the row specified by options.
|
|
103
|
+
# options is a hash including either
|
|
104
|
+
# * :+proxy_block_size+: The number of rows to scan.
|
|
105
|
+
# * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
|
|
106
|
+
# Returns multiple parameters:
|
|
107
|
+
# * last row read
|
|
108
|
+
# * checksum
|
|
109
|
+
# * number of processed records
|
|
110
|
+
def checksum(options = {})
  reset_checksum
  last_processed = nil
  processed = 0

  if options.include?(:proxy_block_size)
    # scan a fixed number of rows (or fewer if the cursor runs dry)
    limit = options[:proxy_block_size]
    raise ":proxy_block_size must be greater than 0" unless limit > 0
    until processed >= limit || !next?
      last_processed = next_row
      update_checksum(last_processed)
      processed += 1
    end
  elsif options.include?(:max_row)
    # scan up to and including the row identified by :max_row
    upper_bound = options[:max_row]
    while next?
      candidate = next_row
      if rank_rows(candidate, upper_bound) > 0
        # row lies beyond the bound: keep it for the next run and stop
        self.last_row = candidate
        break
      end
      processed += 1
      update_checksum(candidate)
      last_processed = candidate
    end
  else
    raise "options must include either :proxy_block_size or :max_row"
  end

  # only the primary key columns of the last processed row are reported
  last_keys = nil
  if last_processed
    last_keys = last_processed.reject { |column, _| !primary_key_names.include?(column) }
  end
  [last_keys, current_checksum, processed]
end
|
|
144
|
+
end
|
|
145
|
+
end
|