andyjeffries-rubyrep 1.2.1
- data/History.txt +83 -0
- data/License.txt +20 -0
- data/Manifest.txt +151 -0
- data/README.txt +37 -0
- data/bin/rubyrep +8 -0
- data/lib/rubyrep.rb +72 -0
- data/lib/rubyrep/base_runner.rb +195 -0
- data/lib/rubyrep/command_runner.rb +144 -0
- data/lib/rubyrep/committers/buffered_committer.rb +151 -0
- data/lib/rubyrep/committers/committers.rb +152 -0
- data/lib/rubyrep/configuration.rb +275 -0
- data/lib/rubyrep/connection_extenders/connection_extenders.rb +165 -0
- data/lib/rubyrep/connection_extenders/jdbc_extender.rb +65 -0
- data/lib/rubyrep/connection_extenders/mysql_extender.rb +59 -0
- data/lib/rubyrep/connection_extenders/postgresql_extender.rb +277 -0
- data/lib/rubyrep/database_proxy.rb +52 -0
- data/lib/rubyrep/direct_table_scan.rb +75 -0
- data/lib/rubyrep/generate_runner.rb +105 -0
- data/lib/rubyrep/initializer.rb +39 -0
- data/lib/rubyrep/log_helper.rb +30 -0
- data/lib/rubyrep/logged_change.rb +160 -0
- data/lib/rubyrep/logged_change_loader.rb +197 -0
- data/lib/rubyrep/noisy_connection.rb +80 -0
- data/lib/rubyrep/proxied_table_scan.rb +171 -0
- data/lib/rubyrep/proxy_block_cursor.rb +145 -0
- data/lib/rubyrep/proxy_connection.rb +431 -0
- data/lib/rubyrep/proxy_cursor.rb +44 -0
- data/lib/rubyrep/proxy_row_cursor.rb +43 -0
- data/lib/rubyrep/proxy_runner.rb +89 -0
- data/lib/rubyrep/replication_difference.rb +100 -0
- data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
- data/lib/rubyrep/replication_extenders/postgresql_replication.rb +236 -0
- data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
- data/lib/rubyrep/replication_helper.rb +142 -0
- data/lib/rubyrep/replication_initializer.rb +327 -0
- data/lib/rubyrep/replication_run.rb +142 -0
- data/lib/rubyrep/replication_runner.rb +166 -0
- data/lib/rubyrep/replicators/replicators.rb +42 -0
- data/lib/rubyrep/replicators/two_way_replicator.rb +361 -0
- data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
- data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
- data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
- data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
- data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
- data/lib/rubyrep/scan_runner.rb +25 -0
- data/lib/rubyrep/session.rb +230 -0
- data/lib/rubyrep/sync_helper.rb +121 -0
- data/lib/rubyrep/sync_runner.rb +31 -0
- data/lib/rubyrep/syncers/syncers.rb +112 -0
- data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
- data/lib/rubyrep/table_scan.rb +54 -0
- data/lib/rubyrep/table_scan_helper.rb +46 -0
- data/lib/rubyrep/table_sorter.rb +70 -0
- data/lib/rubyrep/table_spec_resolver.rb +142 -0
- data/lib/rubyrep/table_sync.rb +90 -0
- data/lib/rubyrep/task_sweeper.rb +77 -0
- data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
- data/lib/rubyrep/type_casting_cursor.rb +31 -0
- data/lib/rubyrep/uninstall_runner.rb +93 -0
- data/lib/rubyrep/version.rb +9 -0
- data/rubyrep +8 -0
- data/rubyrep.bat +4 -0
- data/setup.rb +1585 -0
- data/spec/base_runner_spec.rb +218 -0
- data/spec/buffered_committer_spec.rb +274 -0
- data/spec/command_runner_spec.rb +145 -0
- data/spec/committers_spec.rb +178 -0
- data/spec/configuration_spec.rb +203 -0
- data/spec/connection_extender_interface_spec.rb +141 -0
- data/spec/connection_extenders_registration_spec.rb +164 -0
- data/spec/database_proxy_spec.rb +48 -0
- data/spec/database_rake_spec.rb +40 -0
- data/spec/db_specific_connection_extenders_spec.rb +34 -0
- data/spec/db_specific_replication_extenders_spec.rb +38 -0
- data/spec/direct_table_scan_spec.rb +61 -0
- data/spec/dolphins.jpg +0 -0
- data/spec/generate_runner_spec.rb +84 -0
- data/spec/initializer_spec.rb +46 -0
- data/spec/log_helper_spec.rb +39 -0
- data/spec/logged_change_loader_spec.rb +68 -0
- data/spec/logged_change_spec.rb +470 -0
- data/spec/noisy_connection_spec.rb +78 -0
- data/spec/postgresql_replication_spec.rb +48 -0
- data/spec/postgresql_schema_support_spec.rb +212 -0
- data/spec/postgresql_support_spec.rb +63 -0
- data/spec/progress_bar_spec.rb +77 -0
- data/spec/proxied_table_scan_spec.rb +151 -0
- data/spec/proxy_block_cursor_spec.rb +197 -0
- data/spec/proxy_connection_spec.rb +423 -0
- data/spec/proxy_cursor_spec.rb +56 -0
- data/spec/proxy_row_cursor_spec.rb +66 -0
- data/spec/proxy_runner_spec.rb +70 -0
- data/spec/replication_difference_spec.rb +161 -0
- data/spec/replication_extender_interface_spec.rb +367 -0
- data/spec/replication_extenders_spec.rb +32 -0
- data/spec/replication_helper_spec.rb +178 -0
- data/spec/replication_initializer_spec.rb +509 -0
- data/spec/replication_run_spec.rb +443 -0
- data/spec/replication_runner_spec.rb +254 -0
- data/spec/replicators_spec.rb +36 -0
- data/spec/rubyrep_spec.rb +8 -0
- data/spec/scan_detail_reporter_spec.rb +119 -0
- data/spec/scan_progress_printers_spec.rb +68 -0
- data/spec/scan_report_printers_spec.rb +67 -0
- data/spec/scan_runner_spec.rb +50 -0
- data/spec/scan_summary_reporter_spec.rb +61 -0
- data/spec/session_spec.rb +253 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +305 -0
- data/spec/strange_name_support_spec.rb +135 -0
- data/spec/sync_helper_spec.rb +169 -0
- data/spec/sync_runner_spec.rb +78 -0
- data/spec/syncers_spec.rb +171 -0
- data/spec/table_scan_helper_spec.rb +36 -0
- data/spec/table_scan_spec.rb +49 -0
- data/spec/table_sorter_spec.rb +30 -0
- data/spec/table_spec_resolver_spec.rb +111 -0
- data/spec/table_sync_spec.rb +140 -0
- data/spec/task_sweeper_spec.rb +47 -0
- data/spec/trigger_mode_switcher_spec.rb +83 -0
- data/spec/two_way_replicator_spec.rb +721 -0
- data/spec/two_way_syncer_spec.rb +256 -0
- data/spec/type_casting_cursor_spec.rb +50 -0
- data/spec/uninstall_runner_spec.rb +93 -0
- metadata +190 -0
data/lib/rubyrep/logged_change_loader.rb
@@ -0,0 +1,197 @@
module RR

  # Makes management of logged change loaders easier
  class LoggedChangeLoaders

    # The current Session
    attr_accessor :session

    # A hash of LoggedChangeLoader instances for the :+left+ and :+right+ database
    attr_accessor :loaders

    # Create new logged change loaders.
    # * +session+: Current Session
    def initialize(session)
      self.session = session
      self.loaders = {}
      [:left, :right].each do |database|
        loaders[database] = LoggedChangeLoader.new(session, database)
      end
    end

    # Returns the LoggedChangeLoader for the specified (:+left+ or :+right+)
    # database.
    def [](database)
      loaders[database]
    end

    # Forces an update of the change log cache
    def update
      [:left, :right].each {|database| self[database].update :forced => true}
    end
  end

  # Caches the entries in the change log table
  class LoggedChangeLoader

    # The current +Session+.
    attr_accessor :session

    # Current database (either :+left+ or :+right+)
    attr_accessor :database

    # The current +ProxyConnection+.
    attr_accessor :connection

    # Index to the next unprocessed change in the +change_array+.
    attr_accessor :current_index

    # ID of the last cached change log record.
    attr_accessor :current_id

    # Array with all cached changes.
    # Processed change log records are replaced with +nil+.
    attr_accessor :change_array

    # Tree (hash) structure for fast access to all cached changes.
    # First level of tree:
    # * key: table name
    # * value: 2nd level tree
    # 2nd level tree:
    # * key: the change_key value of the according change log records.
    # * value:
    #   An array of according change log records (column_name => value hash).
    #   Additional entry of each change log hash:
    #   * key: 'array_index'
    #   * value: index to the change log record in +change_array+
    attr_accessor :change_tree

    # Date of last update of the cache
    attr_accessor :last_updated

    # Initializes / resets the cache.
    def init_cache
      self.change_tree = {}
      self.change_array = []
      self.current_index = 0
    end
    private :init_cache

    # Create a new change log record cache.
    # * +session+: The current +Session+
    # * +database+: Either :+left+ or :+right+
    def initialize(session, database)
      self.session = session
      self.database = database
      self.connection = session.send(database)

      init_cache
      self.current_id = -1
      self.last_updated = 1.year.ago
    end

    # Updates the cache.
    # Options is a hash determining when the update is actually executed:
    # * :+expire_time+: cache is older than the given number of seconds
    # * :+forced+: if +true+ update the cache even if not yet expired
    def update(options = {:forced => false, :expire_time => 1})
      return unless options[:forced] or Time.now - self.last_updated >= options[:expire_time]

      self.last_updated = Time.now

      # First, let's use a LIMIT clause (via :row_buffer_size option) to verify
      # if there are any pending changes.
      # (If there are many pending changes, this is (at least with PostgreSQL)
      # much faster.)
      cursor = connection.select_cursor(
        :table => change_log_table,
        :from => {'id' => current_id},
        :exclude_starting_row => true,
        :row_buffer_size => 1
      )
      return unless cursor.next?

      # Something is here. Let's actually load it.
      cursor = connection.select_cursor(
        :table => change_log_table,
        :from => {'id' => current_id},
        :exclude_starting_row => true,
        :type_cast => true,
        :row_buffer_size => session.configuration.options[:row_buffer_size]
      )
      while cursor.next?
        change = cursor.next_row
        self.current_id = change['id']
        self.change_array << change
        change['array_index'] = self.change_array.size - 1

        table_change_tree = change_tree[change['change_table']] ||= {}
        key_changes = table_change_tree[change['change_key']] ||= []
        key_changes << change
      end
      cursor.clear
    end

    # Returns the creation time of the oldest unprocessed change log record.
    def oldest_change_time
      change = oldest_change
      change['change_time'] if change
    end

    # Returns the oldest unprocessed change log record (column_name => value hash).
    def oldest_change
      update
      oldest_change = nil
      unless change_array.empty?
        while (oldest_change = change_array[self.current_index]) == nil
          self.current_index += 1
        end
      end
      oldest_change
    end

    # Returns the specified change log record (column_name => value hash).
    # * +change_table+: the name of the table that was changed
    # * +change_key+: the change key of the modified record
    def load(change_table, change_key)
      update
      change = nil
      table_change_tree = change_tree[change_table]
      if table_change_tree
        key_changes = table_change_tree[change_key]
        if key_changes
          # get change object and delete from key_changes
          change = key_changes.shift

          # delete change from change_array
          change_array[change['array_index']] = nil

          # delete change from database
          connection.execute "delete from #{change_log_table} where id = #{change['id']}"

          # delete key_changes if empty
          if key_changes.empty?
            table_change_tree.delete change_key
          end

          # delete table_change_tree if empty
          if table_change_tree.empty?
            change_tree.delete change_table
          end

          # reset everything if no more changes remain
          if change_tree.empty?
            init_cache
          end
        end
      end
      change
    end

    # Returns the name of the change log table
    def change_log_table
      @change_log_table ||= "#{session.configuration.options[:rep_prefix]}_pending_changes"
    end
    private :change_log_table
  end
end
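A minimal usage sketch for the loader cache above, assuming an already configured RR::Session (configuration loaded, replication triggers and the pending-changes tables installed); the table name and change key are purely illustrative:

  require 'rubyrep'

  session = RR::Session.new              # uses the previously loaded configuration

  loaders = RR::LoggedChangeLoaders.new(session)
  loaders.update                         # force-refresh the caches of both databases

  left_loader = loaders[:left]
  puts left_loader.oldest_change_time    # change_time of the oldest pending change, or nil

  # load returns the change log record for one table row and removes it from
  # both cache and database; 'customers' and 'id|42' are illustrative values.
  change = left_loader.load('customers', 'id|42')
  puts change['change_type'] if change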
data/lib/rubyrep/noisy_connection.rb
@@ -0,0 +1,80 @@
module RR

  # Wraps an existing cursor.
  # Purpose: send regular updates to the installed TaskSweeper
  class NoisyCursor
    # The original cursor
    attr_accessor :org_cursor

    # The installed task sweeper
    attr_accessor :sweeper

    # Create a new NoisyCursor.
    # * cursor: the original cursor
    # * sweeper: the target TaskSweeper
    def initialize(cursor, sweeper)
      self.org_cursor = cursor
      self.sweeper = sweeper
    end

    # Delegate the uninteresting methods to the original cursor
    def next?; org_cursor.next? end
    def clear; org_cursor.clear end

    # Returns the row as a column => value hash and moves the cursor to the next row.
    def next_row
      sweeper.ping
      row = org_cursor.next_row
      sweeper.ping
      row
    end
  end

  # Modifies ProxyConnections to send regular pings to an installed TaskSweeper
  module NoisyConnection

    # The installed TaskSweeper
    attr_accessor :sweeper

    # Modifies ProxyConnection#select_cursor to wrap the returned cursor
    # into a NoisyCursor.
    def select_cursor(options)
      sweeper.ping
      org_cursor = super
      sweeper.ping
      NoisyCursor.new(org_cursor, sweeper)
    end

    # Wraps ProxyConnection#insert_record to update the TaskSweeper
    def insert_record(table, values)
      sweeper.ping
      result = super
      sweeper.ping
      result
    end

    # Wraps ProxyConnection#update_record to update the TaskSweeper
    def update_record(table, values, org_key = nil)
      sweeper.ping
      result = super
      sweeper.ping
      result
    end

    # Wraps ProxyConnection#delete_record to update the TaskSweeper
    def delete_record(table, values)
      sweeper.ping
      result = super
      sweeper.ping
      result
    end

    # Wraps ProxyConnection#commit_db_transaction to update the TaskSweeper
    def commit_db_transaction
      sweeper.ping
      result = super
      sweeper.ping
      result
    end
  end
end
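A sketch of mixing NoisyConnection into a live connection. Because the wrapped methods only ever call sweeper.ping, any object responding to #ping works; the real TaskSweeper is replaced with a trivial counter here, and the session and table name are assumed:

  # Stand-in for RR::TaskSweeper; anything responding to #ping will do.
  class PingCounter
    attr_reader :pings
    def initialize; @pings = 0 end
    def ping; @pings += 1 end
  end

  sweeper = PingCounter.new
  connection = session.left               # a ProxyConnection of an existing RR::Session
  connection.extend RR::NoisyConnection   # overridden methods now ping before and after each call
  connection.sweeper = sweeper

  cursor = connection.select_cursor(:table => 'scanner_records', :row_buffer_size => 10)
  cursor.next_row if cursor.next?
  cursor.clear
  puts sweeper.pings                      # non-zero: the connection reported its activity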
data/lib/rubyrep/proxied_table_scan.rb
@@ -0,0 +1,171 @@
module RR

  # Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
  # Doesn't have any reporting functionality by itself.
  # Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
  # Usage:
  #   1. Create a new ProxiedTableScan object and hand it all necessary information
  #   2. Call ProxiedTableScan#run to do the actual comparison
  #   3. The block handed to ProxiedTableScan#run receives all differences
  class ProxiedTableScan < TableScan

    # returns block size to use for table scanning
    def block_size
      @block_size ||= session.configuration.options_for_table(left_table)[:proxy_block_size]
    end

    # Creates a new ProxiedTableScan instance
    # * session: a Session object representing the current database session
    # * left_table: name of the table in the left database
    # * right_table: name of the table in the right database. If not given, same as left_table
    def initialize(session, left_table, right_table = nil)
      raise "#{self.class.name} only works with proxied sessions" unless session.proxied?

      super
    end

    # Compares the rows of the specified left and right blocks.
    # +left_block_cursor+ and +right_block_cursor+ are the according ProxyBlockCursor objects.
    # Yields all identified differences with
    # * diff_type
    # * row
    # #run describes the yield parameters in detail.
    def compare_blocks(left_block_cursor, right_block_cursor)
      left_cursor = right_cursor = nil

      left_row_checksums = left_block_cursor.row_checksums
      right_row_checksums = right_block_cursor.row_checksums

      # phase 1: identify the different rows and store their primary keys
      left_diff_rows = []
      left_diff_checksums = []
      right_diff_rows = []
      right_diff_checksums = []
      i = k = 0
      while i < left_row_checksums.size or k < right_row_checksums.size
        left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
        right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
        rank = rank_rows left_keys, right_keys
        case rank
        when -1
          left_diff_rows << left_keys
          left_diff_checksums << left_row_checksums[i][:checksum]
          i += 1
        when 1
          right_diff_rows << right_keys
          right_diff_checksums << right_row_checksums[k][:checksum]
          k += 1
        when 0
          if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
            left_diff_rows << left_keys
            left_diff_checksums << left_row_checksums[i][:checksum]
            right_diff_rows << right_keys
            right_diff_checksums << right_row_checksums[k][:checksum]
          end
          i += 1
          k += 1
        end
      end

      # retrieve possibly existing cached rows from the block cursors
      left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
      right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

      # build arrays of row keys for rows that were not included in the row cache
      left_uncached_rows = []
      left_diff_rows.each_with_index do |row, i|
        left_uncached_rows << row unless left_row_cache[left_diff_checksums[i]]
      end
      right_uncached_rows = []
      right_diff_rows.each_with_index do |row, i|
        right_uncached_rows << row unless right_row_cache[right_diff_checksums[i]]
      end

      # phase 2: read all different rows and yield them
      unless left_uncached_rows.empty?
        left_cursor = session.left.create_cursor \
          ProxyRowCursor, left_table, :row_keys => left_uncached_rows
      end
      unless right_uncached_rows.empty?
        right_cursor = session.right.create_cursor \
          ProxyRowCursor, right_table, :row_keys => right_uncached_rows
      end
      i = k = 0
      while i < left_diff_rows.size or k < right_diff_rows.size
        rank = rank_rows left_diff_rows[i], right_diff_rows[k]
        case rank
        when -1
          if left_row_cache.include? left_diff_checksums[i]
            row = Marshal.load(left_row_cache[left_diff_checksums[i]])
          else
            row = left_cursor.next_row
          end
          yield :left, row
          i += 1
        when 1
          if right_row_cache.include? right_diff_checksums[k]
            row = Marshal.load(right_row_cache[right_diff_checksums[k]])
          else
            row = right_cursor.next_row
          end
          yield :right, row
          k += 1
        when 0
          if left_row_cache.include? left_diff_checksums[i]
            left_row = Marshal.load(left_row_cache[left_diff_checksums[i]])
          else
            left_row = left_cursor.next_row
          end
          if right_row_cache.include? right_diff_checksums[k]
            right_row = Marshal.load(right_row_cache[right_diff_checksums[k]])
          else
            right_row = right_cursor.next_row
          end
          yield :conflict, [left_row, right_row]
          i += 1
          k += 1
        end
      end
    ensure
      session.left.destroy_cursor left_cursor if left_cursor
      session.right.destroy_cursor right_cursor if right_cursor
    end

    # Runs the table scan.
    # Calls the block for every found difference.
    # Differences are yielded with 2 parameters
    # * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
    # * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
    def run(&blck)
      left_cursor = right_cursor = nil
      left_cursor = session.left.create_cursor ProxyBlockCursor, left_table,
        :row_buffer_size => scan_options[:row_buffer_size]
      right_cursor = session.right.create_cursor ProxyBlockCursor, right_table,
        :row_buffer_size => scan_options[:row_buffer_size]
      update_progress 0 # ensures progress bar is printed even if there are no records
      while left_cursor.next?
        left_to, left_checksum, left_progress =
          left_cursor.checksum :proxy_block_size => block_size
        _, right_checksum, right_progress =
          right_cursor.checksum :max_row => left_to
        combined_progress = left_progress + right_progress
        if left_checksum != right_checksum
          compare_blocks left_cursor, right_cursor do |type, row|
            steps = type == :conflict ? 2 : 1
            update_progress steps
            combined_progress -= steps
            yield type, row
          end
        end
        update_progress combined_progress
      end
      while right_cursor.next?
        update_progress 1
        yield :right, right_cursor.next_row
      end
    ensure
      session.left.destroy_cursor left_cursor if left_cursor
      session.right.destroy_cursor right_cursor if right_cursor
    end
  end
end
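Following the usage steps in the class comment, a minimal sketch; the proxied session setup and the table name are assumptions:

  session = RR::Session.new   # configuration must point both databases at running rubyrep proxies
  scan = RR::ProxiedTableScan.new(session, 'scanner_records')
  scan.run do |diff_type, row|
    case diff_type
    when :left     then puts "only in left:  #{row.inspect}"
    when :right    then puts "only in right: #{row.inspect}"
    when :conflict then puts "conflict: #{row[0].inspect} vs. #{row[1].inspect}"
    end
  end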
data/lib/rubyrep/proxy_block_cursor.rb
@@ -0,0 +1,145 @@
$LOAD_PATH.unshift File.dirname(__FILE__) + '/..'

require 'digest/sha1'

require 'rubyrep'

module RR

  # This class is used to scan a table in blocks.
  # Calculates the checksums of the scanned blocks.
  class ProxyBlockCursor < ProxyCursor

    include TableScanHelper

    # The current Digest
    attr_accessor :digest

    # nil if the last run of the checksum method left no unprocessed row.
    # Otherwise the left over row of that checksum run
    attr_accessor :last_row

    # Returns an array of checksums for each encountered row.
    # Each array element is a Hash with the following elements:
    # * +:row_keys+: A primary key => value hash identifying the row
    # * +:checksum+: the checksum for this row
    attr_accessor :row_checksums

    # The maximum total size (in bytes) up to which rows will be cached
    attr_accessor :max_row_cache_size

    # A byte counter of how many bytes of row data have already been cached
    attr_accessor :current_row_cache_size

    # A hash of cached rows consisting of row checksum => row dump pairs.
    attr_accessor :row_cache

    # Creates a new cursor
    # * session: the current proxy session
    # * table: table_name
    def initialize(session, table)
      self.max_row_cache_size = 1000000 # this size should be sufficient as long as the table doesn't contain blobs
      super
    end

    # Returns true if the current cursor has unprocessed rows
    def next?
      last_row != nil or cursor.next?
    end

    # Returns the cursor's next row
    def next_row
      if self.last_row
        row, self.last_row = self.last_row, nil
      else
        row = cursor.next_row
      end
      row
    end

    # Returns a hash of row checksum => row dump pairs for the +checksums+
    # in the provided array
    def retrieve_row_cache(checksums)
      row_dumps = {}
      checksums.each do |checksum|
        row_dumps[checksum] = row_cache[checksum] if row_cache.include? checksum
      end
      row_dumps
    end

    # Updates block / row checksums and row cache with the given +row+.
    def update_checksum(row)
      dump = Marshal.dump(row)

      # updates row checksum array
      row_keys = row.reject {|key, | not primary_key_names.include? key}
      checksum = Digest::SHA1.hexdigest(dump)
      self.row_checksums << {:row_keys => row_keys, :checksum => checksum}

      # update the row cache (unless maximum cache size limit has already been reached)
      if current_row_cache_size + dump.size < max_row_cache_size
        self.current_row_cache_size += dump.size
        row_cache[checksum] = dump
      end

      # update current total checksum
      self.digest << dump
    end

    # Reinitializes the row checksum array and the total checksum
    def reset_checksum
      self.row_checksums = []
      self.current_row_cache_size = 0
      self.row_cache = {}
      self.digest = Digest::SHA1.new
    end

    # Returns the current checksum
    def current_checksum
      self.digest.hexdigest
    end

    # Calculates the checksum from the current row up to the row specified by options.
    # options is a hash including either
    # * :+proxy_block_size+: The number of rows to scan.
    # * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
    # Returns multiple parameters:
    # * last row read
    # * checksum
    # * number of processed records
    def checksum(options = {})
      reset_checksum
      return_row = row = nil
      row_count = 0

      if options.include? :proxy_block_size
        block_size = options[:proxy_block_size]
        raise ":proxy_block_size must be greater than 0" unless block_size > 0
        while row_count < block_size and next?
          row = next_row
          update_checksum(row)
          row_count += 1
        end
        return_row = row
      elsif options.include? :max_row
        max_row = options[:max_row]
        while next?
          row = next_row
          rank = rank_rows row, max_row
          if rank > 0
            # row > max_row ==> save the current row and break off
            self.last_row = row
            break
          end
          row_count += 1
          update_checksum(row)
          return_row, row = row, nil
        end
      else
        raise "options must include either :proxy_block_size or :max_row"
      end
      return_keys = return_row.reject {|key, | not primary_key_names.include? key} if return_row
      return return_keys, current_checksum, row_count
    end
  end
end
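The checksum API is what drives the block-wise comparison; the sketch below mirrors how ProxiedTableScan#run pairs the two modes (:proxy_block_size on the left cursor, :max_row on the right). The session, table name and block size are assumptions:

  left  = session.left.create_cursor  RR::ProxyBlockCursor, 'scanner_records', :row_buffer_size => 1000
  right = session.right.create_cursor RR::ProxyBlockCursor, 'scanner_records', :row_buffer_size => 1000

  while left.next?
    # left: checksum the next block of up to 1000 rows
    last_keys, left_checksum, _count = left.checksum :proxy_block_size => 1000
    # right: checksum everything up to (and including) that primary key
    _keys, right_checksum, _count = right.checksum :max_row => last_keys
    puts "block up to #{last_keys.inspect} differs" if left_checksum != right_checksum
  end
  # (rows existing only at the end of the right table would still need the
  # final sweep that #run performs)

  session.left.destroy_cursor left
  session.right.destroy_cursor right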