andyjeffries-rubyrep 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +83 -0
- data/License.txt +20 -0
- data/Manifest.txt +151 -0
- data/README.txt +37 -0
- data/bin/rubyrep +8 -0
- data/lib/rubyrep.rb +72 -0
- data/lib/rubyrep/base_runner.rb +195 -0
- data/lib/rubyrep/command_runner.rb +144 -0
- data/lib/rubyrep/committers/buffered_committer.rb +151 -0
- data/lib/rubyrep/committers/committers.rb +152 -0
- data/lib/rubyrep/configuration.rb +275 -0
- data/lib/rubyrep/connection_extenders/connection_extenders.rb +165 -0
- data/lib/rubyrep/connection_extenders/jdbc_extender.rb +65 -0
- data/lib/rubyrep/connection_extenders/mysql_extender.rb +59 -0
- data/lib/rubyrep/connection_extenders/postgresql_extender.rb +277 -0
- data/lib/rubyrep/database_proxy.rb +52 -0
- data/lib/rubyrep/direct_table_scan.rb +75 -0
- data/lib/rubyrep/generate_runner.rb +105 -0
- data/lib/rubyrep/initializer.rb +39 -0
- data/lib/rubyrep/log_helper.rb +30 -0
- data/lib/rubyrep/logged_change.rb +160 -0
- data/lib/rubyrep/logged_change_loader.rb +197 -0
- data/lib/rubyrep/noisy_connection.rb +80 -0
- data/lib/rubyrep/proxied_table_scan.rb +171 -0
- data/lib/rubyrep/proxy_block_cursor.rb +145 -0
- data/lib/rubyrep/proxy_connection.rb +431 -0
- data/lib/rubyrep/proxy_cursor.rb +44 -0
- data/lib/rubyrep/proxy_row_cursor.rb +43 -0
- data/lib/rubyrep/proxy_runner.rb +89 -0
- data/lib/rubyrep/replication_difference.rb +100 -0
- data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
- data/lib/rubyrep/replication_extenders/postgresql_replication.rb +236 -0
- data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
- data/lib/rubyrep/replication_helper.rb +142 -0
- data/lib/rubyrep/replication_initializer.rb +327 -0
- data/lib/rubyrep/replication_run.rb +142 -0
- data/lib/rubyrep/replication_runner.rb +166 -0
- data/lib/rubyrep/replicators/replicators.rb +42 -0
- data/lib/rubyrep/replicators/two_way_replicator.rb +361 -0
- data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
- data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
- data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
- data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
- data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
- data/lib/rubyrep/scan_runner.rb +25 -0
- data/lib/rubyrep/session.rb +230 -0
- data/lib/rubyrep/sync_helper.rb +121 -0
- data/lib/rubyrep/sync_runner.rb +31 -0
- data/lib/rubyrep/syncers/syncers.rb +112 -0
- data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
- data/lib/rubyrep/table_scan.rb +54 -0
- data/lib/rubyrep/table_scan_helper.rb +46 -0
- data/lib/rubyrep/table_sorter.rb +70 -0
- data/lib/rubyrep/table_spec_resolver.rb +142 -0
- data/lib/rubyrep/table_sync.rb +90 -0
- data/lib/rubyrep/task_sweeper.rb +77 -0
- data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
- data/lib/rubyrep/type_casting_cursor.rb +31 -0
- data/lib/rubyrep/uninstall_runner.rb +93 -0
- data/lib/rubyrep/version.rb +9 -0
- data/rubyrep +8 -0
- data/rubyrep.bat +4 -0
- data/setup.rb +1585 -0
- data/spec/base_runner_spec.rb +218 -0
- data/spec/buffered_committer_spec.rb +274 -0
- data/spec/command_runner_spec.rb +145 -0
- data/spec/committers_spec.rb +178 -0
- data/spec/configuration_spec.rb +203 -0
- data/spec/connection_extender_interface_spec.rb +141 -0
- data/spec/connection_extenders_registration_spec.rb +164 -0
- data/spec/database_proxy_spec.rb +48 -0
- data/spec/database_rake_spec.rb +40 -0
- data/spec/db_specific_connection_extenders_spec.rb +34 -0
- data/spec/db_specific_replication_extenders_spec.rb +38 -0
- data/spec/direct_table_scan_spec.rb +61 -0
- data/spec/dolphins.jpg +0 -0
- data/spec/generate_runner_spec.rb +84 -0
- data/spec/initializer_spec.rb +46 -0
- data/spec/log_helper_spec.rb +39 -0
- data/spec/logged_change_loader_spec.rb +68 -0
- data/spec/logged_change_spec.rb +470 -0
- data/spec/noisy_connection_spec.rb +78 -0
- data/spec/postgresql_replication_spec.rb +48 -0
- data/spec/postgresql_schema_support_spec.rb +212 -0
- data/spec/postgresql_support_spec.rb +63 -0
- data/spec/progress_bar_spec.rb +77 -0
- data/spec/proxied_table_scan_spec.rb +151 -0
- data/spec/proxy_block_cursor_spec.rb +197 -0
- data/spec/proxy_connection_spec.rb +423 -0
- data/spec/proxy_cursor_spec.rb +56 -0
- data/spec/proxy_row_cursor_spec.rb +66 -0
- data/spec/proxy_runner_spec.rb +70 -0
- data/spec/replication_difference_spec.rb +161 -0
- data/spec/replication_extender_interface_spec.rb +367 -0
- data/spec/replication_extenders_spec.rb +32 -0
- data/spec/replication_helper_spec.rb +178 -0
- data/spec/replication_initializer_spec.rb +509 -0
- data/spec/replication_run_spec.rb +443 -0
- data/spec/replication_runner_spec.rb +254 -0
- data/spec/replicators_spec.rb +36 -0
- data/spec/rubyrep_spec.rb +8 -0
- data/spec/scan_detail_reporter_spec.rb +119 -0
- data/spec/scan_progress_printers_spec.rb +68 -0
- data/spec/scan_report_printers_spec.rb +67 -0
- data/spec/scan_runner_spec.rb +50 -0
- data/spec/scan_summary_reporter_spec.rb +61 -0
- data/spec/session_spec.rb +253 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +305 -0
- data/spec/strange_name_support_spec.rb +135 -0
- data/spec/sync_helper_spec.rb +169 -0
- data/spec/sync_runner_spec.rb +78 -0
- data/spec/syncers_spec.rb +171 -0
- data/spec/table_scan_helper_spec.rb +36 -0
- data/spec/table_scan_spec.rb +49 -0
- data/spec/table_sorter_spec.rb +30 -0
- data/spec/table_spec_resolver_spec.rb +111 -0
- data/spec/table_sync_spec.rb +140 -0
- data/spec/task_sweeper_spec.rb +47 -0
- data/spec/trigger_mode_switcher_spec.rb +83 -0
- data/spec/two_way_replicator_spec.rb +721 -0
- data/spec/two_way_syncer_spec.rb +256 -0
- data/spec/type_casting_cursor_spec.rb +50 -0
- data/spec/uninstall_runner_spec.rb +93 -0
- metadata +190 -0

data/lib/rubyrep/logged_change_loader.rb
@@ -0,0 +1,197 @@
+module RR
+
+  # Makes management of logged change loaders easier
+  class LoggedChangeLoaders
+
+    # The current Session
+    attr_accessor :session
+
+    # A hash of LoggedChangeLoader instances for the :+left+ and :+right+ database
+    attr_accessor :loaders
+
+    # Create new logged change loaders.
+    # * +session+: Current Session
+    def initialize(session)
+      self.session = session
+      self.loaders = {}
+      [:left, :right].each do |database|
+        loaders[database] = LoggedChangeLoader.new(session, database)
+      end
+    end
+
+    # Returns the LoggedChangeLoader for the specified (:+left+ or :+right+)
+    # database.
+    def [](database)
+      loaders[database]
+    end
+
+    # Forces an update of the change log cache
+    def update
+      [:left, :right].each {|database| self[database].update :forced => true}
+    end
+  end
+
+  # Caches the entries in the change log table
+  class LoggedChangeLoader
+
+    # The current +Session+.
+    attr_accessor :session
+
+    # Current database (either :+left+ or :+right+)
+    attr_accessor :database
+
+    # The current +ProxyConnection+.
+    attr_accessor :connection
+
+    # Index to the next unprocessed change in the +change_array+.
+    attr_accessor :current_index
+
+    # ID of the last cached change log record.
+    attr_accessor :current_id
+
+    # Array with all cached changes.
+    # Processed change log records are replaced with +nil+.
+    attr_accessor :change_array
+
+    # Tree (hash) structure for fast access to all cached changes.
+    # First level of tree:
+    # * key: table name
+    # * value: 2nd level tree
+    # 2nd level tree:
+    # * key: the change_key value of the according change log records.
+    # * value:
+    #   An array of according change log records (column_name => value hash).
+    #   Additional entry of each change log hash:
+    #   * key: 'array_index'
+    #   * value: index to the change log record in +change_array+
+    attr_accessor :change_tree
+
+    # Date of last update of the cache
+    attr_accessor :last_updated
+
+    # Initializes / resets the cache.
+    def init_cache
+      self.change_tree = {}
+      self.change_array = []
+      self.current_index = 0
+    end
+    private :init_cache
+
+    # Create a new change log record cache.
+    # * +session+: The current +Session+
+    # * +database+: Either :+left+ or :+right+
+    def initialize(session, database)
+      self.session = session
+      self.database = database
+      self.connection = session.send(database)
+
+      init_cache
+      self.current_id = -1
+      self.last_updated = 1.year.ago
+    end
+
+    # Updates the cache.
+    # Options is a hash determining when the update is actually executed:
+    # * :+expire_time+: cache is older than the given number of seconds
+    # * :+forced+: if +true+ update the cache even if not yet expired
+    def update(options = {:forced => false, :expire_time => 1})
+      return unless options[:forced] or Time.now - self.last_updated >= options[:expire_time]
+
+      self.last_updated = Time.now
+
+      # First, let's use a LIMIT clause (via :row_buffer_size option) to verify
+      # if there are any pending changes.
+      # (If there are many pending changes, this is (at least with PostgreSQL)
+      # much faster.)
+      cursor = connection.select_cursor(
+        :table => change_log_table,
+        :from => {'id' => current_id},
+        :exclude_starting_row => true,
+        :row_buffer_size => 1
+      )
+      return unless cursor.next?
+
+      # Something is here. Let's actually load it.
+      cursor = connection.select_cursor(
+        :table => change_log_table,
+        :from => {'id' => current_id},
+        :exclude_starting_row => true,
+        :type_cast => true,
+        :row_buffer_size => session.configuration.options[:row_buffer_size]
+      )
+      while cursor.next?
+        change = cursor.next_row
+        self.current_id = change['id']
+        self.change_array << change
+        change['array_index'] = self.change_array.size - 1
+
+        table_change_tree = change_tree[change['change_table']] ||= {}
+        key_changes = table_change_tree[change['change_key']] ||= []
+        key_changes << change
+      end
+      cursor.clear
+    end
+
+    # Returns the creation time of the oldest unprocessed change log record.
+    def oldest_change_time
+      change = oldest_change
+      change['change_time'] if change
+    end
+
+    # Returns the oldest unprocessed change log record (column_name => value hash).
+    def oldest_change
+      update
+      oldest_change = nil
+      unless change_array.empty?
+        while (oldest_change = change_array[self.current_index]) == nil
+          self.current_index += 1
+        end
+      end
+      oldest_change
+    end
+
+    # Returns the specified change log record (column_name => value hash).
+    # * +change_table+: the name of the table that was changed
+    # * +change_key+: the change key of the modified record
+    def load(change_table, change_key)
+      update
+      change = nil
+      table_change_tree = change_tree[change_table]
+      if table_change_tree
+        key_changes = table_change_tree[change_key]
+        if key_changes
+          # get change object and delete from key_changes
+          change = key_changes.shift
+
+          # delete change from change_array
+          change_array[change['array_index']] = nil
+
+          # delete change from database
+          connection.execute "delete from #{change_log_table} where id = #{change['id']}"
+
+          # delete key_changes if empty
+          if key_changes.empty?
+            table_change_tree.delete change_key
+          end
+
+          # delete table_change_tree if empty
+          if table_change_tree.empty?
+            change_tree.delete change_table
+          end
+
+          # reset everything if no more changes remain
+          if change_tree.empty?
+            init_cache
+          end
+        end
+      end
+      change
+    end
+
+    # Returns the name of the change log table
+    def change_log_table
+      @change_log_table ||= "#{session.configuration.options[:rep_prefix]}_pending_changes"
+    end
+    private :change_log_table
+  end
+end
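
The two classes above form the caching layer between rubyrep's change log table and the replication logic. For orientation only, the following sketch shows roughly how they are driven; `session` stands for an already configured RR::Session, and the table name and change key value are hypothetical placeholders.

    # Illustrative sketch, not part of the package. `session`, 'customers' and '42' are placeholders.
    loaders = RR::LoggedChangeLoaders.new(session)
    loaders.update                                # force-refresh the change log cache of both databases
    left_loader = loaders[:left]                  # LoggedChangeLoader for the left database
    puts left_loader.oldest_change_time.inspect   # creation time of the oldest pending change, or nil
    change = left_loader.load('customers', '42')  # fetch and consume the matching change, if any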

data/lib/rubyrep/noisy_connection.rb
@@ -0,0 +1,80 @@
+module RR
+
+  # Wraps an existing cursor.
+  # Purpose: send regular updates to the installed TaskSweeper
+  class NoisyCursor
+    # The original cursor
+    attr_accessor :org_cursor
+
+    # The installed task sweeper
+    attr_accessor :sweeper
+
+    # Create a new NoisyCursor.
+    # * cursor: the original cursor
+    # * sweeper: the target TaskSweeper
+    def initialize(cursor, sweeper)
+      self.org_cursor = cursor
+      self.sweeper = sweeper
+    end
+
+    # Delegate the uninteresting methods to the original cursor
+    def next?; org_cursor.next? end
+    def clear; org_cursor.clear end
+
+    # Returns the row as a column => value hash and moves the cursor to the next row.
+    def next_row
+      sweeper.ping
+      row = org_cursor.next_row
+      sweeper.ping
+      row
+    end
+  end
+
+  # Modifies ProxyConnections to send regular pings to an installed TaskSweeper
+  module NoisyConnection
+
+    # The installed TaskSweeper
+    attr_accessor :sweeper
+
+    # Modifies ProxyConnection#select_cursor to wrap the returned cursor
+    # into a NoisyCursor.
+    def select_cursor(options)
+      sweeper.ping
+      org_cursor = super
+      sweeper.ping
+      NoisyCursor.new(org_cursor, sweeper)
+    end
+
+    # Wraps ProxyConnection#insert_record to update the TaskSweeper
+    def insert_record(table, values)
+      sweeper.ping
+      result = super
+      sweeper.ping
+      result
+    end
+
+    # Wraps ProxyConnection#update_record to update the TaskSweeper
+    def update_record(table, values, org_key = nil)
+      sweeper.ping
+      result = super
+      sweeper.ping
+      result
+    end
+
+    # Wraps ProxyConnection#delete_record to update the TaskSweeper
+    def delete_record(table, values)
+      sweeper.ping
+      result = super
+      sweeper.ping
+      result
+    end
+
+    # Wraps ProxyConnection#commit_db_transaction to update the TaskSweeper
+    def commit_db_transaction
+      sweeper.ping
+      result = super
+      sweeper.ping
+      result
+    end
+  end
+end
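
NoisyConnection is a mix-in rather than a standalone class: rubyrep extends individual ProxyConnection objects with it so that long-running statements keep pinging the TaskSweeper. The wiring below is only a sketch under that assumption; `connection` (a ProxyConnection) and `sweeper` (a TaskSweeper) are assumed to come from elsewhere, and 'customers' is a placeholder table.

    # Illustrative sketch, not part of the package. `connection` and `sweeper` are assumed to exist.
    connection.extend RR::NoisyConnection    # adds the ping-wrapping method overrides to this object
    connection.sweeper = sweeper             # target of the progress pings
    cursor = connection.select_cursor(:table => 'customers', :row_buffer_size => 100)
    while cursor.next?
      row = cursor.next_row                  # the wrapping NoisyCursor pings the sweeper around each fetch
    end
    cursor.clear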

data/lib/rubyrep/proxied_table_scan.rb
@@ -0,0 +1,171 @@
+module RR
+
+  # Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
+  # Doesn't have any reporting functionality by itself.
+  # Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
+  # Usage:
+  #   1. Create a new ProxiedTableScan object and hand it all necessary information
+  #   2. Call ProxiedTableScan#run to do the actual comparison
+  #   3. The block handed to ProxiedTableScan#run receives all differences
+  class ProxiedTableScan < TableScan
+
+    # returns block size to use for table scanning
+    def block_size
+      @block_size ||= session.configuration.options_for_table(left_table)[:proxy_block_size]
+    end
+
+    # Creates a new ProxiedTableScan instance
+    # * session: a Session object representing the current database session
+    # * left_table: name of the table in the left database
+    # * right_table: name of the table in the right database. If not given, same as left_table
+    def initialize(session, left_table, right_table = nil)
+      raise "#{self.class.name} only works with proxied sessions" unless session.proxied?
+
+      super
+    end
+
+    # Compares the specified left and right rows.
+    # +left_block_cursor+ and +right_block_cursor+ represent the according ProxyBlockCursor objects.
+    # Yields all identified differences with
+    # * diff_type
+    # * row
+    # #run describes the yield parameters in detail.
+    def compare_blocks(left_block_cursor, right_block_cursor)
+      left_cursor = right_cursor = nil
+
+      left_row_checksums = left_block_cursor.row_checksums
+      right_row_checksums = right_block_cursor.row_checksums
+
+      # phase 1: identify the different rows and store their primary keys
+      left_diff_rows = []
+      left_diff_checksums = []
+      right_diff_rows = []
+      right_diff_checksums = []
+      i = k = 0
+      while i < left_row_checksums.size or k < right_row_checksums.size
+        left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
+        right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
+        rank = rank_rows left_keys, right_keys
+        case rank
+        when -1
+          left_diff_rows << left_keys
+          left_diff_checksums << left_row_checksums[i][:checksum]
+          i += 1
+        when 1
+          right_diff_rows << right_keys
+          right_diff_checksums << right_row_checksums[k][:checksum]
+          k += 1
+        when 0
+          if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
+            left_diff_rows << left_keys
+            left_diff_checksums << left_row_checksums[i][:checksum]
+            right_diff_rows << right_keys
+            right_diff_checksums << right_row_checksums[k][:checksum]
+          end
+          i += 1
+          k += 1
+        end
+      end
+
+      # retrieve possibly existing cached rows from the block cursors
+      left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
+      right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums
+
+      # build arrays of row keys for rows that were not included in the row cache
+      left_uncached_rows = []
+      left_diff_rows.each_with_index do |row, i|
+        left_uncached_rows << row unless left_row_cache[left_diff_checksums[i]]
+      end
+      right_uncached_rows = []
+      right_diff_rows.each_with_index do |row, i|
+        right_uncached_rows << row unless right_row_cache[right_diff_checksums[i]]
+      end
+
+      # phase 2: read all different rows and yield them
+      unless left_uncached_rows.empty?
+        left_cursor = session.left.create_cursor \
+          ProxyRowCursor, left_table, :row_keys => left_uncached_rows
+      end
+      unless right_uncached_rows.empty?
+        right_cursor = session.right.create_cursor \
+          ProxyRowCursor, right_table, :row_keys => right_uncached_rows
+      end
+      i = k = 0
+      while i < left_diff_rows.size or k < right_diff_rows.size
+        rank = rank_rows left_diff_rows[i], right_diff_rows[k]
+        case rank
+        when -1
+          if left_row_cache.include? left_diff_checksums[i]
+            row = Marshal.load(left_row_cache[left_diff_checksums[i]])
+          else
+            row = left_cursor.next_row
+          end
+          yield :left, row
+          i += 1
+        when 1
+          if right_row_cache.include? right_diff_checksums[k]
+            row = Marshal.load(right_row_cache[right_diff_checksums[k]])
+          else
+            row = right_cursor.next_row
+          end
+          yield :right, row
+          k += 1
+        when 0
+          if left_row_cache.include? left_diff_checksums[i]
+            left_row = Marshal.load(left_row_cache[left_diff_checksums[i]])
+          else
+            left_row = left_cursor.next_row
+          end
+          if right_row_cache.include? right_diff_checksums[k]
+            right_row = Marshal.load(right_row_cache[right_diff_checksums[k]])
+          else
+            right_row = right_cursor.next_row
+          end
+          yield :conflict, [left_row, right_row]
+          i += 1
+          k += 1
+        end
+      end
+    ensure
+      session.left.destroy_cursor left_cursor if left_cursor
+      session.right.destroy_cursor right_cursor if right_cursor
+    end
+
+    # Runs the table scan.
+    # Calls the block for every found difference.
+    # Differences are yielded with 2 parameters
+    # * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
+    # * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
+    def run(&blck)
+      left_cursor = right_cursor = nil
+      left_cursor = session.left.create_cursor ProxyBlockCursor, left_table,
+        :row_buffer_size => scan_options[:row_buffer_size]
+      right_cursor = session.right.create_cursor ProxyBlockCursor, right_table,
+        :row_buffer_size => scan_options[:row_buffer_size]
+      update_progress 0 # ensures progress bar is printed even if there are no records
+      while left_cursor.next?
+        left_to, left_checksum, left_progress =
+          left_cursor.checksum :proxy_block_size => block_size
+        _, right_checksum, right_progress =
+          right_cursor.checksum :max_row => left_to
+        combined_progress = left_progress + right_progress
+        if left_checksum != right_checksum
+          compare_blocks left_cursor, right_cursor do |type, row|
+            steps = type == :conflict ? 2 : 1
+            update_progress steps
+            combined_progress -= steps
+            yield type, row
+          end
+        end
+        update_progress combined_progress
+      end
+      while right_cursor.next?
+        update_progress 1
+        yield :right, right_cursor.next_row
+      end
+    ensure
+      session.left.destroy_cursor left_cursor if left_cursor
+      session.right.destroy_cursor right_cursor if right_cursor
+    end
+  end
+end
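
ProxiedTableScan#run is the public entry point; the rest of the class is plumbing for the block-wise checksum comparison. A minimal caller could look like the sketch below, which assumes a proxied RR::Session named `session` and a placeholder table 'customers':

    # Illustrative sketch, not part of the package.
    scan = RR::ProxiedTableScan.new(session, 'customers')
    scan.run do |diff_type, row|
      case diff_type
      when :left     then puts "only in left:  #{row.inspect}"
      when :right    then puts "only in right: #{row.inspect}"
      when :conflict then puts "conflicting:   #{row.inspect}"  # row is [left_row, right_row]
      end
    end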

data/lib/rubyrep/proxy_block_cursor.rb
@@ -0,0 +1,145 @@
+$LOAD_PATH.unshift File.dirname(__FILE__) + '/..'
+
+require 'digest/sha1'
+
+require 'rubyrep'
+
+module RR
+
+  # This class is used to scan a table in blocks.
+  # Calculates the checksums of the scanned blocks.
+  class ProxyBlockCursor < ProxyCursor
+
+    include TableScanHelper
+
+    # The current Digest
+    attr_accessor :digest
+
+    # nil if the last run of the checksum method left no unprocessed row.
+    # Otherwise the left over row of that checksum run
+    attr_accessor :last_row
+
+    # Returns an array of checksums for each encountered row.
+    # Each array element is a Hash with the following elements:
+    # * +:row_keys+: A primary key => value hash identifying the row
+    # * +:checksum+: the checksum for this row
+    attr_accessor :row_checksums
+
+    # The maximum total size (in bytes) up to which rows will be cached
+    attr_accessor :max_row_cache_size
+
+    # A byte counter of how many bytes of row data have already been cached
+    attr_accessor :current_row_cache_size
+
+    # A hash of cached rows consisting of row checksum => row dump pairs.
+    attr_accessor :row_cache
+
+    # Creates a new cursor
+    # * session: the current proxy session
+    # * table: table_name
+    def initialize(session, table)
+      self.max_row_cache_size = 1000000 # this size should be sufficient as long as table doesn't contain blobs
+      super
+    end
+
+    # Returns true if the current cursor has unprocessed rows
+    def next?
+      last_row != nil or cursor.next?
+    end
+
+    # Returns the cursor's next row
+    def next_row
+      if self.last_row
+        row, self.last_row = self.last_row, nil
+      else
+        row = cursor.next_row
+      end
+      row
+    end
+
+    # Returns a hash of row checksum => row dump pairs for the +checksums+
+    # in the provided array
+    def retrieve_row_cache(checksums)
+      row_dumps = {}
+      checksums.each do |checksum|
+        row_dumps[checksum] = row_cache[checksum] if row_cache.include? checksum
+      end
+      row_dumps
+    end
+
+    # Updates block / row checksums and row cache with the given +row+.
+    def update_checksum(row)
+      dump = Marshal.dump(row)
+
+      # updates row checksum array
+      row_keys = row.reject {|key, | not primary_key_names.include? key}
+      checksum = Digest::SHA1.hexdigest(dump)
+      self.row_checksums << {:row_keys => row_keys, :checksum => checksum}
+
+      # update the row cache (unless maximum cache size limit has already been reached)
+      if current_row_cache_size + dump.size < max_row_cache_size
+        self.current_row_cache_size += dump.size
+        row_cache[checksum] = dump
+      end
+
+      # update current total checksum
+      self.digest << dump
+    end
+
+    # Reinitializes the row checksum array and the total checksum
+    def reset_checksum
+      self.row_checksums = []
+      self.current_row_cache_size = 0
+      self.row_cache = {}
+      self.digest = Digest::SHA1.new
+    end
+
+    # Returns the current checksum
+    def current_checksum
+      self.digest.hexdigest
+    end
+
+    # Calculates the checksum from the current row up to the row specified by options.
+    # options is a hash including either
+    # * :+proxy_block_size+: The number of rows to scan.
+    # * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
+    # Returns multiple parameters:
+    # * last row read
+    # * checksum
+    # * number of processed records
+    def checksum(options = {})
+      reset_checksum
+      return_row = row = nil
+      row_count = 0
+
+      if options.include? :proxy_block_size
+        block_size = options[:proxy_block_size]
+        raise ":proxy_block_size must be greater than 0" unless block_size > 0
+        while row_count < block_size and next?
+          row = next_row
+          update_checksum(row)
+          row_count += 1
+        end
+        return_row = row
+      elsif options.include? :max_row
+        max_row = options[:max_row]
+        while next?
+          row = next_row
+          rank = rank_rows row, max_row
+          if rank > 0
+            # row > max_row ==> save the current row and break off
+            self.last_row = row
+            break
+          end
+          row_count += 1
+          update_checksum(row)
+          return_row, row = row, nil
+        end
+      else
+        raise "options must include either :proxy_block_size or :max_row"
+      end
+      return_keys = return_row.reject {|key, | not primary_key_names.include? key} if return_row
+      return return_keys, current_checksum, row_count
+    end
+  end
+end
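
The checksum method supports exactly the two modes used by ProxiedTableScan above: the left cursor advances in blocks of :proxy_block_size rows, and the right cursor is then checksummed up to the same primary key boundary via :max_row. A rough sketch, assuming a proxied RR::Session named `session` and a non-empty placeholder table 'customers':

    # Illustrative sketch, not part of the package.
    left  = session.left.create_cursor  RR::ProxyBlockCursor, 'customers', :row_buffer_size => 1000
    right = session.right.create_cursor RR::ProxyBlockCursor, 'customers', :row_buffer_size => 1000

    last_keys, left_checksum, scanned = left.checksum(:proxy_block_size => 1000)
    _keys, right_checksum, _counted   = right.checksum(:max_row => last_keys)

    puts "block of #{scanned} rows differs" if left_checksum != right_checksum

    session.left.destroy_cursor left
    session.right.destroy_cursor right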