rubyrep 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +137 -0
  4. data/README.txt +37 -0
  5. data/Rakefile +30 -0
  6. data/bin/rubyrep +8 -0
  7. data/config/hoe.rb +72 -0
  8. data/config/mysql_config.rb +25 -0
  9. data/config/postgres_config.rb +21 -0
  10. data/config/proxied_test_config.rb +14 -0
  11. data/config/redmine_config.rb +17 -0
  12. data/config/rep_config.rb +20 -0
  13. data/config/requirements.rb +32 -0
  14. data/config/test_config.rb +20 -0
  15. data/lib/rubyrep/base_runner.rb +195 -0
  16. data/lib/rubyrep/command_runner.rb +144 -0
  17. data/lib/rubyrep/committers/buffered_committer.rb +140 -0
  18. data/lib/rubyrep/committers/committers.rb +146 -0
  19. data/lib/rubyrep/configuration.rb +240 -0
  20. data/lib/rubyrep/connection_extenders/connection_extenders.rb +133 -0
  21. data/lib/rubyrep/connection_extenders/jdbc_extender.rb +284 -0
  22. data/lib/rubyrep/connection_extenders/mysql_extender.rb +168 -0
  23. data/lib/rubyrep/connection_extenders/postgresql_extender.rb +261 -0
  24. data/lib/rubyrep/database_proxy.rb +52 -0
  25. data/lib/rubyrep/direct_table_scan.rb +75 -0
  26. data/lib/rubyrep/generate_runner.rb +105 -0
  27. data/lib/rubyrep/initializer.rb +39 -0
  28. data/lib/rubyrep/logged_change.rb +326 -0
  29. data/lib/rubyrep/proxied_table_scan.rb +171 -0
  30. data/lib/rubyrep/proxy_block_cursor.rb +145 -0
  31. data/lib/rubyrep/proxy_connection.rb +318 -0
  32. data/lib/rubyrep/proxy_cursor.rb +44 -0
  33. data/lib/rubyrep/proxy_row_cursor.rb +43 -0
  34. data/lib/rubyrep/proxy_runner.rb +89 -0
  35. data/lib/rubyrep/replication_difference.rb +91 -0
  36. data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
  37. data/lib/rubyrep/replication_extenders/postgresql_replication.rb +204 -0
  38. data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
  39. data/lib/rubyrep/replication_helper.rb +104 -0
  40. data/lib/rubyrep/replication_initializer.rb +307 -0
  41. data/lib/rubyrep/replication_run.rb +48 -0
  42. data/lib/rubyrep/replication_runner.rb +138 -0
  43. data/lib/rubyrep/replicators/replicators.rb +37 -0
  44. data/lib/rubyrep/replicators/two_way_replicator.rb +334 -0
  45. data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
  46. data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
  47. data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
  48. data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
  49. data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
  50. data/lib/rubyrep/scan_runner.rb +25 -0
  51. data/lib/rubyrep/session.rb +177 -0
  52. data/lib/rubyrep/sync_helper.rb +111 -0
  53. data/lib/rubyrep/sync_runner.rb +31 -0
  54. data/lib/rubyrep/syncers/syncers.rb +112 -0
  55. data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
  56. data/lib/rubyrep/table_scan.rb +54 -0
  57. data/lib/rubyrep/table_scan_helper.rb +38 -0
  58. data/lib/rubyrep/table_sorter.rb +70 -0
  59. data/lib/rubyrep/table_spec_resolver.rb +136 -0
  60. data/lib/rubyrep/table_sync.rb +68 -0
  61. data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
  62. data/lib/rubyrep/type_casting_cursor.rb +31 -0
  63. data/lib/rubyrep/uninstall_runner.rb +92 -0
  64. data/lib/rubyrep/version.rb +9 -0
  65. data/lib/rubyrep.rb +68 -0
  66. data/script/destroy +14 -0
  67. data/script/generate +14 -0
  68. data/script/txt2html +74 -0
  69. data/setup.rb +1585 -0
  70. data/sims/performance/big_rep_spec.rb +100 -0
  71. data/sims/performance/big_scan_spec.rb +57 -0
  72. data/sims/performance/big_sync_spec.rb +141 -0
  73. data/sims/performance/performance.rake +228 -0
  74. data/sims/sim_helper.rb +24 -0
  75. data/spec/base_runner_spec.rb +218 -0
  76. data/spec/buffered_committer_spec.rb +271 -0
  77. data/spec/command_runner_spec.rb +145 -0
  78. data/spec/committers_spec.rb +174 -0
  79. data/spec/configuration_spec.rb +198 -0
  80. data/spec/connection_extender_interface_spec.rb +138 -0
  81. data/spec/connection_extenders_registration_spec.rb +129 -0
  82. data/spec/database_proxy_spec.rb +48 -0
  83. data/spec/database_rake_spec.rb +40 -0
  84. data/spec/db_specific_connection_extenders_spec.rb +34 -0
  85. data/spec/db_specific_replication_extenders_spec.rb +38 -0
  86. data/spec/direct_table_scan_spec.rb +61 -0
  87. data/spec/generate_runner_spec.rb +84 -0
  88. data/spec/initializer_spec.rb +46 -0
  89. data/spec/logged_change_spec.rb +480 -0
  90. data/spec/postgresql_replication_spec.rb +48 -0
  91. data/spec/postgresql_support_spec.rb +57 -0
  92. data/spec/progress_bar_spec.rb +77 -0
  93. data/spec/proxied_table_scan_spec.rb +151 -0
  94. data/spec/proxy_block_cursor_spec.rb +197 -0
  95. data/spec/proxy_connection_spec.rb +399 -0
  96. data/spec/proxy_cursor_spec.rb +56 -0
  97. data/spec/proxy_row_cursor_spec.rb +66 -0
  98. data/spec/proxy_runner_spec.rb +70 -0
  99. data/spec/replication_difference_spec.rb +160 -0
  100. data/spec/replication_extender_interface_spec.rb +365 -0
  101. data/spec/replication_extenders_spec.rb +32 -0
  102. data/spec/replication_helper_spec.rb +121 -0
  103. data/spec/replication_initializer_spec.rb +477 -0
  104. data/spec/replication_run_spec.rb +166 -0
  105. data/spec/replication_runner_spec.rb +213 -0
  106. data/spec/replicators_spec.rb +31 -0
  107. data/spec/rubyrep_spec.rb +8 -0
  108. data/spec/scan_detail_reporter_spec.rb +119 -0
  109. data/spec/scan_progress_printers_spec.rb +68 -0
  110. data/spec/scan_report_printers_spec.rb +67 -0
  111. data/spec/scan_runner_spec.rb +50 -0
  112. data/spec/scan_summary_reporter_spec.rb +61 -0
  113. data/spec/session_spec.rb +212 -0
  114. data/spec/spec.opts +1 -0
  115. data/spec/spec_helper.rb +295 -0
  116. data/spec/sync_helper_spec.rb +157 -0
  117. data/spec/sync_runner_spec.rb +78 -0
  118. data/spec/syncers_spec.rb +171 -0
  119. data/spec/table_scan_helper_spec.rb +29 -0
  120. data/spec/table_scan_spec.rb +49 -0
  121. data/spec/table_sorter_spec.rb +31 -0
  122. data/spec/table_spec_resolver_spec.rb +102 -0
  123. data/spec/table_sync_spec.rb +84 -0
  124. data/spec/trigger_mode_switcher_spec.rb +83 -0
  125. data/spec/two_way_replicator_spec.rb +551 -0
  126. data/spec/two_way_syncer_spec.rb +256 -0
  127. data/spec/type_casting_cursor_spec.rb +50 -0
  128. data/spec/uninstall_runner_spec.rb +86 -0
  129. data/tasks/database.rake +439 -0
  130. data/tasks/deployment.rake +29 -0
  131. data/tasks/environment.rake +9 -0
  132. data/tasks/java.rake +37 -0
  133. data/tasks/redmine_test.rake +47 -0
  134. data/tasks/rspec.rake +68 -0
  135. data/tasks/rubyrep.tailor +18 -0
  136. data/tasks/stats.rake +19 -0
  137. data/tasks/task_helper.rb +20 -0
  138. data.tar.gz.sig +0 -0
  139. metadata +243 -0
  140. metadata.gz.sig +0 -0
@@ -0,0 +1,326 @@
1
+ module RR
2
+
3
class Session

  # Hands out the +LoggedChangeLoader+ responsible for the given database.
  # * +database+: either :+left+ or :+right+
  #
  # The loader is created on first request and then reused for all later
  # requests for the same database.
  def change_loader(database)
    @change_loaders ||= {}
    @change_loaders[database] ||= LoggedChangeLoader.new(self, database)
  end

  # Forces both change loaders (left, then right) to refresh their change
  # log caches, regardless of whether the caches have expired.
  def reload_changes
    left_loader = change_loader(:left)
    left_loader.update :forced => true
    right_loader = change_loader(:right)
    right_loader.update :forced => true
  end

end
22
+
23
# Caches the entries in the change log table
class LoggedChangeLoader

  # Settings used by #update for every option the caller does not provide.
  DEFAULT_UPDATE_OPTIONS = {:forced => false, :expire_time => 1}.freeze

  # The current +Session+.
  attr_accessor :session

  # The current +ProxyConnection+.
  attr_accessor :connection

  # Index to the next unprocessed change in the +change_array+.
  attr_accessor :current_index

  # ID of the last cached change log record.
  attr_accessor :current_id

  # Array with all cached changes.
  # Processed change log records are replaced with +nil+.
  attr_accessor :change_array

  # Tree (hash) structure for fast access to all cached changes.
  # First level of tree:
  # * key: table name
  # * value: 2nd level tree
  # 2nd level tree:
  # * key: the change_key value of the according change log records.
  # * value:
  #   Array of the according change log records (column_name => value
  #   hashes) in the order in which they were read from the change log.
  #   Additional entry of each change log hash:
  #   * key: 'array_index'
  #   * value: index to the change log record in +change_array+
  attr_accessor :change_tree

  # Date of last update of the cache
  attr_accessor :last_updated

  # Initializes / resets the cache.
  def init_cache
    self.change_tree = {}
    self.change_array = []
    self.current_index = 0
  end
  private :init_cache

  # Create a new change log record cache.
  # * +session+: The current +Session+
  # * +database+: Either :+left+ or :+right+
  def initialize(session, database)
    self.session = session
    self.connection = session.send(database)

    init_cache
    self.current_id = -1
    # far enough in the past that the first #update is never skipped
    self.last_updated = 1.year.ago
  end

  # Updates the cache with all change log records newer than +current_id+.
  # Options is a hash determining when the update is actually executed:
  # * :+expire_time+: cache is older than the given number of seconds
  # * :+forced+: if +true+ update the cache even if not yet expired
  # Options that are not given fall back to DEFAULT_UPDATE_OPTIONS, so
  # passing only one of them is safe.
  # The return value carries no meaning.
  def update(options = {})
    options = DEFAULT_UPDATE_OPTIONS.merge(options)
    return unless options[:forced] || Time.now - last_updated >= options[:expire_time]

    self.last_updated = Time.now

    cursor = connection.select_cursor(
      :table => change_log_table,
      :from => {'id' => current_id},
      :exclude_starting_row => true,
      :type_cast => true
    )
    begin
      while cursor.next?
        change = cursor.next_row
        self.current_id = change['id']
        change_array << change
        change['array_index'] = change_array.size - 1

        table_change_tree = change_tree[change['change_table']] ||= {}
        key_changes = table_change_tree[change['change_key']] ||= []
        key_changes << change
      end
    ensure
      # release the cursor even if reading a row failed
      cursor.clear
    end
    nil
  end

  # Returns the creation time of the oldest unprocessed change log record
  # (+nil+ if there is none).
  def oldest_change_time
    change = oldest_change
    change['change_time'] if change
  end

  # Returns the oldest unprocessed change log record (column_name => value hash)
  # or +nil+ if no unprocessed record is cached.
  def oldest_change
    update
    # Skip slots of already processed changes. The scan is bounded by the
    # array size so that an array holding only processed slots ends the
    # search instead of looping forever.
    while current_index < change_array.size
      change = change_array[current_index]
      return change if change
      self.current_index += 1
    end
    nil
  end

  # Returns the oldest cached change log record (column_name => value hash)
  # for the given table / key and removes it from cache and database.
  # Returns +nil+ if no such record is cached.
  # * +change_table+: the name of the table that was changed
  # * +change_key+: the change key of the modified record
  def load(change_table, change_key)
    update
    table_change_tree = change_tree[change_table]
    return nil unless table_change_tree
    key_changes = table_change_tree[change_key]
    return nil unless key_changes

    # get change object and delete from key_changes
    change = key_changes.shift

    # delete change from change_array
    change_array[change['array_index']] = nil

    # delete change from database
    connection.execute "delete from #{change_log_table} where id = #{change['id']}"

    # prune tree levels that became empty
    table_change_tree.delete change_key if key_changes.empty?
    change_tree.delete change_table if table_change_tree.empty?

    # reset everything if no more changes remain
    init_cache if change_tree.empty?

    change
  end

  # Returns the name of the change log table
  def change_log_table
    @change_log_table ||= "#{session.configuration.options[:rep_prefix]}_pending_changes"
  end
  private :change_log_table
end
168
+
169
# Describes a single logged record change.
#
# Note:
# The change loading functionality depends on the current database session
# being executed in an open database transaction.
# Also at the end of change processing the transaction must be committed.
class LoggedChange

  # The current Session
  attr_accessor :session

  # The database which was changed. Either :+left+ or :+right+.
  attr_accessor :database

  # The name of the changed table
  attr_accessor :table

  # When the first change to the record happened
  attr_accessor :first_changed_at

  # When the last change to the record happened
  attr_accessor :last_changed_at

  # Type of the change. Either :+insert+, :+update+, :+delete+ or :+no_change+.
  attr_accessor :type

  # A column_name => value hash identifying the changed record.
  # For updates: the primary key the record had before the update.
  attr_accessor :key

  # Only used for updates: a column_name => value hash of the primary key
  # the record has after the update. +nil+ for all other change types.
  attr_accessor :new_key

  # Creates a new LoggedChange instance.
  # * +session+: the current Session
  # * +database+: either :+left+ or :+right+
  def initialize(session, database)
    self.session = session
    self.database = database
    self.type = :no_change
  end

  # A hash describing how the change state morphs based on newly found change
  # records.
  # * key: String consisting of 2 letters
  #   * first letter: describes current type change (nothing, insert, update, delete)
  #   * second letter: the new change type as read of the change log table
  # * value:
  #   The resulting change type.
  # [1]: such cases shouldn't happen. but just in case, choose the most
  # sensible solution.
  TYPE_CHANGES = {
    'NI' => 'I',
    'NU' => 'U',
    'ND' => 'D',
    'II' => 'I', # [1]
    'IU' => 'I',
    'ID' => 'N',
    'UI' => 'U', # [1]
    'UU' => 'U',
    'UD' => 'D',
    'DI' => 'U',
    'DU' => 'U', # [1]
    'DD' => 'D', # [1]
  }.freeze

  # A hash translating the short 1-letter types to the according symbols
  SHORT_TYPES = {
    'I' => :insert,
    'U' => :update,
    'D' => :delete,
    'N' => :no_change
  }.freeze

  # A hash translating the symbol types to the according 1-letter types
  LONG_TYPES = SHORT_TYPES.invert.freeze

  # Returns the configured key separator
  def key_sep
    @key_sep ||= session.configuration.options[:key_sep]
  end

  # Returns a column_name => value hash based on the provided +raw_key+ string
  # (which is a string in the format as read directly from the change log table:
  # column names and values alternate, separated by #key_sep).
  def key_to_hash(raw_key)
    result = {}
    raw_key.split(key_sep).each_slice(2) {|field_name, value| result[field_name] = value}
    result
  end

  # Loads the change as per #table and #key. Works if the LoggedChange instance
  # is totally new or was already loaded before.
  #
  # Consumes all change log records of the record (following primary key
  # changes made by updates) and folds them into a single resulting #type.
  def load
    current_type = LONG_TYPES[type]

    # a previously loaded update continues under its new key
    key_values = new_key || key
    # change to key string as can be found in change log table
    org_key = session.send(database).primary_key_names(table).map do |key_name|
      "#{key_name}#{key_sep}#{key_values[key_name]}"
    end.join(key_sep)
    current_key = org_key

    while change = session.change_loader(database).load(table, current_key)
      current_type = TYPE_CHANGES["#{current_type}#{change['change_type']}"]

      self.first_changed_at ||= change['change_time']
      self.last_changed_at = change['change_time']

      # an update may have changed the primary key: continue under the new key
      if change['change_type'] == 'U' and change['change_new_key'] != current_key
        current_key = change['change_new_key']
      end
    end

    self.type = SHORT_TYPES[current_type]
    self.new_key = nil
    if type == :update
      self.key ||= key_to_hash(org_key)
      self.new_key = key_to_hash(current_key)
    else
      self.key = key_to_hash(current_key)
    end
  end

  # Loads the change with the specified key for the named +table+.
  # * +table+: name of the table
  # * +key+: a column_name => value hash for all primary key columns of the table
  def load_specified(table, key)
    self.table = table
    self.key = key
    load
  end

  # Returns the time of the oldest change. Returns +nil+ if there are no
  # changes left.
  def oldest_change_time
    session.change_loader(database).oldest_change_time
  end

  # Loads the oldest available change.
  # Changes that cancel each other out (resulting type :+no_change+) are
  # skipped; stops when a real change was loaded or no change is left.
  def load_oldest
    loop do
      change = session.change_loader(database).oldest_change
      break unless change
      self.key = key_to_hash(change['change_key'])
      self.table = change['change_table']
      load
      break unless type == :no_change
    end
  end

  # Prevents session from going into YAML output.
  # +instance_variables+ yields Strings on Ruby 1.8 but Symbols on Ruby 1.9+,
  # so the names are compared in their String form.
  def to_yaml_properties
    instance_variables.sort.reject {|var_name| var_name.to_s == '@session'}
  end

end
326
+ end
@@ -0,0 +1,171 @@
1
+ module RR
2
+
3
+ # Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
4
+ # Doesn't have any reporting functionality by itself.
5
+ # Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
6
+ # Usage:
7
+ # 1. Create a new ProxiedTableScan object and hand it all necessary information
8
+ # 2. Call ProxiedTableScan#run to do the actual comparison
9
+ # 3. The block handed to ProxiedTableScan#run receives all differences
10
+ class ProxiedTableScan < TableScan
11
+
12
# Returns the block size to use for table scanning: the :+proxy_block_size+
# option configured for the left table. The value is looked up once and
# then reused.
def block_size
  return @block_size if @block_size
  table_options = session.configuration.options_for_table(left_table)
  @block_size = table_options[:proxy_block_size]
end
16
+
17
# Creates a new ProxiedTableScan instance
# * session: a Session object representing the current database session
# * left_table: name of the table in the left database
# * right_table: name of the table in the right database. If not given, same as left_table
# Raises a RuntimeError if +session+ is not a proxied session.
def initialize(session, left_table, right_table = nil)
  unless session.proxied?
    raise "#{self.class.name} only works with proxied sessions"
  end

  # all arguments are handed unchanged to the superclass constructor
  super
end
26
+
27
# Compares the rows covered by the given block cursors.
# +left_block_cursor+ and +right_block_cursor+ represent the according
# ProxyBlockCursor objects (they provide +row_checksums+ and
# +retrieve_row_cache+).
# Yields all identified differences with
# * diff_type
# * row
# #run describes the yield parameters in detail.
#
# Rows whose dump is available in the block cursors' row caches are restored
# from there; all others are read through ProxyRowCursor objects, which are
# destroyed again before the method returns (also in case of errors).
def compare_blocks(left_block_cursor, right_block_cursor)
  left_cursor = right_cursor = nil

  left_row_checksums = left_block_cursor.row_checksums
  right_row_checksums = right_block_cursor.row_checksums

  # phase 1: identify the different rows and store their primary keys
  left_diff_rows = []
  left_diff_checksums = []
  right_diff_rows = []
  right_diff_checksums = []
  i = k = 0
  while i < left_row_checksums.size || k < right_row_checksums.size
    left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
    right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
    case rank_rows(left_keys, right_keys)
    when -1
      # row exists only in left block
      left_diff_rows << left_keys
      left_diff_checksums << left_row_checksums[i][:checksum]
      i += 1
    when 1
      # row exists only in right block
      right_diff_rows << right_keys
      right_diff_checksums << right_row_checksums[k][:checksum]
      k += 1
    when 0
      # same key on both sides: only a difference if the row content differs
      if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
        left_diff_rows << left_keys
        left_diff_checksums << left_row_checksums[i][:checksum]
        right_diff_rows << right_keys
        right_diff_checksums << right_row_checksums[k][:checksum]
      end
      i += 1
      k += 1
    end
  end

  # retrieve possibly existing cached rows from the block cursors
  left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
  right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

  # builds arrays of row keys for rows that were not included in the hash
  left_uncached_rows = []
  left_diff_rows.each_with_index do |row_keys, index|
    left_uncached_rows << row_keys unless left_row_cache[left_diff_checksums[index]]
  end
  right_uncached_rows = []
  right_diff_rows.each_with_index do |row_keys, index|
    right_uncached_rows << row_keys unless right_row_cache[right_diff_checksums[index]]
  end

  # phase 2: read all different rows and yield them
  unless left_uncached_rows.empty?
    left_cursor = session.left.create_cursor \
      ProxyRowCursor, left_table, :row_keys => left_uncached_rows
  end
  unless right_uncached_rows.empty?
    right_cursor = session.right.create_cursor \
      ProxyRowCursor, right_table, :row_keys => right_uncached_rows
  end
  i = k = 0
  while i < left_diff_rows.size || k < right_diff_rows.size
    case rank_rows(left_diff_rows[i], right_diff_rows[k])
    when -1
      if left_row_cache.include? left_diff_checksums[i]
        row = Marshal.load(left_row_cache[left_diff_checksums[i]])
      else
        row = left_cursor.next_row
      end
      yield :left, row
      i += 1
    when 1
      if right_row_cache.include? right_diff_checksums[k]
        row = Marshal.load(right_row_cache[right_diff_checksums[k]])
      else
        row = right_cursor.next_row
      end
      yield :right, row
      k += 1
    when 0
      if left_row_cache.include? left_diff_checksums[i]
        left_row = Marshal.load(left_row_cache[left_diff_checksums[i]])
      else
        left_row = left_cursor.next_row
      end
      if right_row_cache.include? right_diff_checksums[k]
        right_row = Marshal.load(right_row_cache[right_diff_checksums[k]])
      else
        # must go into right_row: it is the value yielded as part of the conflict
        right_row = right_cursor.next_row
      end
      yield :conflict, [left_row, right_row]
      i += 1
      k += 1
    end
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
133
+
134
# Executes the table scan and reports every difference to the given block.
# The block receives 2 parameters
# * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
# * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
#
# Both tables are walked block by block; rows are only compared in detail
# when the checksums of corresponding blocks differ. The block cursors are
# destroyed before returning (also in case of errors).
def run(&blck)
  left_cursor = nil
  right_cursor = nil
  left_cursor = session.left.create_cursor(
    ProxyBlockCursor, left_table, :row_buffer_size => scan_options[:row_buffer_size])
  right_cursor = session.right.create_cursor(
    ProxyBlockCursor, right_table, :row_buffer_size => scan_options[:row_buffer_size])

  # ensures progress bar is printed even if there are no records
  update_progress 0

  while left_cursor.next?
    left_to, left_checksum, left_progress =
      left_cursor.checksum(:proxy_block_size => block_size)
    # the right block ends at the last key of the left block
    _last_right_row, right_checksum, right_progress =
      right_cursor.checksum(:max_row => left_to)

    remaining_progress = left_progress + right_progress
    unless left_checksum == right_checksum
      compare_blocks(left_cursor, right_cursor) do |type, row|
        # a conflict accounts for one row on each side
        steps = 1
        steps = 2 if type == :conflict
        update_progress steps
        remaining_progress -= steps
        yield type, row
      end
    end
    # report whatever part of the block was not reported row by row
    update_progress remaining_progress
  end

  # left table is exhausted: everything remaining in right exists only there
  while right_cursor.next?
    update_progress 1
    yield :right, right_cursor.next_row
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
170
+ end
171
+ end
@@ -0,0 +1,145 @@
1
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/..'
2
+
3
+ require 'digest/sha1'
4
+
5
+ require 'rubyrep'
6
+
7
+ module RR
8
+
9
+ # This class is used to scan a table in blocks.
10
+ # Calculates the checksums of the scanned blocks.
11
+ class ProxyBlockCursor < ProxyCursor
12
+
13
+ include TableScanHelper
14
+
15
+ # The current Digest
16
+ attr_accessor :digest
17
+
18
+ # nil if the last run of the checksum method left no unprocessed row.
19
+ # Otherwise the left over row of that checksum run
20
+ attr_accessor :last_row
21
+
22
+ # Returns an array of checksums for each encountered row.
23
+ # Each array element is a Hash with the following elements:
24
+ # * +:row_keys+: A primary key => value hash identifying the row
25
+ # * +:checksum+: the checksum for this row
26
+ attr_accessor :row_checksums
27
+
28
+ # The maximum total size (in bytes) up to which rows will be cached
29
+ attr_accessor :max_row_cache_size
30
+
31
+ # A byte counter of how many bytes of row data have already been cached
32
+ attr_accessor :current_row_cache_size
33
+
34
+ # A hash of cached rows consisting of row checksum => row dump pairs.
35
+ attr_accessor :row_cache
36
+
37
# Creates a new cursor
# * session: the current proxy session
# * table: table_name
# Both arguments are handed unchanged to the superclass constructor.
def initialize(session, table)
  # this size should be sufficient as long as table doesn't contain blobs
  self.max_row_cache_size = 1_000_000
  super
end
44
+
45
# Returns true if the current cursor has unprocessed rows: either a row left
# over from the last checksum run or further rows in the underlying cursor.
def next?
  !last_row.nil? || cursor.next?
end
49
+
50
# Returns the cursor's next row.
# A row left over from the last checksum run is handed out first (and then
# forgotten); otherwise the row comes from the underlying cursor.
def next_row
  return cursor.next_row unless last_row

  pending_row = last_row
  self.last_row = nil
  pending_row
end
59
+
60
# Returns a hash of row checksum => row dump pairs for those of the given
# +checksums+ (an array) that are present in the row cache. Checksums
# without a cached row are left out.
def retrieve_row_cache(checksums)
  checksums.inject({}) do |row_dumps, checksum|
    row_dumps[checksum] = row_cache[checksum] if row_cache.include? checksum
    row_dumps
  end
end
69
+
70
# Feeds the given +row+ into the block checksum, records its row checksum
# (together with its primary key values) and caches the row dump.
def update_checksum(row)
  serialized = Marshal.dump(row)
  row_checksum = Digest::SHA1.hexdigest(serialized)

  # remember primary key values + checksum of this row
  key_values = row.reject {|column, _value| !primary_key_names.include?(column)}
  row_checksums << {:row_keys => key_values, :checksum => row_checksum}

  # cache the row dump unless that would reach the cache size limit
  new_cache_size = current_row_cache_size + serialized.size
  if new_cache_size < max_row_cache_size
    self.current_row_cache_size = new_cache_size
    row_cache[row_checksum] = serialized
  end

  # update current total checksum
  digest << serialized
end
88
+
89
# Starts a new checksum run: empties the row cache (and its byte counter)
# and the row checksum array and installs a fresh total digest.
def reset_checksum
  self.row_cache = {}
  self.current_row_cache_size = 0
  self.row_checksums = []
  self.digest = Digest::SHA1.new
end
96
+
97
# Returns the hex representation of the total checksum over all rows fed
# into the digest so far.
def current_checksum
  digest.hexdigest
end
101
+
102
# Calculates the checksum from the current row up to the row specified by options.
# options is a hash including either
# * :+proxy_block_size+: The number of rows to scan.
# * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
# Returns an array of:
# * primary key values of the last row read (+nil+ if no row was processed)
# * checksum
# * number of processed records
# Raises a RuntimeError if neither option is given or if
# :+proxy_block_size+ is not greater than 0.
def checksum(options = {})
  reset_checksum
  last_processed = nil
  processed = 0

  if options.include? :proxy_block_size
    block_size = options[:proxy_block_size]
    raise ":proxy_block_size must be greater than 0" unless block_size > 0
    while processed < block_size and next?
      last_processed = next_row
      update_checksum(last_processed)
      processed += 1
    end
  elsif options.include? :max_row
    max_row = options[:max_row]
    while next?
      candidate = next_row
      if rank_rows(candidate, max_row) > 0
        # candidate > max_row ==> keep it for the next run and break off
        self.last_row = candidate
        break
      end
      processed += 1
      update_checksum(candidate)
      last_processed = candidate
    end
  else
    raise "options must include either :proxy_block_size or :max_row"
  end

  return_keys = nil
  if last_processed
    return_keys = last_processed.reject {|column, _value| !primary_key_names.include?(column)}
  end
  [return_keys, current_checksum, processed]
end
144
+ end
145
+ end