rubyrep 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +137 -0
  4. data/README.txt +37 -0
  5. data/Rakefile +30 -0
  6. data/bin/rubyrep +8 -0
  7. data/config/hoe.rb +72 -0
  8. data/config/mysql_config.rb +25 -0
  9. data/config/postgres_config.rb +21 -0
  10. data/config/proxied_test_config.rb +14 -0
  11. data/config/redmine_config.rb +17 -0
  12. data/config/rep_config.rb +20 -0
  13. data/config/requirements.rb +32 -0
  14. data/config/test_config.rb +20 -0
  15. data/lib/rubyrep/base_runner.rb +195 -0
  16. data/lib/rubyrep/command_runner.rb +144 -0
  17. data/lib/rubyrep/committers/buffered_committer.rb +140 -0
  18. data/lib/rubyrep/committers/committers.rb +146 -0
  19. data/lib/rubyrep/configuration.rb +240 -0
  20. data/lib/rubyrep/connection_extenders/connection_extenders.rb +133 -0
  21. data/lib/rubyrep/connection_extenders/jdbc_extender.rb +284 -0
  22. data/lib/rubyrep/connection_extenders/mysql_extender.rb +168 -0
  23. data/lib/rubyrep/connection_extenders/postgresql_extender.rb +261 -0
  24. data/lib/rubyrep/database_proxy.rb +52 -0
  25. data/lib/rubyrep/direct_table_scan.rb +75 -0
  26. data/lib/rubyrep/generate_runner.rb +105 -0
  27. data/lib/rubyrep/initializer.rb +39 -0
  28. data/lib/rubyrep/logged_change.rb +326 -0
  29. data/lib/rubyrep/proxied_table_scan.rb +171 -0
  30. data/lib/rubyrep/proxy_block_cursor.rb +145 -0
  31. data/lib/rubyrep/proxy_connection.rb +318 -0
  32. data/lib/rubyrep/proxy_cursor.rb +44 -0
  33. data/lib/rubyrep/proxy_row_cursor.rb +43 -0
  34. data/lib/rubyrep/proxy_runner.rb +89 -0
  35. data/lib/rubyrep/replication_difference.rb +91 -0
  36. data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
  37. data/lib/rubyrep/replication_extenders/postgresql_replication.rb +204 -0
  38. data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
  39. data/lib/rubyrep/replication_helper.rb +104 -0
  40. data/lib/rubyrep/replication_initializer.rb +307 -0
  41. data/lib/rubyrep/replication_run.rb +48 -0
  42. data/lib/rubyrep/replication_runner.rb +138 -0
  43. data/lib/rubyrep/replicators/replicators.rb +37 -0
  44. data/lib/rubyrep/replicators/two_way_replicator.rb +334 -0
  45. data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
  46. data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
  47. data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
  48. data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
  49. data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
  50. data/lib/rubyrep/scan_runner.rb +25 -0
  51. data/lib/rubyrep/session.rb +177 -0
  52. data/lib/rubyrep/sync_helper.rb +111 -0
  53. data/lib/rubyrep/sync_runner.rb +31 -0
  54. data/lib/rubyrep/syncers/syncers.rb +112 -0
  55. data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
  56. data/lib/rubyrep/table_scan.rb +54 -0
  57. data/lib/rubyrep/table_scan_helper.rb +38 -0
  58. data/lib/rubyrep/table_sorter.rb +70 -0
  59. data/lib/rubyrep/table_spec_resolver.rb +136 -0
  60. data/lib/rubyrep/table_sync.rb +68 -0
  61. data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
  62. data/lib/rubyrep/type_casting_cursor.rb +31 -0
  63. data/lib/rubyrep/uninstall_runner.rb +92 -0
  64. data/lib/rubyrep/version.rb +9 -0
  65. data/lib/rubyrep.rb +68 -0
  66. data/script/destroy +14 -0
  67. data/script/generate +14 -0
  68. data/script/txt2html +74 -0
  69. data/setup.rb +1585 -0
  70. data/sims/performance/big_rep_spec.rb +100 -0
  71. data/sims/performance/big_scan_spec.rb +57 -0
  72. data/sims/performance/big_sync_spec.rb +141 -0
  73. data/sims/performance/performance.rake +228 -0
  74. data/sims/sim_helper.rb +24 -0
  75. data/spec/base_runner_spec.rb +218 -0
  76. data/spec/buffered_committer_spec.rb +271 -0
  77. data/spec/command_runner_spec.rb +145 -0
  78. data/spec/committers_spec.rb +174 -0
  79. data/spec/configuration_spec.rb +198 -0
  80. data/spec/connection_extender_interface_spec.rb +138 -0
  81. data/spec/connection_extenders_registration_spec.rb +129 -0
  82. data/spec/database_proxy_spec.rb +48 -0
  83. data/spec/database_rake_spec.rb +40 -0
  84. data/spec/db_specific_connection_extenders_spec.rb +34 -0
  85. data/spec/db_specific_replication_extenders_spec.rb +38 -0
  86. data/spec/direct_table_scan_spec.rb +61 -0
  87. data/spec/generate_runner_spec.rb +84 -0
  88. data/spec/initializer_spec.rb +46 -0
  89. data/spec/logged_change_spec.rb +480 -0
  90. data/spec/postgresql_replication_spec.rb +48 -0
  91. data/spec/postgresql_support_spec.rb +57 -0
  92. data/spec/progress_bar_spec.rb +77 -0
  93. data/spec/proxied_table_scan_spec.rb +151 -0
  94. data/spec/proxy_block_cursor_spec.rb +197 -0
  95. data/spec/proxy_connection_spec.rb +399 -0
  96. data/spec/proxy_cursor_spec.rb +56 -0
  97. data/spec/proxy_row_cursor_spec.rb +66 -0
  98. data/spec/proxy_runner_spec.rb +70 -0
  99. data/spec/replication_difference_spec.rb +160 -0
  100. data/spec/replication_extender_interface_spec.rb +365 -0
  101. data/spec/replication_extenders_spec.rb +32 -0
  102. data/spec/replication_helper_spec.rb +121 -0
  103. data/spec/replication_initializer_spec.rb +477 -0
  104. data/spec/replication_run_spec.rb +166 -0
  105. data/spec/replication_runner_spec.rb +213 -0
  106. data/spec/replicators_spec.rb +31 -0
  107. data/spec/rubyrep_spec.rb +8 -0
  108. data/spec/scan_detail_reporter_spec.rb +119 -0
  109. data/spec/scan_progress_printers_spec.rb +68 -0
  110. data/spec/scan_report_printers_spec.rb +67 -0
  111. data/spec/scan_runner_spec.rb +50 -0
  112. data/spec/scan_summary_reporter_spec.rb +61 -0
  113. data/spec/session_spec.rb +212 -0
  114. data/spec/spec.opts +1 -0
  115. data/spec/spec_helper.rb +295 -0
  116. data/spec/sync_helper_spec.rb +157 -0
  117. data/spec/sync_runner_spec.rb +78 -0
  118. data/spec/syncers_spec.rb +171 -0
  119. data/spec/table_scan_helper_spec.rb +29 -0
  120. data/spec/table_scan_spec.rb +49 -0
  121. data/spec/table_sorter_spec.rb +31 -0
  122. data/spec/table_spec_resolver_spec.rb +102 -0
  123. data/spec/table_sync_spec.rb +84 -0
  124. data/spec/trigger_mode_switcher_spec.rb +83 -0
  125. data/spec/two_way_replicator_spec.rb +551 -0
  126. data/spec/two_way_syncer_spec.rb +256 -0
  127. data/spec/type_casting_cursor_spec.rb +50 -0
  128. data/spec/uninstall_runner_spec.rb +86 -0
  129. data/tasks/database.rake +439 -0
  130. data/tasks/deployment.rake +29 -0
  131. data/tasks/environment.rake +9 -0
  132. data/tasks/java.rake +37 -0
  133. data/tasks/redmine_test.rake +47 -0
  134. data/tasks/rspec.rake +68 -0
  135. data/tasks/rubyrep.tailor +18 -0
  136. data/tasks/stats.rake +19 -0
  137. data/tasks/task_helper.rb +20 -0
  138. data.tar.gz.sig +0 -0
  139. metadata +243 -0
  140. metadata.gz.sig +0 -0
@@ -0,0 +1,326 @@
1
+ module RR
2
+
3
+ class Session
4
+
5
# Returns the +LoggedChangeLoader+ of the specified database.
# The loader is created on first access and reused afterwards.
# * database: either :+left+ or :+right+
def change_loader(database)
  @change_loaders ||= {}
  @change_loaders[database] ||= LoggedChangeLoader.new(self, database)
end
14
+
15
# Forces an update of the change log cache of the left and then the right
# database, regardless of cache expiry.
def reload_changes
  [:left, :right].map { |database|
    change_loader(database).update :forced => true
  }.last
end
20
+
21
+ end
22
+
23
+ # Caches the entries in the change log table
24
+ class LoggedChangeLoader
25
+
26
+ # The current +Session+.
27
+ attr_accessor :session
28
+
29
+ # The current +ProxyConnection+.
30
+ attr_accessor :connection
31
+
32
+ # Index to the next unprocessed change in the +change_array+.
33
+ attr_accessor :current_index
34
+
35
+ # ID of the last cached change log record.
36
+ attr_accessor :current_id
37
+
38
+ # Array with all cached changes.
39
+ # Processed change log records are replaced with +nil+.
40
+ attr_accessor :change_array
41
+
42
+ # Tree (hash) structure for fast access to all cached changes.
43
+ # First level of tree:
44
+ # * key: table name
45
+ # * value: 2nd level tree
46
+ # 2nd level tree:
47
+ # * key: the change_key value of the according change log records.
48
+ # * value:
49
+ # The according change log record (column_name => value hash).
50
+ # Additional entry of each change log hash:
51
+ # * key: 'array_index'
52
+ # * value: index to the change log record in +change_array+
53
+ attr_accessor :change_tree
54
+
55
+ # Date of last update of the cache
56
+ attr_accessor :last_updated
57
+
58
# Initializes / resets the cache: empties the change array and the change
# tree and rewinds the index of the next unprocessed change to 0.
def init_cache
  self.change_array = []
  self.change_tree = {}
  self.current_index = 0
end
private :init_cache
65
+
66
# Creates a new change log record cache.
# * +session+: The current +Session+
# * +database+: Either :+left+ or :+right+ (sent to +session+ to obtain the connection)
def initialize(session, database)
  self.session = session
  self.connection = session.send(database)

  # -1 so that the first update reads the change log from its very beginning
  self.current_id = -1
  # a date far in the past guarantees that the first update is not skipped as "not yet expired"
  self.last_updated = 1.year.ago
  init_cache
end
77
+
78
# Updates the cache with all change log records whose id is greater than
# +current_id+.
# +options+ is a hash determining when the update is actually executed:
# * :+expire_time+: cache is older than the given number of seconds (default: 1)
# * :+forced+: if +true+ update the cache even if not yet expired (default: +false+)
# Options that are not given fall back to their defaults (so a partial hash
# like <tt>:forced => false</tt> no longer fails on a missing :+expire_time+).
def update(options = {})
  options = {:forced => false, :expire_time => 1}.merge(options)
  return unless options[:forced] || Time.now - last_updated >= options[:expire_time]

  self.last_updated = Time.now

  cursor = connection.select_cursor(
    :table => change_log_table,
    :from => {'id' => current_id},
    :exclude_starting_row => true,
    :type_cast => true
  )
  begin
    while cursor.next?
      change = cursor.next_row
      self.current_id = change['id']
      change_array << change
      # remember the position so that #load can blank the entry in change_array
      change['array_index'] = change_array.size - 1

      table_change_tree = change_tree[change['change_table']] ||= {}
      key_changes = table_change_tree[change['change_key']] ||= []
      key_changes << change
    end
  ensure
    # release the cursor even if reading a row failed
    cursor.clear
  end
end
105
+
106
# Returns the creation time ('change_time' column) of the oldest unprocessed
# change log record or +nil+ if there is no unprocessed record.
def oldest_change_time
  change = oldest_change
  change && change['change_time']
end
111
+
112
# Returns the oldest unprocessed change log record (column_name => value hash)
# or +nil+ if there is none.
# Refreshes the cache (subject to its expiry time) and then advances
# +current_index+ past already processed (+nil+) entries of +change_array+.
def oldest_change
  update
  # Bounded by the array size: the scan must terminate even if only
  # processed (nil) entries remain at or after current_index.
  while current_index < change_array.size
    change = change_array[current_index]
    return change unless change.nil?
    self.current_index += 1
  end
  nil
end
123
+
124
# Returns the oldest cached change log record (column_name => value hash) for
# the specified table and change key, or +nil+ if none is cached.
# The returned record is removed from the cache and deleted from the change
# log table.
# * +change_table+: the name of the table that was changed
# * +change_key+: the change key of the modified record
def load(change_table, change_key)
  update

  table_change_tree = change_tree[change_table]
  return nil unless table_change_tree
  key_changes = table_change_tree[change_key]
  return nil unless key_changes

  # take the record out of the per-key list and blank its slot in change_array
  change = key_changes.shift
  change_array[change['array_index']] = nil

  # remove the record from the change log table
  connection.execute "delete from #{change_log_table} where id = #{change['id']}"

  # prune tree levels that became empty; restart the cache once nothing is left
  table_change_tree.delete change_key if key_changes.empty?
  change_tree.delete change_table if table_change_tree.empty?
  init_cache if change_tree.empty?

  change
end
161
+
162
# Returns the (memoized) name of the change log table: the configured
# :+rep_prefix+ option followed by "_pending_changes".
def change_log_table
  @change_log_table ||= begin
    prefix = session.configuration.options[:rep_prefix]
    "#{prefix}_pending_changes"
  end
end
private :change_log_table
167
+ end
168
+
169
+ # Describes a single logged record change.
170
+ #
171
+ # Note:
172
+ # The change loading functionality depends on the current database session
173
+ # being executed in an open database transaction.
174
+ # Also at the end of change processing the transaction must be committed.
175
+ class LoggedChange
176
+
177
+ # The current Session
178
+ attr_accessor :session
179
+
180
+ # The database which was changed. Either :+left+ or :+right+.
181
+ attr_accessor :database
182
+
183
+ # The name of the changed table
184
+ attr_accessor :table
185
+
186
+ # When the first change to the record happened
187
+ attr_accessor :first_changed_at
188
+
189
+ # When the last change to the record happened
190
+ attr_accessor :last_changed_at
191
+
192
+ # Type of the change. Either :+insert+, :+update+ or :+delete+.
193
+ attr_accessor :type
194
+
195
+ # A column_name => value hash identifying the changed record
196
+ attr_accessor :key
197
+
198
+ # Only used for updates: a column_name => value hash of the original primary
199
+ # key of the updated record
200
+ attr_accessor :new_key
201
+
202
# Creates a new LoggedChange instance. The change type starts out as
# :+no_change+ until a change is loaded.
# * +session+: the current Session
# * +database+: either :+left+ or :+right+
def initialize(session, database)
  self.type = :no_change
  self.session = session
  self.database = database
end
210
+
211
# A hash describing how the change state morphs based on newly found change
# records.
# * key: String consisting of 2 letters
#   * first letter: describes current type change (nothing, insert, update, delete)
#   * second letter: the new change type as read of the change log table
# * value:
#   The resulting change type.
# [1]: such cases shouldn't happen. but just in case, choose the most
# sensible solution.
# Frozen: this is a pure lookup table and must not be modified at runtime.
TYPE_CHANGES = {
  'NI' => 'I',
  'NU' => 'U',
  'ND' => 'D',
  'II' => 'I', # [1]
  'IU' => 'I',
  'ID' => 'N',
  'UI' => 'U', # [1]
  'UU' => 'U',
  'UD' => 'D',
  'DI' => 'U',
  'DU' => 'U', # [1]
  'DD' => 'D', # [1]
}.freeze

# A hash translating the short 1-letter types to the according symbols
SHORT_TYPES = {
  'I' => :insert,
  'U' => :update,
  'D' => :delete,
  'N' => :no_change
}.freeze

# A hash translating the symbol types to the according 1-letter types
LONG_TYPES = SHORT_TYPES.invert.freeze
244
+
245
# Returns the key separator as configured in the session's :+key_sep+ option.
# The value is looked up once and then reused.
def key_sep
  return @key_sep if @key_sep
  @key_sep = session.configuration.options[:key_sep]
end
249
+
250
# Converts +raw_key+ (a key string in the format used by the change log
# table: column names and values alternating, joined by the key separator)
# into a column_name => value hash.
def key_to_hash(raw_key)
  raw_key.split(key_sep).each_slice(2).inject({}) do |hash, (field_name, value)|
    hash[field_name] = value
    hash
  end
end
258
+
259
# Loads the change as per #table and #key. Works if the LoggedChange instance
# is totally new or was already loaded before.
#
# Consumes all change log records for the record (following primary key
# changes made by updates), folds their types into one resulting change type
# via TYPE_CHANGES and tracks the time of the first and last change.
def load
  current_type = LONG_TYPES[type]

  # build the key string in the format found in the change log table
  source_key = new_key || key
  org_key = session.send(database).primary_key_names(table).map { |key_name|
    "#{key_name}#{key_sep}#{source_key[key_name]}"
  }.join(key_sep)
  current_key = org_key

  loop do
    change = session.change_loader(database).load(table, current_key)
    break unless change

    current_type = TYPE_CHANGES["#{current_type}#{change['change_type']}"]

    self.first_changed_at ||= change['change_time']
    self.last_changed_at = change['change_time']

    # an update that changed the primary key: continue with the new key
    key_changed = change['change_type'] == 'U' && change['change_new_key'] != current_key
    current_key = change['change_new_key'] if key_changed
  end

  self.type = SHORT_TYPES[current_type]
  self.new_key = nil
  if type == :update
    self.key ||= key_to_hash(org_key)
    self.new_key = key_to_hash(current_key)
  else
    self.key = key_to_hash(current_key)
  end
end
293
+
294
# Stores the given table name and key and then loads the according change.
# * +table+: name of the table
# * +key+: a column_name => value hash for all primary key columns of the table
def load_specified(table, key)
  self.key = key
  self.table = table
  load
end
302
+
303
# Returns the time of the oldest change as reported by the change loader of
# this instance's database. Returns +nil+ if there are no changes left.
def oldest_change_time
  loader = session.change_loader(database)
  loader.oldest_change_time
end
308
+
309
# Loads the oldest available change.
# Changes that turn out to be :+no_change+ after loading are skipped;
# stops without loading anything once no change is left.
def load_oldest
  loop do
    change = session.change_loader(database).oldest_change
    break unless change

    self.key = key_to_hash(change['change_key'])
    self.table = change['change_table']
    load

    break unless type == :no_change
  end
end
319
+
320
# Prevents session from going into YAML output.
# Returns the sorted instance variable names without '@session'.
def to_yaml_properties
  # instance_variables returns Symbols on Ruby >= 1.9 (Strings on 1.8):
  # compare the string form so that the session is excluded on both.
  instance_variables.sort.reject { |var_name| var_name.to_s == '@session' }
end
324
+
325
+ end
326
+ end
@@ -0,0 +1,171 @@
1
+ module RR
2
+
3
+ # Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
4
+ # Doesn't have any reporting functionality by itself.
5
+ # Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
6
+ # Usage:
7
+ # 1. Create a new ProxiedTableScan object and hand it all necessary information
8
+ # 2. Call ProxiedTableScan#run to do the actual comparison
9
+ # 3. The block handed to ProxiedTableScan#run receives all differences
10
+ class ProxiedTableScan < TableScan
11
+
12
# Returns the block size to use for table scanning: the :+proxy_block_size+
# option configured for the left table (looked up once, then reused).
def block_size
  return @block_size if @block_size
  table_options = session.configuration.options_for_table(left_table)
  @block_size = table_options[:proxy_block_size]
end
16
+
17
# Creates a new ProxiedTableScan instance.
# Raises an exception if the given session is not proxied.
# * session: a Session object representing the current database session
# * left_table: name of the table in the left database
# * right_table: name of the table in the right database. If not given, same like left_table
def initialize(session, left_table, right_table = nil)
  unless session.proxied?
    raise "#{self.class.name} only works with proxied sessions"
  end

  # hand all received arguments on to the superclass
  super
end
26
+
27
# Compares the rows of the specified left and right blocks.
# +left_block_cursor+ and +right_block_cursor+ are the block cursors whose
# row checksums (and row caches) describe the two blocks.
# Yields all identified differences with
# * diff_type
# * row
# #run describes the yield parameters in detail.
def compare_blocks(left_block_cursor, right_block_cursor)
  left_cursor = right_cursor = nil

  left_row_checksums = left_block_cursor.row_checksums
  right_row_checksums = right_block_cursor.row_checksums

  # phase 1: identify the different rows and store their primary keys
  left_diff_rows = []
  left_diff_checksums = []
  right_diff_rows = []
  right_diff_checksums = []
  i = k = 0
  while i < left_row_checksums.size or k < right_row_checksums.size
    left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
    right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
    case rank_rows(left_keys, right_keys)
    when -1
      # row exists only in the left block
      left_diff_rows << left_keys
      left_diff_checksums << left_row_checksums[i][:checksum]
      i += 1
    when 1
      # row exists only in the right block
      right_diff_rows << right_keys
      right_diff_checksums << right_row_checksums[k][:checksum]
      k += 1
    when 0
      # same key on both sides: a difference only if the checksums disagree
      if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
        left_diff_rows << left_keys
        left_diff_checksums << left_row_checksums[i][:checksum]
        right_diff_rows << right_keys
        right_diff_checksums << right_row_checksums[k][:checksum]
      end
      i += 1
      k += 1
    end
  end

  # retrieve possibly existing cached rows from the block cursors
  left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
  right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

  # builds arrays of row keys for rows that were not included in the caches
  left_uncached_rows = []
  left_diff_rows.each_with_index do |row_keys, index|
    left_uncached_rows << row_keys unless left_row_cache[left_diff_checksums[index]]
  end
  right_uncached_rows = []
  right_diff_rows.each_with_index do |row_keys, index|
    right_uncached_rows << row_keys unless right_row_cache[right_diff_checksums[index]]
  end

  # phase 2: read all different rows and yield them
  unless left_uncached_rows.empty?
    left_cursor = session.left.create_cursor \
      ProxyRowCursor, left_table, :row_keys => left_uncached_rows
  end
  unless right_uncached_rows.empty?
    right_cursor = session.right.create_cursor \
      ProxyRowCursor, right_table, :row_keys => right_uncached_rows
  end
  i = k = 0
  while i < left_diff_rows.size or k < right_diff_rows.size
    case rank_rows(left_diff_rows[i], right_diff_rows[k])
    when -1
      yield :left, fetch_diff_row(left_row_cache, left_diff_checksums[i], left_cursor)
      i += 1
    when 1
      yield :right, fetch_diff_row(right_row_cache, right_diff_checksums[k], right_cursor)
      k += 1
    when 0
      left_row = fetch_diff_row(left_row_cache, left_diff_checksums[i], left_cursor)
      # the uncached right row must end up in right_row (it used to be
      # assigned to an unrelated variable, yielding a nil / stale right row)
      right_row = fetch_diff_row(right_row_cache, right_diff_checksums[k], right_cursor)
      yield :conflict, [left_row, right_row]
      i += 1
      k += 1
    end
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end

# Returns the next differing row: the unmarshalled dump if +row_cache+ holds
# one under +checksum+, otherwise the next row of +cursor+.
# NOTE: Marshal.load is applied to dumps handed out by the block cursor's row
# cache; it must never be fed data from an untrusted source.
def fetch_diff_row(row_cache, checksum, cursor)
  if row_cache.include? checksum
    Marshal.load(row_cache[checksum])
  else
    cursor.next_row
  end
end
private :fetch_diff_row
133
+
134
# Runs the table scan.
# Compares the tables block by block via checksums and calls the block for
# every found difference.
# Differences are yielded with 2 parameters
# * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
# * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
def run(&blck)
  left_cursor = right_cursor = nil
  left_cursor = session.left.create_cursor(
    ProxyBlockCursor, left_table,
    :row_buffer_size => scan_options[:row_buffer_size]
  )
  right_cursor = session.right.create_cursor(
    ProxyBlockCursor, right_table,
    :row_buffer_size => scan_options[:row_buffer_size]
  )
  update_progress 0 # ensures progress bar is printed even if there are no records
  while left_cursor.next?
    # checksum the next left block, then the right rows up to the same maximum key
    left_to, left_checksum, left_progress =
      left_cursor.checksum(:proxy_block_size => block_size)
    _, right_checksum, right_progress =
      right_cursor.checksum(:max_row => left_to)
    remaining_progress = left_progress + right_progress
    unless left_checksum == right_checksum
      compare_blocks(left_cursor, right_cursor) do |type, row|
        # a conflict accounts for two rows (one per table)
        steps = if type == :conflict then 2 else 1 end
        update_progress steps
        remaining_progress -= steps
        yield type, row
      end
    end
    # report the rows of the block that were not reported as differences
    update_progress remaining_progress
  end
  # whatever is left in the right table has no counterpart in the left table
  while right_cursor.next?
    update_progress 1
    yield :right, right_cursor.next_row
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
170
+ end
171
+ end
@@ -0,0 +1,145 @@
1
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/..'
2
+
3
+ require 'digest/sha1'
4
+
5
+ require 'rubyrep'
6
+
7
+ module RR
8
+
9
+ # This class is used to scan a table in blocks.
10
+ # Calculates the checksums of the scanned blocks.
11
+ class ProxyBlockCursor < ProxyCursor
12
+
13
+ include TableScanHelper
14
+
15
+ # The current Digest
16
+ attr_accessor :digest
17
+
18
+ # nil if the last run of the checksum method left no unprocessed row.
19
+ # Otherwise the left over row of that checksum run
20
+ attr_accessor :last_row
21
+
22
+ # Returns an array of checksums for each encountered row.
23
+ # Each array element is a Hash with the following elements:
24
+ # * +:row_keys+: A primary key => value hash identifying the row
25
+ # * +:checksum+: the checksum for this row
26
+ attr_accessor :row_checksums
27
+
28
+ # The maximum total size (in bytes) up to which rows will be cached
29
+ attr_accessor :max_row_cache_size
30
+
31
+ # A byte counter of how many bytes of row data have already been cached
32
+ attr_accessor :current_row_cache_size
33
+
34
+ # A hash of cached rows consisting of row checksum => row dump pairs.
35
+ attr_accessor :row_cache
36
+
37
# Creates a new cursor and sets the default row cache limit.
# * session: the current proxy session
# * table: table_name
def initialize(session, table)
  # this size should be sufficient as long as table doesn't contain blobs
  self.max_row_cache_size = 1_000_000
  super
end
44
+
45
# Returns true if the current cursor has unprocessed rows: either a row left
# over by the last checksum run or further rows in the underlying cursor.
def next?
  return true unless last_row.nil?
  cursor.next?
end
49
+
50
# Returns the cursor's next row. A row left over by the last checksum run is
# handed out (and forgotten) before reading from the underlying cursor.
def next_row
  return cursor.next_row unless last_row

  pending_row = last_row
  self.last_row = nil
  pending_row
end
59
+
60
# Returns a hash of row checksum => row dump pairs for those of the given
# +checksums+ (an array) that are present in the row cache.
def retrieve_row_cache(checksums)
  checksums.inject({}) do |row_dumps, checksum|
    row_dumps[checksum] = row_cache[checksum] if row_cache.include? checksum
    row_dumps
  end
end
69
+
70
# Updates block / row checksums and row cache with the given +row+.
# The row is marshalled once; the dump feeds the row checksum, the row cache
# and the running block digest.
def update_checksum(row)
  dump = Marshal.dump(row)
  checksum = Digest::SHA1.hexdigest(dump)

  # record the row's primary key columns together with its checksum
  row_keys = row.reject { |column, _| !primary_key_names.include?(column) }
  row_checksums << {:row_keys => row_keys, :checksum => checksum}

  # cache the dump unless that would reach the maximum cache size
  new_cache_size = current_row_cache_size + dump.size
  if new_cache_size < max_row_cache_size
    self.current_row_cache_size = new_cache_size
    row_cache[checksum] = dump
  end

  # update current total checksum
  digest << dump
end
88
+
89
# Reinitializes the row cache (including its byte counter), the row checksum
# array and the total checksum digest.
def reset_checksum
  self.row_cache = {}
  self.current_row_cache_size = 0
  self.row_checksums = []
  self.digest = Digest::SHA1.new
end
96
+
97
# Returns the current checksum: the hex digest over all row dumps fed into
# the digest so far.
def current_checksum
  digest.hexdigest
end
101
+
102
# Calculates the checksum from the current row up to the row specified by options.
# options is a hash including either
# * :+proxy_block_size+: The number of rows to scan.
# * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
# Raises an exception if neither option is given or if :+proxy_block_size+
# is not greater than 0.
# Returns multiple parameters:
# * primary key columns of the last row read (+nil+ if no row was processed)
# * checksum
# * number of processed records
def checksum(options = {})
  reset_checksum
  return_row = nil
  row_count = 0

  if options.key? :proxy_block_size
    block_size = options[:proxy_block_size]
    raise ":proxy_block_size must be greater than 0" unless block_size > 0
    while row_count < block_size && next?
      return_row = next_row
      update_checksum(return_row)
      row_count += 1
    end
  elsif options.key? :max_row
    max_row = options[:max_row]
    while next?
      candidate = next_row
      if rank_rows(candidate, max_row) > 0
        # candidate > max_row ==> keep it for the next run and break off
        self.last_row = candidate
        break
      end
      row_count += 1
      update_checksum(candidate)
      return_row = candidate
    end
  else
    raise "options must include either :proxy_block_size or :max_row"
  end

  return_keys = return_row && return_row.reject { |column, _| !primary_key_names.include?(column) }
  return return_keys, current_checksum, row_count
end
144
+ end
145
+ end