andyjeffries-rubyrep 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. data/History.txt +83 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +151 -0
  4. data/README.txt +37 -0
  5. data/bin/rubyrep +8 -0
  6. data/lib/rubyrep.rb +72 -0
  7. data/lib/rubyrep/base_runner.rb +195 -0
  8. data/lib/rubyrep/command_runner.rb +144 -0
  9. data/lib/rubyrep/committers/buffered_committer.rb +151 -0
  10. data/lib/rubyrep/committers/committers.rb +152 -0
  11. data/lib/rubyrep/configuration.rb +275 -0
  12. data/lib/rubyrep/connection_extenders/connection_extenders.rb +165 -0
  13. data/lib/rubyrep/connection_extenders/jdbc_extender.rb +65 -0
  14. data/lib/rubyrep/connection_extenders/mysql_extender.rb +59 -0
  15. data/lib/rubyrep/connection_extenders/postgresql_extender.rb +277 -0
  16. data/lib/rubyrep/database_proxy.rb +52 -0
  17. data/lib/rubyrep/direct_table_scan.rb +75 -0
  18. data/lib/rubyrep/generate_runner.rb +105 -0
  19. data/lib/rubyrep/initializer.rb +39 -0
  20. data/lib/rubyrep/log_helper.rb +30 -0
  21. data/lib/rubyrep/logged_change.rb +160 -0
  22. data/lib/rubyrep/logged_change_loader.rb +197 -0
  23. data/lib/rubyrep/noisy_connection.rb +80 -0
  24. data/lib/rubyrep/proxied_table_scan.rb +171 -0
  25. data/lib/rubyrep/proxy_block_cursor.rb +145 -0
  26. data/lib/rubyrep/proxy_connection.rb +431 -0
  27. data/lib/rubyrep/proxy_cursor.rb +44 -0
  28. data/lib/rubyrep/proxy_row_cursor.rb +43 -0
  29. data/lib/rubyrep/proxy_runner.rb +89 -0
  30. data/lib/rubyrep/replication_difference.rb +100 -0
  31. data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
  32. data/lib/rubyrep/replication_extenders/postgresql_replication.rb +236 -0
  33. data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
  34. data/lib/rubyrep/replication_helper.rb +142 -0
  35. data/lib/rubyrep/replication_initializer.rb +327 -0
  36. data/lib/rubyrep/replication_run.rb +142 -0
  37. data/lib/rubyrep/replication_runner.rb +166 -0
  38. data/lib/rubyrep/replicators/replicators.rb +42 -0
  39. data/lib/rubyrep/replicators/two_way_replicator.rb +361 -0
  40. data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
  41. data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
  42. data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
  43. data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
  44. data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
  45. data/lib/rubyrep/scan_runner.rb +25 -0
  46. data/lib/rubyrep/session.rb +230 -0
  47. data/lib/rubyrep/sync_helper.rb +121 -0
  48. data/lib/rubyrep/sync_runner.rb +31 -0
  49. data/lib/rubyrep/syncers/syncers.rb +112 -0
  50. data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
  51. data/lib/rubyrep/table_scan.rb +54 -0
  52. data/lib/rubyrep/table_scan_helper.rb +46 -0
  53. data/lib/rubyrep/table_sorter.rb +70 -0
  54. data/lib/rubyrep/table_spec_resolver.rb +142 -0
  55. data/lib/rubyrep/table_sync.rb +90 -0
  56. data/lib/rubyrep/task_sweeper.rb +77 -0
  57. data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
  58. data/lib/rubyrep/type_casting_cursor.rb +31 -0
  59. data/lib/rubyrep/uninstall_runner.rb +93 -0
  60. data/lib/rubyrep/version.rb +9 -0
  61. data/rubyrep +8 -0
  62. data/rubyrep.bat +4 -0
  63. data/setup.rb +1585 -0
  64. data/spec/base_runner_spec.rb +218 -0
  65. data/spec/buffered_committer_spec.rb +274 -0
  66. data/spec/command_runner_spec.rb +145 -0
  67. data/spec/committers_spec.rb +178 -0
  68. data/spec/configuration_spec.rb +203 -0
  69. data/spec/connection_extender_interface_spec.rb +141 -0
  70. data/spec/connection_extenders_registration_spec.rb +164 -0
  71. data/spec/database_proxy_spec.rb +48 -0
  72. data/spec/database_rake_spec.rb +40 -0
  73. data/spec/db_specific_connection_extenders_spec.rb +34 -0
  74. data/spec/db_specific_replication_extenders_spec.rb +38 -0
  75. data/spec/direct_table_scan_spec.rb +61 -0
  76. data/spec/dolphins.jpg +0 -0
  77. data/spec/generate_runner_spec.rb +84 -0
  78. data/spec/initializer_spec.rb +46 -0
  79. data/spec/log_helper_spec.rb +39 -0
  80. data/spec/logged_change_loader_spec.rb +68 -0
  81. data/spec/logged_change_spec.rb +470 -0
  82. data/spec/noisy_connection_spec.rb +78 -0
  83. data/spec/postgresql_replication_spec.rb +48 -0
  84. data/spec/postgresql_schema_support_spec.rb +212 -0
  85. data/spec/postgresql_support_spec.rb +63 -0
  86. data/spec/progress_bar_spec.rb +77 -0
  87. data/spec/proxied_table_scan_spec.rb +151 -0
  88. data/spec/proxy_block_cursor_spec.rb +197 -0
  89. data/spec/proxy_connection_spec.rb +423 -0
  90. data/spec/proxy_cursor_spec.rb +56 -0
  91. data/spec/proxy_row_cursor_spec.rb +66 -0
  92. data/spec/proxy_runner_spec.rb +70 -0
  93. data/spec/replication_difference_spec.rb +161 -0
  94. data/spec/replication_extender_interface_spec.rb +367 -0
  95. data/spec/replication_extenders_spec.rb +32 -0
  96. data/spec/replication_helper_spec.rb +178 -0
  97. data/spec/replication_initializer_spec.rb +509 -0
  98. data/spec/replication_run_spec.rb +443 -0
  99. data/spec/replication_runner_spec.rb +254 -0
  100. data/spec/replicators_spec.rb +36 -0
  101. data/spec/rubyrep_spec.rb +8 -0
  102. data/spec/scan_detail_reporter_spec.rb +119 -0
  103. data/spec/scan_progress_printers_spec.rb +68 -0
  104. data/spec/scan_report_printers_spec.rb +67 -0
  105. data/spec/scan_runner_spec.rb +50 -0
  106. data/spec/scan_summary_reporter_spec.rb +61 -0
  107. data/spec/session_spec.rb +253 -0
  108. data/spec/spec.opts +1 -0
  109. data/spec/spec_helper.rb +305 -0
  110. data/spec/strange_name_support_spec.rb +135 -0
  111. data/spec/sync_helper_spec.rb +169 -0
  112. data/spec/sync_runner_spec.rb +78 -0
  113. data/spec/syncers_spec.rb +171 -0
  114. data/spec/table_scan_helper_spec.rb +36 -0
  115. data/spec/table_scan_spec.rb +49 -0
  116. data/spec/table_sorter_spec.rb +30 -0
  117. data/spec/table_spec_resolver_spec.rb +111 -0
  118. data/spec/table_sync_spec.rb +140 -0
  119. data/spec/task_sweeper_spec.rb +47 -0
  120. data/spec/trigger_mode_switcher_spec.rb +83 -0
  121. data/spec/two_way_replicator_spec.rb +721 -0
  122. data/spec/two_way_syncer_spec.rb +256 -0
  123. data/spec/type_casting_cursor_spec.rb +50 -0
  124. data/spec/uninstall_runner_spec.rb +93 -0
  125. metadata +190 -0
@@ -0,0 +1,197 @@
1
+ module RR
2
+
3
# Convenience container that holds one LoggedChangeLoader per database
# (:+left+ and :+right+).
class LoggedChangeLoaders

  # The current Session
  attr_accessor :session

  # Hash mapping :+left+ / :+right+ to the LoggedChangeLoader of that database
  attr_accessor :loaders

  # Builds one loader for each of the two databases.
  # * +session+: Current Session
  def initialize(session)
    self.session = session
    self.loaders = {
      :left => LoggedChangeLoader.new(session, :left),
      :right => LoggedChangeLoader.new(session, :right)
    }
  end

  # Looks up the LoggedChangeLoader of the given database
  # (:+left+ or :+right+).
  def [](database)
    loaders[database]
  end

  # Refreshes the change log cache of both databases, even if the caches
  # have not expired yet.
  def update
    [:left, :right].each do |side|
      loader = self[side]
      loader.update :forced => true
    end
  end
end
33
+
34
# Caches the entries of the change log table of one database.
#
# The cache is filled by #update, consumed record by record through #load and
# inspected through #oldest_change / #oldest_change_time.
class LoggedChangeLoader

  # The current +Session+.
  attr_accessor :session

  # Current database (either :+left+ or :+right+)
  attr_accessor :database

  # The current +ProxyConnection+.
  attr_accessor :connection

  # Index to the next unprocessed change in the +change_array+.
  attr_accessor :current_index

  # ID of the last cached change log record.
  attr_accessor :current_id

  # Array with all cached changes.
  # Processed change log records are replaced with +nil+.
  attr_accessor :change_array

  # Tree (hash) structure for fast access to all cached changes.
  # First level of tree:
  # * key: table name
  # * value: 2nd level tree
  # 2nd level tree:
  # * key: the change_key value of the according change log records.
  # * value:
  #   An array of according change log records (column_name => value hash).
  #   Additional entry of each change log hash:
  #   * key: 'array_index'
  #   * value: index to the change log record in +change_array+
  attr_accessor :change_tree

  # Time of the last update of the cache
  attr_accessor :last_updated

  # Initializes / resets the cache.
  # (+current_id+ is intentionally left alone so that already loaded change
  # log records are not fetched again.)
  def init_cache
    self.change_tree = {}
    self.change_array = []
    self.current_index = 0
  end
  private :init_cache

  # Create a new change log record cache.
  # * +session+: The current +Session+
  # * +database+: Either :+left+ or :+right+
  def initialize(session, database)
    self.session = session
    self.database = database
    self.connection = session.send(database)

    init_cache
    self.current_id = -1
    # NOTE(review): Integer#year / #ago are not core Ruby; presumably provided
    # by ActiveSupport which is loaded elsewhere - confirm.
    self.last_updated = 1.year.ago
  end

  # Updates the cache.
  # Options is a hash determining when the update is actually executed:
  # * :+expire_time+: cache is older than the given number of seconds
  # * :+forced+: if +true+ update the cache even if not yet expired
  # Options missing from the hash fall back to
  # <tt>:forced => false, :expire_time => 1</tt>.
  def update(options = {:forced => false, :expire_time => 1})
    # A caller-supplied hash replaces the default argument entirely, so fill in
    # whatever it leaves out. Without this, e.g. update(:forced => false)
    # compared the cache age against nil and raised.
    options = {:forced => false, :expire_time => 1}.merge(options)
    return unless options[:forced] or Time.now - last_updated >= options[:expire_time]

    self.last_updated = Time.now

    # First, let's use a LIMIT clause (via :row_buffer_size option) to verify
    # if there are any pending changes.
    # (If there are many pending changes, this is (at least with PostgreSQL)
    # much faster.)
    probe = connection.select_cursor(
      :table => change_log_table,
      :from => {'id' => current_id},
      :exclude_starting_row => true,
      :row_buffer_size => 1
    )
    pending = begin
      probe.next?
    ensure
      # The probe is never used again; release it right away.
      probe.clear
    end
    return unless pending

    # Something is here. Let's actually load it.
    cursor = connection.select_cursor(
      :table => change_log_table,
      :from => {'id' => current_id},
      :exclude_starting_row => true,
      :type_cast => true,
      :row_buffer_size => session.configuration.options[:row_buffer_size]
    )
    begin
      while cursor.next?
        change = cursor.next_row
        self.current_id = change['id']
        change_array << change
        change['array_index'] = change_array.size - 1

        table_change_tree = change_tree[change['change_table']] ||= {}
        key_changes = table_change_tree[change['change_key']] ||= []
        key_changes << change
      end
    ensure
      # Release the cursor even if reading a row failed.
      cursor.clear
    end
  end

  # Returns the creation time of the oldest unprocessed change log record
  # (+nil+ if there is none).
  def oldest_change_time
    change = oldest_change
    change && change['change_time']
  end

  # Returns the oldest unprocessed change log record (column_name => value hash)
  # or +nil+ if there is none.
  def oldest_change
    update
    # Skip over the nil placeholders of already processed records. The bounds
    # check guarantees termination even if only placeholders remain.
    while current_index < change_array.size
      change = change_array[current_index]
      return change unless change.nil?
      self.current_index += 1
    end
    nil
  end

  # Returns the specified change log record (column_name => value hash) and
  # removes it from both the cache and the change log table.
  # Returns +nil+ if no such record is cached.
  # * +change_table+: the name of the table that was changed
  # * +change_key+: the change key of the modified record
  def load(change_table, change_key)
    update

    table_change_tree = change_tree[change_table]
    return nil unless table_change_tree
    key_changes = table_change_tree[change_key]
    return nil unless key_changes

    # get change object and delete from key_changes
    change = key_changes.shift

    # delete change from change_array (a nil placeholder keeps the indexes
    # of the remaining records stable)
    change_array[change['array_index']] = nil

    # delete change from database
    connection.execute "delete from #{change_log_table} where id = #{change['id']}"

    # prune branches of the tree that became empty
    table_change_tree.delete change_key if key_changes.empty?
    change_tree.delete change_table if table_change_tree.empty?

    # reset everything if no more changes remain
    init_cache if change_tree.empty?

    change
  end

  # Returns the name of the change log table
  def change_log_table
    @change_log_table ||= "#{session.configuration.options[:rep_prefix]}_pending_changes"
  end
  private :change_log_table
end
197
+ end
@@ -0,0 +1,80 @@
1
+ module RR
2
+
3
# Decorates an existing cursor.
# Purpose: send regular updates to the installed TaskSweeper - every row
# fetch pings the sweeper before and after delegating to the wrapped cursor.
class NoisyCursor
  # The original (wrapped) cursor
  attr_accessor :org_cursor

  # The installed task sweeper
  attr_accessor :sweeper

  # Create a new NoisyCursor.
  # * cursor: the original cursor
  # * sweeper: the target TaskSweeper
  def initialize(cursor, sweeper)
    self.sweeper = sweeper
    self.org_cursor = cursor
  end

  # Plain delegation to the original cursor (no ping).
  def next?
    org_cursor.next?
  end

  # Plain delegation to the original cursor (no ping).
  def clear
    org_cursor.clear
  end

  # Returns the row as a column => value hash and moves the cursor to the next row.
  # The sweeper is pinged once before and once after the row is fetched.
  def next_row
    sweeper.ping
    fetched = org_cursor.next_row
    sweeper.ping
    fetched
  end
end
32
+
33
# Mixin for ProxyConnections: pings the installed TaskSweeper before and after
# each of the wrapped database operations.
module NoisyConnection

  # The installed TaskSweeper
  attr_accessor :sweeper

  # Wraps ProxyConnection#select_cursor: pings around the original call and
  # returns the resulting cursor wrapped into a NoisyCursor.
  def select_cursor(options)
    sweeper.ping
    plain_cursor = super.tap { sweeper.ping }
    NoisyCursor.new(plain_cursor, sweeper)
  end

  # Wraps ProxyConnection#insert_record to update the TaskSweeper.
  # Returns whatever the original method returns.
  def insert_record(table, values)
    sweeper.ping
    super.tap { sweeper.ping }
  end

  # Wraps ProxyConnection#update_record to update the TaskSweeper.
  # Returns whatever the original method returns.
  def update_record(table, values, org_key = nil)
    sweeper.ping
    super.tap { sweeper.ping }
  end

  # Wraps ProxyConnection#delete_record to update the TaskSweeper.
  # Returns whatever the original method returns.
  def delete_record(table, values)
    sweeper.ping
    super.tap { sweeper.ping }
  end

  # Wraps ProxyConnection#commit_db_transaction to update the TaskSweeper.
  # Returns whatever the original method returns.
  def commit_db_transaction
    sweeper.ping
    super.tap { sweeper.ping }
  end
end
80
+ end
@@ -0,0 +1,171 @@
1
+ module RR
2
+
3
+ # Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
4
+ # Doesn't have any reporting functionality by itself.
5
+ # Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
6
+ # Usage:
7
+ # 1. Create a new ProxiedTableScan object and hand it all necessary information
8
+ # 2. Call ProxiedTableScan#run to do the actual comparison
9
+ # 3. The block handed to ProxiedTableScan#run receives all differences
10
+ class ProxiedTableScan < TableScan
11
+
12
# Returns the block size to use for table scanning: the :+proxy_block_size+
# option configured for the left table. The value is looked up once and then
# memoized.
def block_size
  return @block_size if @block_size
  table_options = session.configuration.options_for_table(left_table)
  @block_size = table_options[:proxy_block_size]
end
16
+
17
# Creates a new ProxiedTableScan instance
# * session: a Session object representing the current database session
# * left_table: name of the table in the left database
# * right_table: name of the table in the right database. If not given, same as left_table
# Raises if the session is not proxied.
def initialize(session, left_table, right_table = nil)
  unless session.proxied?
    raise "#{self.class.name} only works with proxied sessions"
  end

  # remaining setup is done by the superclass (receives the same arguments)
  super
end
26
+
27
# Compares the rows of the block that was just scanned in the left and right table.
# * +left_block_cursor+, +right_block_cursor+: the according ProxyBlockCursor
#   objects; their #row_checksums describe the rows of the scanned block
# Yields all identified differences with
# * diff_type
# * row
# (#run describes the yield parameters in detail.)
def compare_blocks(left_block_cursor, right_block_cursor)
  left_cursor = right_cursor = nil

  left_row_checksums = left_block_cursor.row_checksums
  right_row_checksums = right_block_cursor.row_checksums

  # phase 1: identify the different rows and store their primary keys
  left_diff_rows = []
  left_diff_checksums = []
  right_diff_rows = []
  right_diff_checksums = []
  i = k = 0
  while i < left_row_checksums.size or k < right_row_checksums.size
    # nil keys signal to rank_rows that the according side is exhausted
    left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
    right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
    case rank_rows(left_keys, right_keys)
    when -1
      # row exists only in the left table
      left_diff_rows << left_keys
      left_diff_checksums << left_row_checksums[i][:checksum]
      i += 1
    when 1
      # row exists only in the right table
      right_diff_rows << right_keys
      right_diff_checksums << right_row_checksums[k][:checksum]
      k += 1
    when 0
      # same primary key on both sides: only a difference if the content differs
      if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
        left_diff_rows << left_keys
        left_diff_checksums << left_row_checksums[i][:checksum]
        right_diff_rows << right_keys
        right_diff_checksums << right_row_checksums[k][:checksum]
      end
      i += 1
      k += 1
    end
  end

  # retrieve possibly existing cached rows from the block cursors
  left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
  right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

  # builds arrays of row keys for rows that were not included in the hash
  left_uncached_rows = []
  left_diff_rows.each_with_index do |row_keys, index|
    left_uncached_rows << row_keys unless left_row_cache[left_diff_checksums[index]]
  end
  right_uncached_rows = []
  right_diff_rows.each_with_index do |row_keys, index|
    right_uncached_rows << row_keys unless right_row_cache[right_diff_checksums[index]]
  end

  # phase 2: read all different rows and yield them
  unless left_uncached_rows.empty?
    left_cursor = session.left.create_cursor \
      ProxyRowCursor, left_table, :row_keys => left_uncached_rows
  end
  unless right_uncached_rows.empty?
    right_cursor = session.right.create_cursor \
      ProxyRowCursor, right_table, :row_keys => right_uncached_rows
  end

  # Returns the next differing row of one side: taken from the row cache if
  # available, otherwise read from that side's row cursor. The cursor is only
  # touched for uncached rows (it is nil if a side has none).
  # NOTE(review): Marshal.load must only ever see trusted data; the dumps
  # presumably originate from the configured rubyrep proxies - confirm.
  read_row = lambda do |row_cache, checksum, cursor|
    if row_cache.include? checksum
      Marshal.load(row_cache[checksum])
    else
      cursor.next_row
    end
  end

  i = k = 0
  while i < left_diff_rows.size or k < right_diff_rows.size
    case rank_rows(left_diff_rows[i], right_diff_rows[k])
    when -1
      yield :left, read_row.call(left_row_cache, left_diff_checksums[i], left_cursor)
      i += 1
    when 1
      yield :right, read_row.call(right_row_cache, right_diff_checksums[k], right_cursor)
      k += 1
    when 0
      left_row = read_row.call(left_row_cache, left_diff_checksums[i], left_cursor)
      # (the uncached right row used to be stored in the wrong variable, so
      # the conflict was reported with a missing / outdated right row)
      right_row = read_row.call(right_row_cache, right_diff_checksums[k], right_cursor)
      yield :conflict, [left_row, right_row]
      i += 1
      k += 1
    end
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
133
+
134
# Runs the table scan.
# Calls the block for every found difference.
# Differences are yielded with 2 parameters
# * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
# * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
def run(&blck)
  left_cursor = right_cursor = nil
  left_cursor = session.left.create_cursor(
    ProxyBlockCursor, left_table,
    :row_buffer_size => scan_options[:row_buffer_size]
  )
  right_cursor = session.right.create_cursor(
    ProxyBlockCursor, right_table,
    :row_buffer_size => scan_options[:row_buffer_size]
  )

  # ensures progress bar is printed even if there are no records
  update_progress 0

  while left_cursor.next?
    # checksum one block of the left table, then the right table up to the
    # same primary key
    last_left_keys, left_digest, left_count =
      left_cursor.checksum :proxy_block_size => block_size
    _, right_digest, right_count =
      right_cursor.checksum :max_row => last_left_keys
    remaining_progress = left_count + right_count

    unless left_digest == right_digest
      compare_blocks left_cursor, right_cursor do |type, row|
        # a conflict accounts for one row on each side
        if type == :conflict
          steps = 2
        else
          steps = 1
        end
        update_progress steps
        remaining_progress -= steps
        yield type, row
      end
    end

    # report the rows of the block that were not reported as differences
    update_progress remaining_progress
  end

  # rows left over in the right table exist only there
  while right_cursor.next?
    update_progress 1
    yield :right, right_cursor.next_row
  end
ensure
  session.left.destroy_cursor left_cursor if left_cursor
  session.right.destroy_cursor right_cursor if right_cursor
end
170
+ end
171
+ end
@@ -0,0 +1,145 @@
1
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/..'
2
+
3
+ require 'digest/sha1'
4
+
5
+ require 'rubyrep'
6
+
7
+ module RR
8
+
9
+ # This class is used to scan a table in blocks.
10
+ # Calculates the checksums of the scanned blocks.
11
+ class ProxyBlockCursor < ProxyCursor
12
+
13
+ include TableScanHelper
14
+
15
+ # The current Digest
16
+ attr_accessor :digest
17
+
18
+ # nil if the last run of the checksum method left no unprocessed row.
19
+ # Otherwise the left over row of that checksum run
20
+ attr_accessor :last_row
21
+
22
+ # Returns an array of checksums for each encountered row.
23
+ # Each array element is a Hash with the following elements:
24
+ # * +:row_keys+: A primary key => value hash identifying the row
25
+ # * +:checksum+: the checksum for this row
26
+ attr_accessor :row_checksums
27
+
28
+ # The maximum total size (in bytes) up to which rows will be cached
29
+ attr_accessor :max_row_cache_size
30
+
31
+ # A byte counter of how many bytes of row data have already been cached
32
+ attr_accessor :current_row_cache_size
33
+
34
+ # A hash of cached rows consisting of row checksum => row dump pairs.
35
+ attr_accessor :row_cache
36
+
37
# Creates a new cursor
# * session: the current proxy session
# * table: table_name
def initialize(session, table)
  # 1,000,000 bytes of row cache; this size should be sufficient as long as
  # table doesn't contain blobs
  self.max_row_cache_size = 1_000_000

  # remaining setup is done by the superclass (receives the same arguments)
  super
end
44
+
45
# Returns true if the current cursor has unprocessed rows: either a row left
# over by the last #checksum run or further rows in the underlying cursor.
def next?
  !last_row.nil? || cursor.next?
end
49
+
50
# Returns the cursor's next row.
# A row left over by the last #checksum run is handed out first (and then
# forgotten); otherwise the row is read from the underlying cursor.
def next_row
  return cursor.next_row unless last_row

  pending = last_row
  self.last_row = nil
  pending
end
59
+
60
# Returns a hash of row checksum => row dump pairs for those of the given
# +checksums+ (an array) that are present in the row cache.
def retrieve_row_cache(checksums)
  checksums.inject({}) do |row_dumps, checksum|
    row_dumps[checksum] = row_cache[checksum] if row_cache.include? checksum
    row_dumps
  end
end
69
+
70
# Updates block / row checksums and row cache with the given +row+.
# * the row's primary key values and SHA1 checksum are appended to +row_checksums+
# * the row dump is cached under its checksum as long as the cache stays
#   below +max_row_cache_size+
# * the row dump is fed into the digest of the current block
def update_checksum(row)
  serialized = Marshal.dump(row)
  row_digest = Digest::SHA1.hexdigest(serialized)

  # updates row checksum array
  key_columns = row.reject { |column, _| !primary_key_names.include?(column) }
  row_checksums.push(:row_keys => key_columns, :checksum => row_digest)

  # update the row cache (unless maximum cache size limit has already been reached)
  grown_cache_size = current_row_cache_size + serialized.size
  if grown_cache_size < max_row_cache_size
    self.current_row_cache_size = grown_cache_size
    row_cache[row_digest] = serialized
  end

  # update current total checksum
  digest << serialized
end
88
+
89
# Reinitializes the row checksum array, the row cache (including its byte
# counter) and the total checksum.
def reset_checksum
  self.row_cache = Hash.new
  self.current_row_cache_size = 0
  self.row_checksums = Array.new
  self.digest = Digest::SHA1.new
end
96
+
97
# Returns the current checksum: the hex representation of the digest over
# all row dumps added since the last reset.
def current_checksum
  digest.hexdigest
end
101
+
102
# Calculates the checksum from the current row up to the row specified by options.
# options is a hash including either
# * :+proxy_block_size+: The number of rows to scan.
# * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
# Returns multiple parameters:
# * primary key columns of the last row read (+nil+ if no row was processed)
# * checksum
# * number of processed records
# Raises if neither option is given or if :+proxy_block_size+ is not positive.
def checksum(options = {})
  reset_checksum
  last_processed = nil
  processed = 0

  if options.include? :proxy_block_size
    limit = options[:proxy_block_size]
    raise ":proxy_block_size must be greater than 0" unless limit > 0
    while processed < limit and next?
      last_processed = next_row
      update_checksum(last_processed)
      processed += 1
    end
  elsif options.include? :max_row
    upper_bound = options[:max_row]
    while next?
      candidate = next_row
      if rank_rows(candidate, upper_bound) > 0
        # candidate > max_row ==> keep it for the next run and break off
        self.last_row = candidate
        break
      end
      processed += 1
      update_checksum(candidate)
      last_processed = candidate
    end
  else
    raise "options must include either :proxy_block_size or :max_row"
  end

  # reduce the last processed row to its primary key columns
  last_keys = nil
  if last_processed
    last_keys = last_processed.reject { |column, _| !primary_key_names.include?(column) }
  end
  [last_keys, current_checksum, processed]
end
144
+ end
145
+ end