andyjeffries-rubyrep 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. data/History.txt +83 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +151 -0
  4. data/README.txt +37 -0
  5. data/bin/rubyrep +8 -0
  6. data/lib/rubyrep.rb +72 -0
  7. data/lib/rubyrep/base_runner.rb +195 -0
  8. data/lib/rubyrep/command_runner.rb +144 -0
  9. data/lib/rubyrep/committers/buffered_committer.rb +151 -0
  10. data/lib/rubyrep/committers/committers.rb +152 -0
  11. data/lib/rubyrep/configuration.rb +275 -0
  12. data/lib/rubyrep/connection_extenders/connection_extenders.rb +165 -0
  13. data/lib/rubyrep/connection_extenders/jdbc_extender.rb +65 -0
  14. data/lib/rubyrep/connection_extenders/mysql_extender.rb +59 -0
  15. data/lib/rubyrep/connection_extenders/postgresql_extender.rb +277 -0
  16. data/lib/rubyrep/database_proxy.rb +52 -0
  17. data/lib/rubyrep/direct_table_scan.rb +75 -0
  18. data/lib/rubyrep/generate_runner.rb +105 -0
  19. data/lib/rubyrep/initializer.rb +39 -0
  20. data/lib/rubyrep/log_helper.rb +30 -0
  21. data/lib/rubyrep/logged_change.rb +160 -0
  22. data/lib/rubyrep/logged_change_loader.rb +197 -0
  23. data/lib/rubyrep/noisy_connection.rb +80 -0
  24. data/lib/rubyrep/proxied_table_scan.rb +171 -0
  25. data/lib/rubyrep/proxy_block_cursor.rb +145 -0
  26. data/lib/rubyrep/proxy_connection.rb +431 -0
  27. data/lib/rubyrep/proxy_cursor.rb +44 -0
  28. data/lib/rubyrep/proxy_row_cursor.rb +43 -0
  29. data/lib/rubyrep/proxy_runner.rb +89 -0
  30. data/lib/rubyrep/replication_difference.rb +100 -0
  31. data/lib/rubyrep/replication_extenders/mysql_replication.rb +271 -0
  32. data/lib/rubyrep/replication_extenders/postgresql_replication.rb +236 -0
  33. data/lib/rubyrep/replication_extenders/replication_extenders.rb +26 -0
  34. data/lib/rubyrep/replication_helper.rb +142 -0
  35. data/lib/rubyrep/replication_initializer.rb +327 -0
  36. data/lib/rubyrep/replication_run.rb +142 -0
  37. data/lib/rubyrep/replication_runner.rb +166 -0
  38. data/lib/rubyrep/replicators/replicators.rb +42 -0
  39. data/lib/rubyrep/replicators/two_way_replicator.rb +361 -0
  40. data/lib/rubyrep/scan_progress_printers/progress_bar.rb +65 -0
  41. data/lib/rubyrep/scan_progress_printers/scan_progress_printers.rb +65 -0
  42. data/lib/rubyrep/scan_report_printers/scan_detail_reporter.rb +111 -0
  43. data/lib/rubyrep/scan_report_printers/scan_report_printers.rb +67 -0
  44. data/lib/rubyrep/scan_report_printers/scan_summary_reporter.rb +75 -0
  45. data/lib/rubyrep/scan_runner.rb +25 -0
  46. data/lib/rubyrep/session.rb +230 -0
  47. data/lib/rubyrep/sync_helper.rb +121 -0
  48. data/lib/rubyrep/sync_runner.rb +31 -0
  49. data/lib/rubyrep/syncers/syncers.rb +112 -0
  50. data/lib/rubyrep/syncers/two_way_syncer.rb +174 -0
  51. data/lib/rubyrep/table_scan.rb +54 -0
  52. data/lib/rubyrep/table_scan_helper.rb +46 -0
  53. data/lib/rubyrep/table_sorter.rb +70 -0
  54. data/lib/rubyrep/table_spec_resolver.rb +142 -0
  55. data/lib/rubyrep/table_sync.rb +90 -0
  56. data/lib/rubyrep/task_sweeper.rb +77 -0
  57. data/lib/rubyrep/trigger_mode_switcher.rb +63 -0
  58. data/lib/rubyrep/type_casting_cursor.rb +31 -0
  59. data/lib/rubyrep/uninstall_runner.rb +93 -0
  60. data/lib/rubyrep/version.rb +9 -0
  61. data/rubyrep +8 -0
  62. data/rubyrep.bat +4 -0
  63. data/setup.rb +1585 -0
  64. data/spec/base_runner_spec.rb +218 -0
  65. data/spec/buffered_committer_spec.rb +274 -0
  66. data/spec/command_runner_spec.rb +145 -0
  67. data/spec/committers_spec.rb +178 -0
  68. data/spec/configuration_spec.rb +203 -0
  69. data/spec/connection_extender_interface_spec.rb +141 -0
  70. data/spec/connection_extenders_registration_spec.rb +164 -0
  71. data/spec/database_proxy_spec.rb +48 -0
  72. data/spec/database_rake_spec.rb +40 -0
  73. data/spec/db_specific_connection_extenders_spec.rb +34 -0
  74. data/spec/db_specific_replication_extenders_spec.rb +38 -0
  75. data/spec/direct_table_scan_spec.rb +61 -0
  76. data/spec/dolphins.jpg +0 -0
  77. data/spec/generate_runner_spec.rb +84 -0
  78. data/spec/initializer_spec.rb +46 -0
  79. data/spec/log_helper_spec.rb +39 -0
  80. data/spec/logged_change_loader_spec.rb +68 -0
  81. data/spec/logged_change_spec.rb +470 -0
  82. data/spec/noisy_connection_spec.rb +78 -0
  83. data/spec/postgresql_replication_spec.rb +48 -0
  84. data/spec/postgresql_schema_support_spec.rb +212 -0
  85. data/spec/postgresql_support_spec.rb +63 -0
  86. data/spec/progress_bar_spec.rb +77 -0
  87. data/spec/proxied_table_scan_spec.rb +151 -0
  88. data/spec/proxy_block_cursor_spec.rb +197 -0
  89. data/spec/proxy_connection_spec.rb +423 -0
  90. data/spec/proxy_cursor_spec.rb +56 -0
  91. data/spec/proxy_row_cursor_spec.rb +66 -0
  92. data/spec/proxy_runner_spec.rb +70 -0
  93. data/spec/replication_difference_spec.rb +161 -0
  94. data/spec/replication_extender_interface_spec.rb +367 -0
  95. data/spec/replication_extenders_spec.rb +32 -0
  96. data/spec/replication_helper_spec.rb +178 -0
  97. data/spec/replication_initializer_spec.rb +509 -0
  98. data/spec/replication_run_spec.rb +443 -0
  99. data/spec/replication_runner_spec.rb +254 -0
  100. data/spec/replicators_spec.rb +36 -0
  101. data/spec/rubyrep_spec.rb +8 -0
  102. data/spec/scan_detail_reporter_spec.rb +119 -0
  103. data/spec/scan_progress_printers_spec.rb +68 -0
  104. data/spec/scan_report_printers_spec.rb +67 -0
  105. data/spec/scan_runner_spec.rb +50 -0
  106. data/spec/scan_summary_reporter_spec.rb +61 -0
  107. data/spec/session_spec.rb +253 -0
  108. data/spec/spec.opts +1 -0
  109. data/spec/spec_helper.rb +305 -0
  110. data/spec/strange_name_support_spec.rb +135 -0
  111. data/spec/sync_helper_spec.rb +169 -0
  112. data/spec/sync_runner_spec.rb +78 -0
  113. data/spec/syncers_spec.rb +171 -0
  114. data/spec/table_scan_helper_spec.rb +36 -0
  115. data/spec/table_scan_spec.rb +49 -0
  116. data/spec/table_sorter_spec.rb +30 -0
  117. data/spec/table_spec_resolver_spec.rb +111 -0
  118. data/spec/table_sync_spec.rb +140 -0
  119. data/spec/task_sweeper_spec.rb +47 -0
  120. data/spec/trigger_mode_switcher_spec.rb +83 -0
  121. data/spec/two_way_replicator_spec.rb +721 -0
  122. data/spec/two_way_syncer_spec.rb +256 -0
  123. data/spec/type_casting_cursor_spec.rb +50 -0
  124. data/spec/uninstall_runner_spec.rb +93 -0
  125. metadata +190 -0
@@ -0,0 +1,197 @@
1
+ module RR
2
+
3
# Bundles the LoggedChangeLoader instances of both databases so that they can
# be looked up and refreshed together.
class LoggedChangeLoaders

  # The Session handed to the constructor
  attr_accessor :session

  # Hash holding one LoggedChangeLoader under each of the keys :+left+ and :+right+
  attr_accessor :loaders

  # Creates the loader pair.
  # * +session+: Current Session
  def initialize(session)
    self.session = session
    self.loaders = {}
    loaders[:left] = LoggedChangeLoader.new(session, :left)
    loaders[:right] = LoggedChangeLoader.new(session, :right)
  end

  # Looks up the LoggedChangeLoader stored for +database+
  # (:+left+ or :+right+). Any other key yields whatever the hash returns for it.
  def [](database)
    loaders[database]
  end

  # Asks both loaders to refresh their change log cache regardless of its age.
  def update
    sides = [:left, :right]
    sides.each do |side|
      loader = self[side]
      loader.update(:forced => true)
    end
  end
end
33
+
34
# Caches the entries of the change log table ("<rep_prefix>_pending_changes")
# of one database.
#
# Every cached record is reachable in two ways: in arrival order through
# +change_array+ and by table / change key through +change_tree+.
class LoggedChangeLoader

  # Values used by #update for every option the caller does not supply.
  DEFAULT_UPDATE_OPTIONS = {:forced => false, :expire_time => 1}.freeze

  # The current +Session+.
  attr_accessor :session

  # Current database (either :+left+ or :+right+)
  attr_accessor :database

  # The connection obtained via <tt>session.send(database)</tt>.
  attr_accessor :connection

  # Index to the next unprocessed change in the +change_array+.
  attr_accessor :current_index

  # ID of the last cached change log record (-1 before anything was cached).
  attr_accessor :current_id

  # Array with all cached changes.
  # Processed change log records are replaced with +nil+.
  attr_accessor :change_array

  # Tree (hash) structure for fast access to all cached changes.
  # First level of tree:
  # * key: table name
  # * value: 2nd level tree
  # 2nd level tree:
  # * key: the change_key value of the according change log records.
  # * value:
  #   An array of according change log records (column_name => value hash).
  #   Additional entry of each change log hash:
  #   * key: 'array_index'
  #   * value: index to the change log record in +change_array+
  attr_accessor :change_tree

  # Time of the last cache update attempt
  attr_accessor :last_updated

  # Initializes / resets the cache.
  # +current_id+ is deliberately left alone so that later updates continue
  # after the last record that was read.
  def init_cache
    self.change_tree = {}
    self.change_array = []
    self.current_index = 0
  end
  private :init_cache

  # Create a new change log record cache.
  # * +session+: The current +Session+
  # * +database+: Either :+left+ or :+right+
  def initialize(session, database)
    self.session = session
    self.database = database
    self.connection = session.send(database)

    init_cache
    self.current_id = -1
    # NOTE(review): relies on the Numeric#year / #ago extension being loaded
    # (presumably ActiveSupport) -- confirm against the gem's requires.
    self.last_updated = 1.year.ago
  end

  # Updates the cache.
  # Options is a hash determining when the update is actually executed:
  # * :+expire_time+: cache is older than the given number of seconds
  # * :+forced+: if +true+ update the cache even if not yet expired
  # Options that are not given fall back to DEFAULT_UPDATE_OPTIONS, so a
  # partial (or empty) hash is fine.
  # The return value carries no meaning.
  def update(options = {:forced => false, :expire_time => 1})
    options = DEFAULT_UPDATE_OPTIONS.merge(options)
    return unless options[:forced] || Time.now - last_updated >= options[:expire_time]

    self.last_updated = Time.now

    return unless pending_changes?

    # Something is here. Let's actually load it.
    cursor = connection.select_cursor(
      :table => change_log_table,
      :from => {'id' => current_id},
      :exclude_starting_row => true,
      :type_cast => true,
      :row_buffer_size => session.configuration.options[:row_buffer_size]
    )
    begin
      while cursor.next?
        change = cursor.next_row
        self.current_id = change['id']
        change_array << change
        change['array_index'] = change_array.size - 1

        table_change_tree = (change_tree[change['change_table']] ||= {})
        (table_change_tree[change['change_key']] ||= []) << change
      end
    ensure
      # release the cursor even if reading a row failed
      cursor.clear
    end
  end

  # Returns +true+ if the change log table holds records newer than +current_id+.
  # Uses a LIMIT clause (via the :row_buffer_size option) for the check;
  # with many pending changes this is (at least with PostgreSQL) much faster
  # than opening the full cursor. The probing cursor is always cleared again.
  def pending_changes?
    probe = connection.select_cursor(
      :table => change_log_table,
      :from => {'id' => current_id},
      :exclude_starting_row => true,
      :row_buffer_size => 1
    )
    begin
      probe.next?
    ensure
      probe.clear
    end
  end
  private :pending_changes?

  # Returns the creation time ('change_time' column) of the oldest
  # unprocessed change log record or +nil+ if there is none.
  def oldest_change_time
    change = oldest_change
    change['change_time'] if change
  end

  # Returns the oldest unprocessed change log record (column_name => value hash)
  # or +nil+ if nothing is cached.
  def oldest_change
    update
    # Slots of records already handed out by #load are nil; skip them.
    # The upper bound prevents an endless loop should all slots be nil.
    while current_index < change_array.size
      change = change_array[current_index]
      return change unless change.nil?
      self.current_index += 1
    end
    nil
  end

  # Returns the specified change log record (column_name => value hash) and
  # removes it from the cache as well as from the change log table.
  # Returns +nil+ if no such record is cached.
  # * +change_table+: the name of the table that was changed
  # * +change_key+: the change key of the modified record
  def load(change_table, change_key)
    update

    table_change_tree = change_tree[change_table]
    return nil unless table_change_tree
    key_changes = table_change_tree[change_key]
    return nil unless key_changes

    # get change object and delete from key_changes
    change = key_changes.shift

    # delete change from change_array
    change_array[change['array_index']] = nil

    # delete change from database
    connection.execute "delete from #{change_log_table} where id = #{change['id']}"

    # prune tree levels that became empty
    table_change_tree.delete change_key if key_changes.empty?
    change_tree.delete change_table if table_change_tree.empty?

    # reset everything if no more changes remain
    init_cache if change_tree.empty?

    change
  end

  # Returns the name of the change log table
  def change_log_table
    @change_log_table ||= "#{session.configuration.options[:rep_prefix]}_pending_changes"
  end
  private :change_log_table
end
197
+ end
@@ -0,0 +1,80 @@
1
+ module RR
2
+
3
# Decorator around an existing cursor.
# Purpose: notify the installed TaskSweeper whenever a row is fetched.
class NoisyCursor
  # The wrapped (original) cursor
  attr_accessor :org_cursor

  # The task sweeper that receives the pings
  attr_accessor :sweeper

  # Create a new NoisyCursor.
  # * cursor: the original cursor
  # * sweeper: the target TaskSweeper
  def initialize(cursor, sweeper)
    self.org_cursor = cursor
    self.sweeper = sweeper
  end

  # Passed straight through to the original cursor (no ping).
  def next?
    org_cursor.next?
  end

  # Passed straight through to the original cursor (no ping).
  def clear
    org_cursor.clear
  end

  # Returns whatever the original cursor's +next_row+ returns.
  # The sweeper is pinged once before and once after the row is fetched.
  def next_row
    sweeper.ping
    org_cursor.next_row.tap { sweeper.ping }
  end
end
32
+
33
# Mix-in for ProxyConnections: every wrapped operation pings the installed
# TaskSweeper once before and once after delegating to the original
# implementation (reached through +super+).
module NoisyConnection

  # The installed TaskSweeper
  attr_accessor :sweeper

  # Calls the original #select_cursor and hands back its cursor wrapped
  # into a NoisyCursor (sharing this connection's sweeper).
  def select_cursor(options)
    sweeper.ping
    plain_cursor = super
    sweeper.ping
    NoisyCursor.new(plain_cursor, sweeper)
  end

  # Pinging wrapper around the original #insert_record; returns its result.
  def insert_record(table, values)
    sweeper.ping
    super.tap { sweeper.ping }
  end

  # Pinging wrapper around the original #update_record; returns its result.
  def update_record(table, values, org_key = nil)
    sweeper.ping
    super.tap { sweeper.ping }
  end

  # Pinging wrapper around the original #delete_record; returns its result.
  def delete_record(table, values)
    sweeper.ping
    super.tap { sweeper.ping }
  end

  # Pinging wrapper around the original #commit_db_transaction; returns its result.
  def commit_db_transaction
    sweeper.ping
    super.tap { sweeper.ping }
  end
end
80
+ end
@@ -0,0 +1,171 @@
1
+ module RR
2
+
3
# Scans two tables for differences. Goes through a RubyRep Proxy to minimize network load.
# Doesn't have any reporting functionality by itself.
# Instead ProxiedTableScan#run yields all the differences for the caller to do with as it pleases.
# Usage:
# 1. Create a new ProxiedTableScan object and hand it all necessary information
# 2. Call ProxiedTableScan#run to do the actual comparison
# 3. The block handed to ProxiedTableScan#run receives all differences
#
# NOTE(review): +session+, +left_table+, +right_table+, +scan_options+,
# +update_progress+ and +rank_rows+ are not defined here; presumably they are
# provided by TableScan (or a helper it includes) -- confirm.
class ProxiedTableScan < TableScan

  # Returns the block size to use for table scanning: the
  # :+proxy_block_size+ option configured for the left table (memoized).
  def block_size
    @block_size ||= session.configuration.options_for_table(left_table)[:proxy_block_size]
  end

  # Creates a new ProxiedTableScan instance
  # * session: a Session object representing the current database session
  # * left_table: name of the table in the left database
  # * right_table: name of the table in the right database. If not given, same like left_table
  # Raises a RuntimeError if +session+ is not proxied.
  def initialize(session, left_table, right_table = nil)
    raise "#{self.class.name} only works with proxied sessions" unless session.proxied?

    super
  end

  # Compares the rows of the blocks last checksummed by the given cursors.
  # +left_block_cursor+ and +right_block_cursor+ represent the according ProxyBlockCursor objects.
  # Yields all identified differences with
  # * diff_type
  # * row
  # #run describes the yield parameters in detail.
  def compare_blocks(left_block_cursor, right_block_cursor)
    left_cursor = right_cursor = nil

    left_row_checksums = left_block_cursor.row_checksums
    right_row_checksums = right_block_cursor.row_checksums

    # phase 1: identify the different rows and store their primary keys
    left_diff_rows = []
    left_diff_checksums = []
    right_diff_rows = []
    right_diff_checksums = []
    i = k = 0
    while i < left_row_checksums.size or k < right_row_checksums.size
      # a side that is exhausted is represented by nil keys
      left_keys = i < left_row_checksums.size ? left_row_checksums[i][:row_keys] : nil
      right_keys = k < right_row_checksums.size ? right_row_checksums[k][:row_keys] : nil
      case rank_rows(left_keys, right_keys)
      when -1
        left_diff_rows << left_keys
        left_diff_checksums << left_row_checksums[i][:checksum]
        i += 1
      when 1
        right_diff_rows << right_keys
        right_diff_checksums << right_row_checksums[k][:checksum]
        k += 1
      when 0
        # same key on both sides: only a difference if the row content differs
        if left_row_checksums[i][:checksum] != right_row_checksums[k][:checksum]
          left_diff_rows << left_keys
          left_diff_checksums << left_row_checksums[i][:checksum]
          right_diff_rows << right_keys
          right_diff_checksums << right_row_checksums[k][:checksum]
        end
        i += 1
        k += 1
      end
    end

    # retrieve possibly existing cached rows from the block cursors
    left_row_cache = left_block_cursor.retrieve_row_cache left_diff_checksums
    right_row_cache = right_block_cursor.retrieve_row_cache right_diff_checksums

    # build arrays of row keys for rows that were not included in the caches
    left_uncached_rows = []
    left_diff_rows.each_with_index do |row_keys, index|
      left_uncached_rows << row_keys unless left_row_cache[left_diff_checksums[index]]
    end
    right_uncached_rows = []
    right_diff_rows.each_with_index do |row_keys, index|
      right_uncached_rows << row_keys unless right_row_cache[right_diff_checksums[index]]
    end

    # phase 2: read all different rows and yield them
    # (row cursors are only opened if at least one row has to be fetched)
    unless left_uncached_rows.empty?
      left_cursor = session.left.create_cursor \
        ProxyRowCursor, left_table, :row_keys => left_uncached_rows
    end
    unless right_uncached_rows.empty?
      right_cursor = session.right.create_cursor \
        ProxyRowCursor, right_table, :row_keys => right_uncached_rows
    end
    i = k = 0
    while i < left_diff_rows.size or k < right_diff_rows.size
      case rank_rows(left_diff_rows[i], right_diff_rows[k])
      when -1
        yield :left, fetch_diff_row(left_row_cache, left_diff_checksums[i], left_cursor)
        i += 1
      when 1
        yield :right, fetch_diff_row(right_row_cache, right_diff_checksums[k], right_cursor)
        k += 1
      when 0
        left_row = fetch_diff_row(left_row_cache, left_diff_checksums[i], left_cursor)
        right_row = fetch_diff_row(right_row_cache, right_diff_checksums[k], right_cursor)
        yield :conflict, [left_row, right_row]
        i += 1
        k += 1
      end
    end
  ensure
    session.left.destroy_cursor left_cursor if left_cursor
    session.right.destroy_cursor right_cursor if right_cursor
  end

  # Returns one differing row: unmarshalled from +row_cache+ if it holds a
  # dump under +checksum+, otherwise read as the next row of +row_cursor+.
  # +row_cursor+ may be +nil+ when every differing row of that side is cached.
  def fetch_diff_row(row_cache, checksum, row_cursor)
    if row_cache.include? checksum
      # the dump was handed out by the block cursor's #retrieve_row_cache
      Marshal.load(row_cache[checksum])
    else
      row_cursor.next_row
    end
  end
  private :fetch_diff_row

  # Runs the table scan.
  # Calls the block for every found difference.
  # Differences are yielded with 2 parameters
  # * type: describes the difference, either :left (row only in left table), :right (row only in right table) or :conflict
  # * row: for :left or :right cases a hash describing the row; for :conflict an array of left and right row
  def run(&blck)
    left_cursor = right_cursor = nil
    left_cursor = session.left.create_cursor ProxyBlockCursor, left_table,
      :row_buffer_size => scan_options[:row_buffer_size]
    right_cursor = session.right.create_cursor ProxyBlockCursor, right_table,
      :row_buffer_size => scan_options[:row_buffer_size]
    update_progress 0 # ensures progress bar is printed even if there are no records
    while left_cursor.next?
      # checksum one block on the left, then the right rows up to the same key
      left_to, left_checksum, left_progress =
        left_cursor.checksum :proxy_block_size => block_size
      _ , right_checksum, right_progress =
        right_cursor.checksum :max_row => left_to
      combined_progress = left_progress + right_progress
      if left_checksum != right_checksum
        compare_blocks left_cursor, right_cursor do |type, row|
          # progress for differing rows is reported as they are yielded
          steps = type == :conflict ? 2 : 1
          update_progress steps
          combined_progress -= steps
          yield type, row
        end
      end
      update_progress combined_progress
    end
    # whatever remains on the right has no counterpart on the left
    while right_cursor.next?
      update_progress 1
      yield :right, right_cursor.next_row
    end
  ensure
    session.left.destroy_cursor left_cursor if left_cursor
    session.right.destroy_cursor right_cursor if right_cursor
  end
end
171
+ end
@@ -0,0 +1,145 @@
1
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/..'
2
+
3
+ require 'digest/sha1'
4
+
5
+ require 'rubyrep'
6
+
7
+ module RR
8
+
9
# Cursor that walks a table block by block and computes checksums of the
# rows / blocks it passes over.
#
# NOTE(review): +cursor+ and +primary_key_names+ are not defined here;
# presumably ProxyCursor provides them, and +rank_rows+ comes from
# TableScanHelper -- confirm.
class ProxyBlockCursor < ProxyCursor

  include TableScanHelper

  # The Digest accumulating the checksum of the current block
  attr_accessor :digest

  # The row that the last #checksum run read but did not process
  # (+nil+ if there is no such left over row)
  attr_accessor :last_row

  # Array with one entry per row encountered by the last #checksum run.
  # Each array element is a Hash with the following elements:
  # * +:row_keys+: A primary key => value hash identifying the row
  # * +:checksum+: the checksum for this row
  attr_accessor :row_checksums

  # The maximum total size (in bytes) up to which rows will be cached
  attr_accessor :max_row_cache_size

  # Counts how many bytes of row data have already been cached
  attr_accessor :current_row_cache_size

  # A hash of cached rows consisting of row checksum => row dump pairs.
  attr_accessor :row_cache

  # Creates a new cursor
  # * session: the current proxy session
  # * table: table_name
  def initialize(session, table)
    self.max_row_cache_size = 1000000 # this size should be sufficient as long as table doesn't contain blobs
    super
  end

  # Returns true if the current cursor has unprocessed rows
  # (a left over row counts as unprocessed).
  def next?
    !last_row.nil? || cursor.next?
  end

  # Returns the cursor's next row. A left over row of the previous
  # #checksum run is handed out (and forgotten) first.
  def next_row
    return cursor.next_row unless last_row

    left_over = last_row
    self.last_row = nil
    left_over
  end

  # Returns a hash of row checksum => row dump pairs for those of the
  # given +checksums+ that are present in the row cache.
  def retrieve_row_cache(checksums)
    checksums.inject({}) do |row_dumps, checksum|
      row_dumps[checksum] = row_cache[checksum] if row_cache.include? checksum
      row_dumps
    end
  end

  # Updates block / row checksums and row cache with the given +row+.
  def update_checksum(row)
    dump = Marshal.dump(row)

    # record primary key values and checksum of this row
    key_values = row.reject { |column, _value| !primary_key_names.include?(column) }
    row_checksum = Digest::SHA1.hexdigest(dump)
    row_checksums << {:row_keys => key_values, :checksum => row_checksum}

    # cache the dump as long as the cache size limit is not reached
    dump_size = dump.size
    if current_row_cache_size + dump_size < max_row_cache_size
      self.current_row_cache_size += dump_size
      row_cache[row_checksum] = dump
    end

    # feed the dump into the checksum of the whole block
    digest << dump
  end

  # Reinitializes the row checksum array, the row cache and the total checksum
  def reset_checksum
    self.row_cache = {}
    self.current_row_cache_size = 0
    self.row_checksums = []
    self.digest = Digest::SHA1.new
  end

  # Returns the current block checksum (hex string)
  def current_checksum
    digest.hexdigest
  end

  # Calculates the checksum from the current row up to the row specified by options.
  # options is a hash including either
  # * :+proxy_block_size+: The number of rows to scan.
  # * :+max_row+: A row hash of primary key columns specifying the maximum record to scan.
  # Returns multiple parameters:
  # * primary key => value hash of the last row processed (+nil+ if none was processed)
  # * checksum
  # * number of processed records
  # Raises a RuntimeError if neither option is given or the block size is not positive.
  def checksum(options = {})
    reset_checksum
    last_processed = nil
    processed = 0

    if options.include? :proxy_block_size
      limit = options[:proxy_block_size]
      raise ":proxy_block_size must be greater than 0" unless limit > 0
      while processed < limit && next?
        last_processed = next_row
        update_checksum(last_processed)
        processed += 1
      end
    elsif options.include? :max_row
      upper_bound = options[:max_row]
      while next?
        candidate = next_row
        if rank_rows(candidate, upper_bound) > 0
          # candidate > max_row ==> keep it for the next run and stop
          self.last_row = candidate
          break
        end
        processed += 1
        update_checksum(candidate)
        last_processed = candidate
      end
    else
      raise "options must include either :proxy_block_size or :max_row"
    end

    last_keys = last_processed &&
      last_processed.reject { |column, _value| !primary_key_names.include?(column) }
    [last_keys, current_checksum, processed]
  end
end
145
+ end