jetpants 0.7.0

@@ -0,0 +1,131 @@
+ module Jetpants
+   # Exception class used to halt further processing in the callback chain. See
+   # description in CallbackHandler.
+   class CallbackAbortError < StandardError; end
+
+   # If you include CallbackHandler as a mix-in, it grants the base class support
+   # for Jetpants callbacks, which work as follows:
+   #
+   # If you invoke a method "foo", Jetpants will first
+   # automatically call any "before_foo" methods that exist in the class or its
+   # superclasses. You can even define multiple methods named before_foo (in the
+   # same class!) and they will each be called. In other words, Jetpants
+   # callbacks "stack" instead of overriding each other.
+   #
+   # After calling any/all before_foo methods, the foo method is called, followed
+   # by all after_foo methods in the same manner.
+   #
+   # If any before_foo method raises a CallbackAbortError, subsequent before_foo
+   # methods will NOT be called, NOR will foo itself nor any after_foo methods.
+   #
+   # If any after_foo method raises a CallbackAbortError, subsequent after_foo
+   # methods will NOT be called.
+   #
+   # You may precede the definition of a callback method with "callback_priority 123"
+   # to set an explicit priority (higher = called first) for subsequent callbacks.
+   # The default priority is 100.
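+   #
+   # A rough usage sketch (the Widget class is hypothetical, not part of
+   # Jetpants). Note the wrapped method must be defined before its callbacks,
+   # since wrapping aliases the original method:
+   #
+   #   class Widget
+   #     include CallbackHandler
+   #
+   #     def deploy
+   #       puts 'deploying'
+   #     end
+   #
+   #     callback_priority 200
+   #     def before_deploy
+   #       puts 'priority 200: runs first'
+   #     end
+   #
+   #     callback_priority 100
+   #     def before_deploy
+   #       puts 'priority 100: runs second -- both definitions stack'
+   #     end
+   #
+   #     def after_deploy
+   #       puts 'runs after deploy'
+   #     end
+   #   end
+   #
+   #   Widget.new.deploy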
+   module CallbackHandler
+     def self.included(base)
+       base.class_eval do
+         class << self
+           # Set the priority (higher = called first) for any subsequent callbacks defined in the current class.
+           def callback_priority(value)
+             @callback_priority = value
+           end
+
+           def method_added(name)
+             # Intercept before_* and after_* methods and create corresponding Callback objects
+             if name.to_s.start_with? 'before_', 'after_'
+               Callback.new self, name.to_s.split('_', 2)[1].to_sym, name.to_s.split('_', 2)[0].to_sym, @callback_priority
+
+             # Intercept redefinitions of methods we've already wrapped, so we can
+             # wrap them again
+             elsif Callback.wrapped? self, name
+               Callback.wrap_method self, name
+             end
+           end
+         end
+
+         # Default priority for callbacks is 100
+         @callback_priority = 100
+       end
+     end
+   end
+
+   # Generic representation of a before-method or after-method callback.
+   # Used internally by CallbackHandler; you won't need to interact with Callback directly.
+   class Callback
+     @@all_callbacks = {}      # hash of class obj -> method_name symbol -> type symbol -> array of callbacks
+     @@currently_wrapping = {} # hash of class obj -> method_name symbol -> bool
+
+     attr_reader :for_class    # class object
+     attr_reader :method_name  # symbol containing method name (the one being callback-wrapped)
+     attr_reader :type         # :before or :after
+     attr_reader :priority     # high numbers get triggered first
+     attr_reader :my_alias     # method name alias OF THE CALLBACK
+
+     def initialize(for_class, method_name, type=:after, priority=100)
+       @for_class = for_class
+       @method_name = method_name
+       @type = type
+       @priority = priority
+
+       @@all_callbacks[for_class] ||= {}
+       @@all_callbacks[for_class][method_name] ||= {}
+       already_wrapped = Callback.wrapped?(for_class, method_name)
+       @@all_callbacks[for_class][method_name][type] ||= []
+
+       next_method_id = @@all_callbacks[for_class][method_name][type].count + 1
+       old_name = "#{type.to_s}_#{method_name.to_s}".to_sym
+       @my_alias = new_name = ("real_callback_#{old_name}_" + for_class.to_s.sub('::', '_') + "_#{next_method_id}").to_sym
+       for_class.class_eval do
+         alias_method new_name, old_name
+       end
+       Callback.wrap_method(for_class, method_name) unless already_wrapped
+
+       @@all_callbacks[for_class][method_name][type] << self
+     end
+
+     def self.wrap_method(for_class, method_name)
+       @@currently_wrapping[for_class] ||= {}
+       @@currently_wrapping[for_class][method_name] ||= false
+       return if @@currently_wrapping[for_class][method_name] # prevent infinite recursion from the alias_method call
+       @@currently_wrapping[for_class][method_name] = true
+
+       for_class.class_eval do
+         alias_method "#{method_name}_without_callbacks".to_sym, method_name
+         define_method method_name do |*args|
+           begin
+             Callback.trigger(self, method_name, :before, *args)
+           rescue CallbackAbortError
+             return
+           end
+           result = send "#{method_name}_without_callbacks".to_sym, *args
+           begin
+             Callback.trigger(self, method_name, :after, *args)
+           rescue CallbackAbortError
+           end
+           result
+         end
+       end
+
+       @@currently_wrapping[for_class][method_name] = false
+     end
+
+     def self.trigger(for_object, method_name, type, *args)
+       my_callbacks = []
+       for_object.class.ancestors.each do |for_class|
+         if @@all_callbacks[for_class] && @@all_callbacks[for_class][method_name] && @@all_callbacks[for_class][method_name][type]
+           my_callbacks.concat(@@all_callbacks[for_class][method_name][type])
+         end
+       end
+       my_callbacks.sort_by! {|c| -1 * c.priority}
+       my_callbacks.each {|c| for_object.send(c.my_alias, *args)}
+     end
+
+     def self.wrapped?(for_class, method_name)
+       return false unless @@all_callbacks[for_class] && @@all_callbacks[for_class][method_name]
+       @@all_callbacks[for_class][method_name].count > 0
+     end
+   end
+ end
@@ -0,0 +1,122 @@
+ require 'sequel'
+ require 'json'
+
+ module Jetpants
+
+   # A Jetpants::DB is a specific mysql instance running on a particular IP and port.
+   # It also contains a Jetpants::Host object corresponding to the IP; any missing
+   # method calls get delegated to the Host.
+   #
+   # This class has been split across several files due to its size. Please see
+   # lib/jetpants/db/*.rb for the bulk of its logic, which has been divided along
+   # functional lines.
+   class DB
+     include CallbackHandler
+
+     # IP address (as a string) of the MySQL instance
+     attr_reader :ip
+
+     # Port number of the MySQL instance. The base Jetpants implementation only supports
+     # port 3306, since this is necessary to crawl a replication hierarchy using SHOW
+     # PROCESSLIST, which does not include slave port numbers. However, plugins may
+     # override this behavior to support nonstandard ports and multi-instance-per-host
+     # topologies.
+     attr_reader :port
+
+     # Jetpants::Host object that this MySQL instance runs on.
+     attr_reader :host
+
+     # We keep track of DB instances to prevent DB.new from ever returning
+     # duplicates.
+     @@all_dbs = {}
+     @@all_dbs_mutex = Mutex.new
+
+     # Because this class is rather large, methods have been grouped together
+     # and moved to separate files in lib/jetpants/db. We load these all now.
+     # They each just re-open the DB class and add some methods.
+     Dir[File.join File.dirname(__FILE__), 'db', '*'].each {|f| require f}
+
+     # We override DB.new so that attempting to create a duplicate DB object
+     # (that is, one with the same IP and port as an existing DB object)
+     # returns the original object.
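+     # A quick sketch of the effect (address is illustrative):
+     #
+     #   a = Jetpants::DB.new('10.42.0.1', 3306)
+     #   b = Jetpants::DB.new('10.42.0.1:3306')  # port may be embedded in the string
+     #   a.equal?(b)   # => true -- same underlying object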
+     def self.new(ip, port=3306)
+       ip, embedded_port = ip.split(':', 2)
+       port = embedded_port.to_i if embedded_port
+       addr = "#{ip}:#{port}"
+       @@all_dbs_mutex.synchronize do
+         @@all_dbs[addr] = nil unless @@all_dbs[addr].is_a? self
+         @@all_dbs[addr] ||= super
+       end
+     end
+
+     def initialize(ip, port=3306)
+       @ip, @port = ip, port.to_i
+       @user = false # connections will default to app user
+       @master = nil
+       @slaves = nil
+       @repl_paused = nil
+       @running = nil
+       @host = Host.new(ip)
+     end
+
+     ###### Host methods ########################################################
+
+     # Jetpants::DB delegates missing methods to its Jetpants::Host.
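+     # For instance (illustrative; ssh_cmd is implemented by Host):
+     #
+     #   db = Jetpants::DB.new('10.42.0.1')
+     #   db.ssh_cmd 'uptime'   # handled by db.to_host via method_missing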
+     def method_missing(name, *args, &block)
+       if @host.respond_to? name
+         @host.send name, *args, &block
+       else
+         super
+       end
+     end
+
+     # Alters respond_to? logic to account for delegation of missing methods
+     # to the instance's Host.
+     def respond_to?(name, include_private=false)
+       super || @host.respond_to?(name)
+     end
+
+     # Returns true if the supplied Jetpants::DB is on the same Jetpants::Host
+     # as self.
+     def same_host_as?(db)
+       @ip == db.ip
+     end
+
+     ###### Misc methods ########################################################
+
+     # Displays the provided output, along with information about the current time,
+     # self, and optionally a Jetpants::Table name.
+     def output(str, table=nil)
+       str = str.to_s.strip
+       str = nil if str && str.length == 0
+       str ||= "Completed (no output)"
+       output = Time.now.strftime("%H:%M:%S") + " [#{self}] "
+       output << table.name << ': ' if table
+       output << str
+       print output + "\n"
+       output
+     end
+
+     # DB objects are sorted as strings, i.e., by calling to_s
+     def <=>(other)
+       to_s <=> other.to_s
+     end
+
+     # Returns a string in the form "ip:port"
+     def to_s
+       "#{@ip}:#{@port}"
+     end
+
+     # Returns self, since self is already a Jetpants::DB.
+     def to_db
+       self
+     end
+
+     # Returns the instance's Jetpants::Host.
+     def to_host
+       @host
+     end
+
+   end
+ end
@@ -0,0 +1,103 @@
+ module Jetpants
+
+   #--
+   # Connection and query methods ###############################################
+   #++
+
+   class DB
+     # Runs the provided SQL statement as root, and returns the response as a single string.
+     # Available options:
+     # :terminator:: how to terminate the query, such as '\G' or ';'. (default: '\G')
+     # :parse::      parse a single-row, vertical-format result (:terminator must be '\G') and return it as a hash
+     # :attempts::   by default, queries will be attempted up to 3 times. Set this to 0 or false for non-idempotent queries.
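+     #
+     # A hedged usage sketch (values illustrative):
+     #
+     #   status = db.mysql_root_cmd('SHOW SLAVE STATUS', :parse => true)
+     #   status[:seconds_behind_master]   # => "0"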
+     def mysql_root_cmd(cmd, options={})
+       terminator = options[:terminator] || '\G'
+       # 0 or false means "attempt only once, never re-try"
+       attempts = options.fetch(:attempts, 3)
+       attempts = attempts ? attempts.to_i : 0
+       attempts = 1 if attempts < 1
+       failures = 0
+
+       begin
+         raise "MySQL is not running" unless running?
+         supply_root_pw = (Jetpants.mysql_root_password ? "-p#{Jetpants.mysql_root_password}" : '')
+         supply_port = (@port == 3306 ? '' : "-h 127.0.0.1 -P #{@port}")
+         real_cmd = %Q{mysql #{supply_root_pw} #{supply_port} -ss -e "#{cmd}#{terminator}" #{Jetpants.mysql_schema}}
+         real_cmd.untaint
+         result = ssh_cmd!(real_cmd)
+         raise result if result && result.downcase.start_with?('error ')
+         result = parse_vertical_result(result) if options[:parse] && terminator == '\G'
+         return result
+       rescue => ex
+         failures += 1
+         raise if failures >= attempts
+         output "Root query \"#{cmd}\" failed: #{ex.message}, re-trying after delay"
+         sleep 3 * failures
+         retry
+       end
+     end
+
+     # Returns a Sequel database object
+     def mysql
+       return @db if @db
+       @db = Sequel.connect(
+         :adapter         => 'mysql2',
+         :host            => @ip,
+         :port            => @port,
+         :user            => @user || Jetpants.app_credentials[:user],
+         :password        => Jetpants.app_credentials[:pass],
+         :database        => Jetpants.mysql_schema,
+         :max_connections => Jetpants.max_concurrency)
+     end
+     alias init_db_connection_pool mysql
+
+     # Closes the existing mysql connection pool and opens a new one. Useful when
+     # changing users. Supply a new user name as the param, nothing/false to keep
+     # the old user name, or a literal true value to switch to the default app
+     # user in the Jetpants configuration.
+     def reconnect(new_user=false)
+       @user = (new_user == true ? Jetpants.app_credentials[:user] : new_user) if new_user
+       if @db
+         @db.disconnect rescue nil
+         @db = nil
+       end
+       init_db_connection_pool
+     end
+
+     # Execute a write (INSERT, UPDATE, DELETE, REPLACE, etc) query.
+     # If the query is an INSERT, returns the last insert ID (if an auto_increment
+     # column is involved). Otherwise returns the number of affected rows.
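+     # For example (table and columns hypothetical):
+     #
+     #   new_id = db.query('INSERT INTO widgets (name) VALUES (?)', 'foo')
+     #   rows_changed = db.query('UPDATE widgets SET name=? WHERE id=?', 'bar', new_id)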
+     def query(sql, *binds)
+       ds = mysql.fetch(sql, *binds)
+       mysql.execute_dui(ds.update_sql) {|c| return c.last_id > 0 ? c.last_id : c.affected_rows}
+     end
+
+     # Execute a read (SELECT) query. Returns an array of hashes.
+     def query_return_array(sql, *binds)
+       mysql.fetch(sql, *binds).all
+     end
+
+     # Execute a read (SELECT) query. Returns a hash of the first row only.
+     def query_return_first(sql, *binds)
+       mysql.fetch(sql, *binds).first
+     end
+
+     # Execute a read (SELECT) query. Returns the value of the first column of the first row only.
+     def query_return_first_value(sql, *binds)
+       mysql.fetch(sql, *binds).single_value
+     end
+
+     # Parses the result of a MySQL query run with a \G terminator. Useful when
+     # interacting with MySQL via the command-line client (for secure access to
+     # the root user) instead of via the MySQL protocol.
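+     # For example, given text like:
+     #
+     #   *************************** 1. row ***************************
+     #   Slave_IO_Running: Yes
+     #   Slave_SQL_Running: Yes
+     #
+     # the return value would be {:slave_io_running => 'Yes', :slave_sql_running => 'Yes'}.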
+     def parse_vertical_result(text)
+       results = {}
+       return results unless text
+       raise text.chomp if text =~ /^ERROR/
+       lines = text.split("\n")
+       lines.each do |line|
+         col, val = line.split(':', 2) # limit to 2 fields so values containing colons stay intact
+         next unless val
+         results[col.strip.downcase.to_sym] = val.strip
+       end
+       results
+     end
+
+   end
+ end
@@ -0,0 +1,330 @@
+ module Jetpants
+
+   #--
+   # Import, export, and data set methods #######################################
+   #++
+
+   class DB
+     # Exports the DROP TABLE + CREATE TABLE statements for the given tables via mysqldump
+     def export_schemata(tables)
+       output 'Exporting table definitions'
+       supply_root_pw = (Jetpants.mysql_root_password ? "-p#{Jetpants.mysql_root_password}" : '')
+       supply_port = (@port == 3306 ? '' : "-h 127.0.0.1 -P #{@port}")
+       cmd = "mysqldump #{supply_root_pw} #{supply_port} -d #{Jetpants.mysql_schema} " + tables.join(' ') + " >#{Jetpants.export_location}/create_tables_#{@port}.sql"
+       cmd.untaint
+       result = ssh_cmd(cmd)
+       output result
+     end
+
+     # Executes a .sql file previously created via export_schemata.
+     # Warning: this will DESTROY AND RECREATE any tables contained in the file.
+     # DO NOT USE ON A DATABASE THAT CONTAINS REAL DATA!!! This method doesn't
+     # check first! The statements will replicate to any slaves! PROCEED WITH
+     # CAUTION IF RUNNING THIS MANUALLY!
+     def import_schemata!
+       output 'Dropping and re-creating table definitions'
+       result = mysql_root_cmd "source #{Jetpants.export_location}/create_tables_#{@port}.sql", :terminator => ''
+       output result
+     end
+
+     # Has no built-in effect. Plugins can override this and/or use before_alter_schemata
+     # and after_alter_schemata callbacks to provide an implementation.
+     # Also sometimes useful to override this as a singleton method on specific DB objects
+     # in a migration script.
+     def alter_schemata
+     end
+
+     # Exports data for the supplied tables. If min/max ID supplied, only exports
+     # data where at least one of the table's sharding keys falls within this range.
+     # Creates a 'jetpants' db user with FILE permissions for the duration of the
+     # export.
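+     #
+     # A hedged usage sketch (shard bounds illustrative):
+     #
+     #   tables = Table.from_config 'sharded_tables'
+     #   db.export_data tables, 2001, 3000   # only rows with a sharding key in 2001..3000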
+     def export_data(tables, min_id=false, max_id=false)
+       pause_replication if @master && ! @repl_paused
+       import_export_user = 'jetpants'
+       create_user(import_export_user)
+       grant_privileges(import_export_user)              # standard privs
+       grant_privileges(import_export_user, '*', 'FILE') # FILE global privs
+       reconnect(import_export_user)
+       @counts ||= {}
+       tables.each {|t| @counts[t.name] = export_table_data t, min_id, max_id}
+     ensure
+       reconnect(true) # switches back to default app user
+       drop_user(import_export_user)
+     end
+
+     # Exports data for a table. Only includes the data subset that falls
+     # within min_id and max_id. The export files will be located according
+     # to the export_location configuration setting.
+     # Returns the number of rows exported.
+     def export_table_data(table, min_id=false, max_id=false)
+       unless min_id && max_id && table.chunks > 0
+         output "Exporting all data", table
+         rows_exported = query(table.sql_export_all)
+         output "#{rows_exported} rows exported", table
+         return rows_exported
+       end
+
+       output "Exporting data for ID range #{min_id}..#{max_id}", table
+       lock = Mutex.new
+       rows_exported = 0
+       chunks_completed = 0
+
+       (min_id..max_id).in_chunks(table.chunks) do |min, max|
+         attempts = 0
+         begin
+           sql = table.sql_export_range(min, max)
+           result = query sql
+           lock.synchronize do
+             rows_exported += result
+             chunks_completed += 1
+             percent_finished = 100 * chunks_completed / table.chunks
+             output("Export #{percent_finished}% complete.", table) if table.chunks >= 40 && chunks_completed % 20 == 0
+           end
+         rescue => ex
+           if attempts >= 10
+             output "EXPORT ERROR: #{ex.message}, chunk #{min}-#{max}, giving up", table
+             raise
+           end
+           attempts += 1
+           output "EXPORT ERROR: #{ex.message}, chunk #{min}-#{max}, attempt #{attempts}, re-trying after delay", table
+           ssh_cmd("rm -f " + table.export_file_path(min, max))
+           sleep(1.0 * attempts)
+           retry
+         end
+       end
+       output "#{rows_exported} rows exported", table
+       rows_exported
+     end
+
+     # Imports data for a table that was previously exported using export_data.
+     # Only includes the data subset that falls within min_id and max_id. If
+     # run after export_data (in the same process), import_data will
+     # automatically confirm that the import counts match the previous export
+     # counts.
+     # Creates a 'jetpants' db user with FILE permissions for the duration of the
+     # import.
+     def import_data(tables, min_id=false, max_id=false)
+       import_export_user = 'jetpants'
+       create_user(import_export_user)
+       grant_privileges(import_export_user)              # standard privs
+       grant_privileges(import_export_user, '*', 'FILE') # FILE global privs
+       reconnect(import_export_user)
+
+       import_counts = {}
+       tables.each {|t| import_counts[t.name] = import_table_data t, min_id, max_id}
+
+       # Verify counts
+       @counts ||= {}
+       @counts.each do |name, exported|
+         if exported == import_counts[name]
+           output "Verified import count matches export count for table #{name}"
+         else
+           raise "Import count (#{import_counts[name]}) does not match export count (#{exported}) for table #{name}"
+         end
+       end
+
+     ensure
+       reconnect(true) # switches back to default app user
+       drop_user(import_export_user)
+     end
+
+     # Imports the data subset previously dumped through export_data.
+     # Returns number of rows imported.
+     def import_table_data(table, min_id=false, max_id=false)
+       unless min_id && max_id && table.chunks > 0
+         output "Importing all data", table
+         rows_imported = query(table.sql_import_all)
+         output "#{rows_imported} rows imported", table
+         return rows_imported
+       end
+
+       output "Importing data for ID range #{min_id}..#{max_id}", table
+       lock = Mutex.new
+       rows_imported = 0
+       chunks_completed = 0
+
+       (min_id..max_id).in_chunks(table.chunks) do |min, max|
+         attempts = 0
+         begin
+           sql = table.sql_import_range(min, max)
+           result = query sql
+           lock.synchronize do
+             rows_imported += result
+             chunks_completed += 1
+             percent_finished = 100 * chunks_completed / table.chunks
+             output("Import #{percent_finished}% complete.", table) if table.chunks >= 40 && chunks_completed % 20 == 0
+             chunk_file_name = table.export_file_path(min, max)
+             ssh_cmd "rm -f #{chunk_file_name}"
+           end
+         rescue => ex
+           if attempts >= 10
+             output "IMPORT ERROR: #{ex.message}, chunk #{min}-#{max}, giving up", table
+             raise
+           end
+           attempts += 1
+           output "IMPORT ERROR: #{ex.message}, chunk #{min}-#{max}, attempt #{attempts}, re-trying after delay", table
+           sleep(3.0 * attempts)
+           retry
+         end
+       end
+       output "#{rows_imported} rows imported", table
+       rows_imported
+     end
+
+     # Counts rows falling between min_id and max_id for the supplied tables.
+     # Returns a hash mapping table names to counts.
+     # Note: runs 10 concurrent queries to perform the count quickly. This is
+     # MUCH faster than doing a single count, but far more I/O intensive, so
+     # don't use this on a master or active slave.
+     def row_counts(tables, min_id, max_id)
+       lock = Mutex.new
+       row_count = {}
+       tables.each do |t|
+         row_count[t.name] = 0
+         (min_id..max_id).in_chunks(t.chunks, 10) do |min, max|
+           result = query_return_first_value(t.sql_count_rows(min, max))
+           lock.synchronize {row_count[t.name] += result}
+         end
+         output "#{row_count[t.name]} rows counted", t
+       end
+       row_count
+     end
+
+     # Cleans up all rows that should no longer be on this db.
+     # Supply the ID range (in terms of the table's sharding key)
+     # of rows to KEEP.
+     def prune_data_to_range(tables, keep_min_id, keep_max_id)
+       reconnect(true)
+       tables.each do |t|
+         output "Cleaning up data, pruning to only keep range #{keep_min_id}-#{keep_max_id}", t
+         rows_deleted = 0
+         [:asc, :desc].each {|direction| rows_deleted += delete_table_data_outside_range(t, keep_min_id, keep_max_id, direction)}
+         output "Done cleanup; #{rows_deleted} rows deleted", t
+       end
+     end
+
+     # Helper method used by prune_data_to_range. Deletes data for the given table that falls
+     # either below the supplied keep_min_id (if direction is :desc) or above the
+     # supplied keep_max_id (if direction is :asc).
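+     # The deletion walks each sharding key in small steps: find the next ID
+     # beyond the current position, delete the rows for that ID, advance, and
+     # repeat until no further IDs exist. Conceptually, for the :asc case the
+     # generated statements resemble (SQL illustrative, not the actual output
+     # of Table#sql_cleanup_next_id / #sql_cleanup_delete):
+     #
+     #   SELECT MIN(user_id) FROM widgets WHERE user_id > ?   -- finder
+     #   DELETE FROM widgets WHERE user_id = ?                -- deleter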
+     def delete_table_data_outside_range(table, keep_min_id, keep_max_id, direction)
+       rows_deleted = 0
+
+       if direction == :asc
+         dir_english = "Ascending"
+         boundary = keep_max_id
+         output "Removing rows with ID > #{boundary}", table
+       elsif direction == :desc
+         dir_english = "Descending"
+         boundary = keep_min_id
+         output "Removing rows with ID < #{boundary}", table
+       else
+         raise "Unknown direction parameter #{direction}"
+       end
+
+       table.sharding_keys.each do |col|
+         deleter_sql = table.sql_cleanup_delete(col, keep_min_id, keep_max_id)
+
+         id = boundary
+         iter = 0
+         while id do
+           finder_sql = table.sql_cleanup_next_id(col, id, direction)
+           id = query_return_first_value(finder_sql)
+           break unless id
+           rows_deleted += query(deleter_sql, id)
+           iter += 1
+           output("#{dir_english} deletion progress: through #{col} #{id}, deleted #{rows_deleted} rows so far", table) if iter % 50000 == 0
+         end
+       end
+       rows_deleted
+     end
+
+     # Exports and re-imports data for the specified tables, optionally bounded by the
+     # given range. Useful for defragmenting a node. Also useful for doing fast schema
+     # alterations, if alter_schemata (or its callbacks) has been implemented.
+     #
+     # You can omit all params for a shard, in which case the method will use the list
+     # of sharded tables in the Jetpants config file, and will use the shard's min and
+     # max ID.
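+     #
+     # For a shard standby, the typical invocation is simply (illustrative):
+     #
+     #   shard_standby_db.rebuild!
+     #
+     # which is equivalent to passing the configured sharded tables plus the
+     # shard's own min/max ID explicitly.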
+     def rebuild!(tables=false, min_id=false, max_id=false)
+       raise "Cannot rebuild an active node" unless is_standby? || for_backups?
+
+       p = pool
+       if p.is_a?(Shard)
+         tables ||= Table.from_config 'sharded_tables'
+         min_id ||= p.min_id
+         max_id ||= p.max_id if p.max_id != 'INFINITY'
+       end
+       raise "No tables supplied" unless tables && tables.count > 0
+
+       disable_monitoring
+       stop_query_killer
+       disable_binary_logging
+       restart_mysql
+       reconnect
+       pause_replication if is_slave?
+
+       # Automatically detect missing min/max. Assumes that all tables' primary keys
+       # are on the same scale, so this may be non-ideal, but better than just erroring.
+       unless min_id
+         tables.each do |t|
+           my_min = query_return_first_value "SELECT MIN(#{t.sharding_keys[0]}) FROM #{t.name}"
+           min_id = my_min if !min_id || my_min < min_id
+         end
+       end
+       unless max_id
+         @found_max_ids = {} # we store the detected maxes in case DB#alter_schemata needs them later
+         tables.each do |t|
+           my_max = @found_max_ids[t.name] = query_return_first_value("SELECT MAX(#{t.sharding_keys[0]}) FROM #{t.name}")
+           max_id = my_max if !max_id || my_max > max_id
+         end
+       end
+
+       export_schemata tables
+       export_data tables, min_id, max_id
+       import_schemata!
+       alter_schemata if respond_to? :alter_schemata
+       import_data tables, min_id, max_id
+
+       resume_replication if is_slave?
+       enable_binary_logging
+       restart_mysql
+       catch_up_to_master
+       start_query_killer
+       enable_monitoring
+     end
+
+     # Returns the data set size in bytes (if in_gb is false or omitted) or in gigabytes
+     # (if in_gb is true). Note that this is actually in gibibytes (2^30) rather than
+     # a metric gigabyte. This puts it on the same scale as the output of tools like
+     # "du -h" and "df -h".
+     def data_set_size(in_gb=false)
+       bytes = dir_size("#{mysql_directory}/#{Jetpants.mysql_schema}")
+       in_gb ? (bytes / 1073741824.0).round : bytes
+     end
+
+     # Copies mysql db files from self to one or more additional DBs.
+     # WARNING: temporarily shuts down mysql on self, and WILL OVERWRITE CONTENTS
+     # OF MYSQL DIRECTORY ON TARGETS. Confirms first that none of the targets
+     # have over 100MB of data in the schema directory or in ibdata1.
+     # MySQL is restarted on source and targets afterwards.
+     def clone_to!(*targets)
+       targets.flatten!
+       raise "Cannot clone an instance onto its master" if master && targets.include?(master)
+       destinations = {}
+       targets.each do |t|
+         destinations[t] = t.mysql_directory
+         existing_size = t.data_set_size + t.dir_size("#{t.mysql_directory}/ibdata1")
+         raise "Over 100 MB of existing MySQL data on target #{t}, aborting copy!" if existing_size > 100000000
+       end
+       [self, targets].flatten.concurrent_each {|t| t.stop_query_killer; t.stop_mysql}
+       targets.concurrent_each {|t| t.ssh_cmd "rm -rf #{t.mysql_directory}/ib_logfile*"}
+       fast_copy_chain(mysql_directory,
+                       destinations,
+                       port: 3306,
+                       files: ['ibdata1', 'mysql', 'test', Jetpants.mysql_schema],
+                       overwrite: true)
+       [self, targets].flatten.concurrent_each {|t| t.start_mysql; t.start_query_killer}
+     end
+
+   end
+ end