jetpants 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
module Jetpants
  # Exception class used to halt further processing in the callback chain.
  # Raise this from a before_*/after_* callback; see the description in
  # CallbackHandler for the exact abort semantics.
  class CallbackAbortError < StandardError; end
5
+
6
+ # If you include CallbackHandler as a mix-in, it grants the base class support
7
+ # for Jetpants callbacks, as defined here:
8
+ #
9
+ # If you invoke a method "foo", Jetpants will first
10
+ # automatically call any "before_foo" methods that exist in the class or its
11
+ # superclasses. You can even define multiple methods named before_foo (in the
12
+ # same class!) and they will each be called. In other words, Jetpants
13
+ # callbacks "stack" instead of overriding each other.
14
+ #
15
+ # After calling any/all before_foo methods, the foo method is called, followed
16
+ # by all after_foo methods in the same manner.
17
+ #
18
+ # If any before_foo method raises a CallbackAbortError, subsequent before_foo
19
+ # methods will NOT be called, NOR will foo itself nor any after_foo methods.
20
+ #
21
+ # If any after_foo method raises a CallbackAbortError, subsequent after_foo
22
+ # methods will NOT be called.
23
+ #
24
+ # You may precede the definition of a callback method with "callback_priority 123"
25
+ # to set an explicit priority (higher = called first) for subsequent callbacks.
26
+ # The default priority is 100.
27
  module CallbackHandler
    # Hook: when a class mixes in CallbackHandler, extend its singleton with
    # the callback-defining machinery (callback_priority + method_added).
    def self.included(base)
      base.class_eval do
        class << self
          # Set the priority (higher = called first) for any subsequent callbacks defined in the current class.
          def callback_priority(value)
            @callback_priority = value
          end

          # Ruby invokes this hook for every instance method defined on the
          # class (including ones created via alias_method/define_method).
          def method_added(name)
            # Intercept before_* and after_* methods and create corresponding Callback objects.
            # Callback.new registers the callback AND (first time only) wraps the
            # target method so callbacks actually fire.
            if name.to_s.start_with? 'before_', 'after_'
              Callback.new self, name.to_s.split('_', 2)[1].to_sym, name.to_s.split('_', 2)[0].to_sym, @callback_priority

            # Intercept redefinitions of methods we've already wrapped, so we can
            # wrap them again
            elsif Callback.wrapped? self, name
              Callback.wrap_method self, name
            end
          end
        end

        # Default priority for callbacks is 100
        @callback_priority = 100
      end
    end
  end
54
+
55
  # Generic representation of a before-method or after-method callback.
  # Used internally by CallbackHandler; you won't need to interact with Callback directly.
  class Callback
    # Class-wide registries, shared by every class that mixes in CallbackHandler:
    @@all_callbacks = {} # hash of class obj -> method_name symbol -> type symbol (:before/:after) -> array of Callback objects
    @@currently_wrapping = {} # hash of class obj -> method_name symbol -> bool (re-entrancy guard for wrap_method)

    attr_reader :for_class # class object the callback was defined on
    attr_reader :method_name # symbol containing method name (the one being callback-wrapped)
    attr_reader :type # :before or :after
    attr_reader :priority # high numbers get triggered first
    attr_reader :my_alias # method name alias OF THE CALLBACK itself (unique per registration)
66
+
67
    # Registers a new callback and wires it into the callback machinery.
    #
    # for_class   - class object the before_*/after_* method was defined on
    # method_name - symbol: the target method being wrapped (:foo for before_foo)
    # type        - :before or :after
    # priority    - higher numbers fire first (default 100)
    def initialize(for_class, method_name, type=:after, priority=100)
      @for_class = for_class
      @method_name = method_name
      @type = type
      @priority = priority

      # Ensure the nested registry hashes exist for this class/method.
      @@all_callbacks[for_class] ||= {}
      @@all_callbacks[for_class][method_name] ||= {}
      # Must be checked BEFORE registering self below, so we know whether the
      # target method still needs its dispatching wrapper installed.
      already_wrapped = Callback.wrapped?(for_class, method_name)
      @@all_callbacks[for_class][method_name][type] ||= []

      # Each callback gets a unique alias, so multiple before_foo/after_foo
      # definitions in the same class are all retained ("stacking") instead of
      # overriding each other.
      next_method_id = @@all_callbacks[for_class][method_name][type].count + 1
      old_name = "#{type.to_s}_#{method_name.to_s}".to_sym
      # NOTE(review): String#sub replaces only the FIRST '::', so deeply nested
      # class names keep later '::' in the alias; appears harmless — confirm.
      @my_alias = new_name = ("real_callback_#{old_name}_" + for_class.to_s.sub('::', '_') + "_#{next_method_id}").to_sym
      for_class.class_eval do
        alias_method new_name, old_name
      end
      # Install the wrapper around the target method exactly once.
      Callback.wrap_method(for_class, method_name) unless already_wrapped

      @@all_callbacks[for_class][method_name][type] << self
    end
88
+
89
    # Replaces for_class#method_name with a wrapper that fires :before
    # callbacks, invokes the original implementation, then fires :after
    # callbacks. The original implementation remains reachable as
    # <method_name>_without_callbacks.
    def self.wrap_method(for_class, method_name)
      @@currently_wrapping[for_class] ||= {}
      @@currently_wrapping[for_class][method_name] ||= false
      # The alias_method/define_method calls below re-trigger method_added on
      # for_class, which would re-enter wrap_method; this flag breaks the loop.
      return if @@currently_wrapping[for_class][method_name] # prevent infinite recursion from the alias_method call
      @@currently_wrapping[for_class][method_name] = true

      for_class.class_eval do
        alias_method "#{method_name}_without_callbacks".to_sym, method_name
        define_method method_name do |*args|
          # CallbackAbortError from any :before callback skips the wrapped
          # method entirely (and therefore the :after callbacks too).
          begin
            Callback.trigger(self, method_name, :before, *args)
          rescue CallbackAbortError
            return
          end
          result = send "#{method_name}_without_callbacks".to_sym, *args
          # CallbackAbortError from an :after callback stops the remaining
          # :after callbacks, but the wrapped method's result is still returned.
          begin
            Callback.trigger(self, method_name, :after, *args)
          rescue CallbackAbortError
          end
          result
        end
      end

      @@currently_wrapping[for_class][method_name] = false
    end
114
+
115
+ def self.trigger(for_object, method_name, type, *args)
116
+ my_callbacks = []
117
+ for_object.class.ancestors.each do |for_class|
118
+ if @@all_callbacks[for_class] && @@all_callbacks[for_class][method_name] && @@all_callbacks[for_class][method_name][type]
119
+ my_callbacks.concat(@@all_callbacks[for_class][method_name][type])
120
+ end
121
+ end
122
+ my_callbacks.sort_by! {|c| -1 * c.priority}
123
+ my_callbacks.each {|c| for_object.send(c.my_alias, *args)}
124
+ end
125
+
126
+ def self.wrapped?(for_class, method_name)
127
+ return false unless @@all_callbacks[for_class] && @@all_callbacks[for_class][method_name]
128
+ @@all_callbacks[for_class][method_name].count > 0
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,122 @@
1
+ require 'sequel'
2
+ require 'json'
3
+
4
module Jetpants

  # A Jetpants::DB is a specific mysql instance running on a particular IP and port.
  # It also contains a Jetpants::Host object corresponding to the IP; any missing
  # method calls get delegated to the Host.
  #
  # This class has been split across several files due to its size. Please see
  # lib/jetpants/db/*.rb for the bulk of its logic, which has been divided along
  # functional lines.
  class DB
    include CallbackHandler

    # IP address (as a string) of the MySQL instance
    attr_reader :ip

    # Port number of the MySQL instance. The base Jetpants implementation only supports
    # port 3306, since this is necessary to crawl a replication hierarchy using SHOW
    # PROCESSLIST, which does not include slave port numbers. However, plugins may
    # override this behavior to support nonstandard ports and multi-instance-per-host
    # topologies.
    attr_reader :port

    # Jetpants::Host object that this MySQL instance runs on.
    attr_reader :host

    # We keep track of DB instances (keyed by "ip:port") to prevent DB.new from
    # ever returning duplicates.
    @@all_dbs = {}
    # Guards @@all_dbs so concurrent DB.new calls stay consistent.
    @@all_dbs_mutex = Mutex.new

    # Because this class is rather large, methods have been grouped together
    # and moved to separate files in lib/jetpants/db. We load these all now.
    # They each just re-open the DB class and add some methods.
    Dir[File.join File.dirname(__FILE__), 'db', '*'].each {|f| require f}
38
+
39
+ # We override DB.new so that attempting to create a duplicate DB object
40
+ # (that is, one with the same IP and port as an existing DB object)
41
+ # returns the original object.
42
+ def self.new(ip, port=3306)
43
+ ip, embedded_port = ip.split(':', 2)
44
+ port = embedded_port.to_i if embedded_port
45
+ addr = "#{ip}:#{port}"
46
+ @@all_dbs_mutex.synchronize do
47
+ @@all_dbs[addr] = nil unless @@all_dbs[addr].is_a? self
48
+ @@all_dbs[addr] ||= super
49
+ end
50
+ end
51
+
52
+ def initialize(ip, port=3306)
53
+ @ip, @port = ip, port.to_i
54
+ @user = false # connections will default to app user
55
+ @master = nil
56
+ @slaves = nil
57
+ @repl_paused = nil
58
+ @running = nil
59
+ @host = Host.new(ip)
60
+ end
61
+
62
+
63
+ ###### Host methods ########################################################
64
+
65
+ # Jetpants::DB delegates missing methods to its Jetpants::Host.
66
+ def method_missing(name, *args, &block)
67
+ if @host.respond_to? name
68
+ @host.send name, *args, &block
69
+ else
70
+ super
71
+ end
72
+ end
73
+
74
+ # Alters respond_to? logic to account for delegation of missing methods
75
+ # to the instance's Host.
76
+ def respond_to?(name, include_private=false)
77
+ super || @host.respond_to?(name)
78
+ end
79
+
80
+ # Returns true if the supplied Jetpants::DB is on the same Jetpants::Host
81
+ # as self.
82
+ def same_host_as?(db)
83
+ @ip == db.ip
84
+ end
85
+
86
+ ###### Misc methods ########################################################
87
+
88
+ # Displays the provided output, along with information about the current time,
89
+ # self, and optionally a Jetpants::Table name.
90
+ def output(str, table=nil)
91
+ str = str.to_s.strip
92
+ str = nil if str && str.length == 0
93
+ str ||= "Completed (no output)"
94
+ output = Time.now.strftime("%H:%M:%S") + " [#{self}] "
95
+ output << table.name << ': ' if table
96
+ output << str
97
+ print output + "\n"
98
+ output
99
+ end
100
+
101
+ # DB objects are sorted as strings, ie, by calling to_s
102
+ def <=> other
103
+ to_s <=> other.to_s
104
+ end
105
+
106
+ # Returns a string in the form "ip:port"
107
+ def to_s
108
+ "#{@ip}:#{@port}"
109
+ end
110
+
111
+ # Returns self, since self is already a Jetpants::DB.
112
+ def to_db
113
+ self
114
+ end
115
+
116
+ # Returns the instance's Jetpants::Host.
117
+ def to_host
118
+ @host
119
+ end
120
+
121
+ end
122
+ end
@@ -0,0 +1,103 @@
1
+ module Jetpants
2
+
3
+ #--
4
+ # Connection and query methods ###############################################
5
+ #++
6
+
7
+ class DB
8
+ # Runs the provided SQL statement as root, and returns the response as a single string.
9
+ # Available options:
10
+ # :terminator:: how to terminate the query, such as '\G' or ';'. (default: '\G')
11
+ # :parse:: parse a single-row, vertical-format result (:terminator must be '\G') and return it as a hash
12
+ # :attempts:: by default, queries will be attempted up to 3 times. set this to 0 or false for non-idempotent queries.
13
+ def mysql_root_cmd(cmd, options={})
14
+ terminator = options[:terminator] || '\G'
15
+ attempts = (options[:attempts].nil? ? 3 : (options[:attempts].to_i || 1))
16
+ failures = 0
17
+
18
+ begin
19
+ raise "MySQL is not running" unless running?
20
+ supply_root_pw = (Jetpants.mysql_root_password ? "-p#{Jetpants.mysql_root_password}" : '')
21
+ supply_port = (@port == 3306 ? '' : "-h 127.0.0.1 -P #{@port}")
22
+ real_cmd = %Q{mysql #{supply_root_pw} #{supply_port} -ss -e "#{cmd}#{terminator}" #{Jetpants.mysql_schema}}
23
+ real_cmd.untaint
24
+ result = ssh_cmd!(real_cmd)
25
+ raise result if result && result.downcase.start_with?('error ')
26
+ result = parse_vertical_result(result) if options[:parse] && terminator == '\G'
27
+ return result
28
+ rescue => ex
29
+ failures += 1
30
+ raise if failures >= attempts
31
+ output "Root query \"#{cmd}\" failed: #{ex.message}, re-trying after delay"
32
+ sleep 3 * failures
33
+ retry
34
+ end
35
+ end
36
+
37
+ # Returns a Sequel database object
38
+ def mysql
39
+ return @db if @db
40
+ @db = Sequel.connect(
41
+ :adapter => 'mysql2',
42
+ :host => @ip,
43
+ :port => @port,
44
+ :user => @user || Jetpants.app_credentials[:user],
45
+ :password => Jetpants.app_credentials[:pass],
46
+ :database => Jetpants.mysql_schema,
47
+ :max_connections => Jetpants.max_concurrency)
48
+ end
49
+ alias init_db_connection_pool mysql
50
+
51
+ # Closes existing mysql connection pool and opens a new one. Useful when changing users.
52
+ # Supply a new user name as the param, or nothing/false to keep old user name, or
53
+ # a literal true value to switch to the default app user in Jetpants configuration
54
+ def reconnect(new_user=false)
55
+ @user = (new_user == true ? Jetpants.app_credentials[:user] : new_user) if new_user
56
+ if @db
57
+ @db.disconnect rescue nil
58
+ @db = nil
59
+ end
60
+ init_db_connection_pool
61
+ end
62
+
63
    # Execute a write (INSERT, UPDATE, DELETE, REPLACE, etc) query.
    # If the query is an INSERT, returns the last insert ID (if an auto_increment
    # column is involved). Otherwise returns the number of affected rows.
    def query(sql, *binds)
      # fetch merely binds the placeholders; update_sql renders the bound SQL
      # string so it can be run through execute_dui (Sequel's
      # delete/update/insert execution path).
      ds = mysql.fetch(sql, *binds)
      # NOTE: the `return` inside the block exits query itself, yielding the
      # insert id when non-zero, otherwise the affected-row count.
      mysql.execute_dui(ds.update_sql) {|c| return c.last_id > 0 ? c.last_id : c.affected_rows}
    end
70
+
71
    # Execute a read (SELECT) query. Returns all rows as an array of hashes
    # (column-symbol => value).
    def query_return_array(sql, *binds)
      mysql.fetch(sql, *binds).all
    end

    # Execute a read (SELECT) query. Returns a hash of the first row only
    # (nil if the result set is empty).
    def query_return_first(sql, *binds)
      mysql.fetch(sql, *binds).first
    end

    # Execute a read (SELECT) query. Returns the value of the first column of
    # the first row only.
    def query_return_first_value(sql, *binds)
      mysql.fetch(sql, *binds).single_value
    end
85
+
86
+ # Parses the result of a MySQL query run with a \G terminator. Useful when
87
+ # interacting with MySQL via the command-line client (for secure access to
88
+ # the root user) instead of via the MySQL protocol.
89
+ def parse_vertical_result(text)
90
+ results = {}
91
+ return results unless text
92
+ raise text.chomp if text =~ /^ERROR/
93
+ lines = text.split("\n")
94
+ lines.each do |line|
95
+ col, val = line.split ':'
96
+ next unless val
97
+ results[col.strip.downcase.to_sym] = val.strip
98
+ end
99
+ results
100
+ end
101
+
102
+ end
103
+ end
@@ -0,0 +1,330 @@
1
+ module Jetpants
2
+
3
+ #--
4
+ # Import, export, and data set methods #######################################
5
+ #++
6
+
7
+ class DB
8
    # Exports the DROP TABLE + CREATE TABLE statements for the given tables via mysqldump,
    # writing them to <Jetpants.export_location>/create_tables_<port>.sql on this host.
    def export_schemata(tables)
      output 'Exporting table definitions'
      supply_root_pw = (Jetpants.mysql_root_password ? "-p#{Jetpants.mysql_root_password}" : '')
      supply_port = (@port == 3306 ? '' : "-h 127.0.0.1 -P #{@port}")
      # -d: dump table definitions only, no row data
      cmd = "mysqldump #{supply_root_pw} #{supply_port} -d #{Jetpants.mysql_schema} " + tables.join(' ') + " >#{Jetpants.export_location}/create_tables_#{@port}.sql"
      cmd.untaint
      result = ssh_cmd(cmd)
      output result
    end
18
+
19
    # Executes a .sql file previously created via export_schemata.
    # Warning: this will DESTROY AND RECREATE any tables contained in the file.
    # DO NOT USE ON A DATABASE THAT CONTAINS REAL DATA!!! This method doesn't
    # check first! The statements will replicate to any slaves! PROCEED WITH
    # CAUTION IF RUNNING THIS MANUALLY!
    def import_schemata!
      output 'Dropping and re-creating table definitions'
      # Empty terminator: the dump file already contains complete statements.
      result = mysql_root_cmd "source #{Jetpants.export_location}/create_tables_#{@port}.sql", :terminator => ''
      output result
    end
29
+
30
    # Extension hook: has no built-in effect. Plugins can override this and/or
    # use before_alter_schemata and after_alter_schemata callbacks to provide
    # an implementation. Also sometimes useful to override this as a singleton
    # method on specific DB objects in a migration script.
    def alter_schemata
    end
36
+
37
    # Exports data for the supplied tables. If min/max ID supplied, only exports
    # data where at least one of the table's sharding keys falls within this range.
    # Creates a 'jetpants' db user with FILE permissions for the duration of the
    # export. Per-table row counts are remembered in @counts so a subsequent
    # import_data call (same process) can verify them.
    def export_data(tables, min_id=false, max_id=false)
      # SELECT ... INTO OUTFILE writes change the exported snapshot if
      # replication keeps applying events, so pause it first.
      pause_replication if @master && ! @repl_paused
      import_export_user = 'jetpants'
      create_user(import_export_user)
      grant_privileges(import_export_user) # standard privs
      grant_privileges(import_export_user, '*', 'FILE') # FILE global privs
      reconnect(import_export_user)
      @counts ||= {}
      tables.each {|t| @counts[t.name] = export_table_data t, min_id, max_id}
    ensure
      # Always restore the app user and remove the temporary FILE-privileged
      # account, even if the export raised.
      reconnect(true) # switches back to default app user
      drop_user(import_export_user)
    end
54
+
55
    # Exports data for a table. Only includes the data subset that falls
    # within min_id and max_id. The export files will be located according
    # to the export_location configuration setting.
    # Returns the number of rows exported.
    def export_table_data(table, min_id=false, max_id=false)
      # Without a usable ID range (or with chunking disabled), dump the whole
      # table in a single statement.
      unless min_id && max_id && table.chunks > 0
        output "Exporting all data", table
        rows_exported = query(table.sql_export_all)
        output "#{rows_exported} rows exported", table
        return rows_exported
      end

      output "Exporting data for ID range #{min_id}..#{max_id}", table
      # NOTE(review): in_chunks presumably runs its block concurrently — the
      # Mutex below guards the shared counters; confirm in Range#in_chunks.
      lock = Mutex.new
      rows_exported = 0
      chunks_completed = 0

      (min_id..max_id).in_chunks(table.chunks) do |min, max|
        attempts = 0
        begin
          sql = table.sql_export_range(min, max)
          result = query sql
          lock.synchronize do
            rows_exported += result
            chunks_completed += 1
            percent_finished = 100 * chunks_completed / table.chunks
            # Progress lines only for large exports, every 20 chunks.
            output("Export #{percent_finished}% complete.", table) if table.chunks >= 40 && chunks_completed % 20 == 0
          end
        rescue => ex
          if attempts >= 10
            output "EXPORT ERROR: #{ex.message}, chunk #{min}-#{max}, giving up", table
            raise
          end
          attempts += 1
          output "EXPORT ERROR: #{ex.message}, chunk #{min}-#{max}, attempt #{attempts}, re-trying after delay", table
          # INTO OUTFILE refuses to overwrite; remove the partial file before retrying.
          ssh_cmd("rm -f " + table.export_file_path(min, max))
          sleep(1.0 * attempts)
          retry
        end
      end
      output "#{rows_exported} rows exported", table
      rows_exported
    end
98
+
99
    # Imports data for tables that were previously exported using export_data.
    # Only includes the data subset that falls within min_id and max_id. If
    # run after export_data (in the same process), import_data will
    # automatically confirm that the import counts match the previous export
    # counts (tables with no recorded export count are not verified).
    # Creates a 'jetpants' db user with FILE permissions for the duration of the
    # import.
    def import_data(tables, min_id=false, max_id=false)
      import_export_user = 'jetpants'
      create_user(import_export_user)
      grant_privileges(import_export_user) # standard privs
      grant_privileges(import_export_user, '*', 'FILE') # FILE global privs
      reconnect(import_export_user)

      import_counts = {}
      tables.each {|t| import_counts[t.name] = import_table_data t, min_id, max_id}

      # Verify counts against whatever export_data recorded earlier in this
      # process (@counts is empty when no prior export happened).
      @counts ||= {}
      @counts.each do |name, exported|
        if exported == import_counts[name]
          output "Verified import count matches export count for table #{name}"
        else
          raise "Import count (#{import_counts[name]}) does not match export count (#{exported}) for table #{name}"
        end
      end

    ensure
      # Always restore the app user and remove the temporary FILE-privileged
      # account, even if the import raised.
      reconnect(true) # switches back to default app user
      drop_user(import_export_user)
    end
130
+
131
    # Imports the data subset previously dumped through export_data.
    # Returns number of rows imported.
    def import_table_data(table, min_id=false, max_id=false)
      # Without a usable ID range (or with chunking disabled), load the whole
      # dump in a single statement.
      unless min_id && max_id && table.chunks > 0
        output "Importing all data", table
        rows_imported = query(table.sql_import_all)
        output "#{rows_imported} rows imported", table
        return rows_imported
      end

      output "Importing data for ID range #{min_id}..#{max_id}", table
      # NOTE(review): in_chunks presumably runs its block concurrently — the
      # Mutex below guards the shared counters; confirm in Range#in_chunks.
      lock = Mutex.new
      rows_imported = 0
      chunks_completed = 0

      (min_id..max_id).in_chunks(table.chunks) do |min, max|
        attempts = 0
        begin
          sql = table.sql_import_range(min, max)
          result = query sql
          lock.synchronize do
            rows_imported += result
            chunks_completed += 1
            percent_finished = 100 * chunks_completed / table.chunks
            # Progress lines only for large imports, every 20 chunks.
            output("Import #{percent_finished}% complete.", table) if table.chunks >= 40 && chunks_completed % 20 == 0
            # Chunk file is no longer needed once loaded; reclaim disk as we go.
            chunk_file_name = table.export_file_path(min, max)
            ssh_cmd "rm -f #{chunk_file_name}"
          end
        rescue => ex
          if attempts >= 10
            output "IMPORT ERROR: #{ex.message}, chunk #{min}-#{max}, giving up", table
            raise
          end
          attempts += 1
          output "IMPORT ERROR: #{ex.message}, chunk #{min}-#{max}, attempt #{attempts}, re-trying after delay", table
          sleep(3.0 * attempts)
          retry
        end
      end
      output "#{rows_imported} rows imported", table
      rows_imported
    end
173
+
174
    # Counts rows falling between min_id and max_id for the supplied tables.
    # Returns a hash mapping table names to counts.
    # Note: runs 10 concurrent queries to perform the count quickly. This is
    # MUCH faster than doing a single count, but far more I/O intensive, so
    # don't use this on a master or active slave.
    def row_counts(tables, min_id, max_id)
      lock = Mutex.new       # guards row_count across the concurrent chunk queries
      row_count = {}
      tables.each do |t|
        row_count[t.name] = 0
        # Second in_chunks argument caps concurrency at 10 parallel queries.
        (min_id..max_id).in_chunks(t.chunks, 10) do |min, max|
          result = query_return_first_value(t.sql_count_rows(min, max))
          lock.synchronize {row_count[t.name] += result}
        end
        output "#{row_count[t.name]} rows counted", t
      end
      row_count
    end
192
+
193
+ # Cleans up all rows that should no longer be on this db.
194
+ # Supply the ID range (in terms of the table's sharding key)
195
+ # of rows to KEEP.
196
+ def prune_data_to_range(tables, keep_min_id, keep_max_id)
197
+ reconnect(true)
198
+ tables.each do |t|
199
+ output "Cleaning up data, pruning to only keep range #{keep_min_id}-#{keep_max_id}", t
200
+ rows_deleted = 0
201
+ [:asc, :desc].each {|direction| rows_deleted += delete_table_data_outside_range(t, keep_min_id, keep_max_id, direction)}
202
+ output "Done cleanup; #{rows_deleted} rows deleted", t
203
+ end
204
+ end
205
+
206
+ # Helper method used by prune_data_to_range. Deletes data for the given table that falls
207
+ # either below the supplied keep_min_id (if direction is :desc) or falls above the
208
+ # supplied keep_max_id (if direction is :asc).
209
+ def delete_table_data_outside_range(table, keep_min_id, keep_max_id, direction)
210
+ rows_deleted = 0
211
+
212
+ if direction == :asc
213
+ dir_english = "Ascending"
214
+ boundary = keep_max_id
215
+ output "Removing rows with ID > #{boundary}", table
216
+ elsif direction == :desc
217
+ dir_english = "Descending"
218
+ boundary = keep_min_id
219
+ output "Removing rows with ID < #{boundary}", table
220
+ else
221
+ raise "Unknown order parameter #{order}"
222
+ end
223
+
224
+ table.sharding_keys.each do |col|
225
+ deleter_sql = table.sql_cleanup_delete(col, keep_min_id, keep_max_id)
226
+
227
+ id = boundary
228
+ iter = 0
229
+ while id do
230
+ finder_sql = table.sql_cleanup_next_id(col, id, direction)
231
+ id = query_return_first_value(finder_sql)
232
+ break unless id
233
+ rows_deleted += query(deleter_sql, id)
234
+ iter += 1
235
+ output("#{dir_english} deletion progress: through #{col} #{id}, deleted #{rows_deleted} rows so far", table) if iter % 50000 == 0
236
+ end
237
+ end
238
+ rows_deleted
239
+ end
240
+
241
    # Exports and re-imports data for the specified tables, optionally bounded by the
    # given range. Useful for defragmenting a node. Also useful for doing fast schema
    # alterations, if alter_schemata (or its callbacks) has been implemented.
    #
    # You can omit all params for a shard, in which case the method will use the list
    # of sharded tables in the Jetpants config file, and will use the shard's min and
    # max ID.
    def rebuild!(tables=false, min_id=false, max_id=false)
      # Destructive operation: only safe on nodes not serving live traffic.
      raise "Cannot rebuild an active node" unless is_standby? || for_backups?

      p = pool
      if p.is_a?(Shard)
        tables ||= Table.from_config 'sharded_tables'
        min_id ||= p.min_id
        max_id ||= p.max_id if p.max_id != 'INFINITY'
      end
      raise "No tables supplied" unless tables && tables.count > 0

      # Quiesce the node: no alerts, no query killing, and no binlogging so
      # the rebuild's churn is not replicated downstream.
      disable_monitoring
      stop_query_killer
      disable_binary_logging
      restart_mysql
      reconnect
      pause_replication if is_slave?

      # Automatically detect missing min/max. Assumes that all tables' primary keys
      # are on the same scale, so this may be non-ideal, but better than just erroring.
      # NOTE(review): MIN/MAX return NULL for an empty table, which would make the
      # nil comparison below raise — confirm empty tables can't occur here.
      unless min_id
        tables.each do |t|
          my_min = query_return_first_value "SELECT MIN(#{t.sharding_keys[0]}) FROM #{t.name}"
          min_id = my_min if !min_id || my_min < min_id
        end
      end
      unless max_id
        @found_max_ids = {} # we store the detected maxes in case DB#alter_schemata needs them later
        tables.each do |t|
          my_max = @found_max_ids[t.name] = query_return_first_value("SELECT MAX(#{t.sharding_keys[0]}) FROM #{t.name}")
          max_id = my_max if !max_id || my_max > max_id
        end
      end

      # Dump, drop/recreate (optionally with altered schema), and reload.
      export_schemata tables
      export_data tables, min_id, max_id
      import_schemata!
      alter_schemata if respond_to? :alter_schemata
      import_data tables, min_id, max_id

      # Restore normal operation.
      resume_replication if is_slave?
      enable_binary_logging
      restart_mysql
      catch_up_to_master
      start_query_killer
      enable_monitoring
    end
295
+
296
+ # Returns the data set size in bytes (if in_gb is false or omitted) or in gigabytes
297
+ # (if in_gb is true). Note that this is actually in gibibytes (2^30) rather than
298
+ # a metric gigabyte. This puts it on the same scale as the output to tools like
299
+ # "du -h" and "df -h".
300
+ def data_set_size(in_gb=false)
301
+ bytes = dir_size("#{mysql_directory}/#{Jetpants.mysql_schema}")
302
+ in_gb ? (bytes / 1073741824.0).round : bytes
303
+ end
304
+
305
    # Copies mysql db files from self to one or more additional DBs.
    # WARNING: temporarily shuts down mysql on self, and WILL OVERWRITE CONTENTS
    # OF MYSQL DIRECTORY ON TARGETS. Confirms first that none of the targets
    # have over 100MB of data in the schema directory or in ibdata1.
    # MySQL is restarted on source and targets afterwards.
    def clone_to!(*targets)
      targets.flatten!
      # Cloning onto the master would overwrite the authoritative copy.
      raise "Cannot clone an instance onto its master" if master && targets.include?(master)
      destinations = {}
      targets.each do |t|
        destinations[t] = t.mysql_directory
        # Safety check: refuse to clobber a target holding real data (>100 MB).
        existing_size = t.data_set_size + t.dir_size("#{t.mysql_directory}/ibdata1")
        raise "Over 100 MB of existing MySQL data on target #{t}, aborting copy!" if existing_size > 100000000
      end
      # Take source AND targets fully offline for a consistent file-level copy.
      [self, targets].flatten.concurrent_each {|t| t.stop_query_killer; t.stop_mysql}
      # Remove InnoDB log files on targets; they are regenerated on startup and
      # must not be mixed with the copied ibdata1.
      targets.concurrent_each {|t| t.ssh_cmd "rm -rf #{t.mysql_directory}/ib_logfile*"}
      fast_copy_chain(mysql_directory,
                      destinations,
                      port: 3306,
                      files: ['ibdata1', 'mysql', 'test', Jetpants.mysql_schema],
                      overwrite: true)
      [self, targets].flatten.concurrent_each {|t| t.start_mysql; t.start_query_killer}
    end
328
+
329
+ end
330
+ end