jetpants 0.7.0

@@ -0,0 +1,272 @@
+require 'json'
+require 'db'
+
+module Jetpants
+
+  # A Pool represents a group of database instances (Jetpants::DB objects).
+  #
+  # The default implementation assumes that a Pool contains:
+  #   * 1 master
+  #   * 0 or more slaves, falling into one of these categories:
+  #     * active slaves (actively taking production read queries)
+  #     * standby slaves (for HA, promotable if a master or active slave fails, and used to clone new replacements)
+  #     * backup slaves (dedicated to backups and background jobs, never put into prod, potentially a different hardware spec)
+  #
+  # Plugins may of course override this extensively, to support different
+  # topologies, such as master-master trees.
+  #
+  # Many of these methods are only useful in conjunction with an asset-tracker /
+  # configuration-generator plugin.
+  class Pool
+    include CallbackHandler
+
+    # human-readable String name of pool
+    attr_reader :name
+
+    # Jetpants::DB object that is the pool's master
+    attr_reader :master
+
+    # Array of strings containing other equivalent names for this pool
+    attr_reader :aliases
+
+    # Can be used to store a name that refers to just the active_slaves, for
+    # instance if your framework isn't smart enough to know about master/slave
+    # relationships. Safe to leave as nil otherwise. Has no effect in Jetpants,
+    # but an asset tracker / config generator plugin may include this in the
+    # generated config file.
+    attr_accessor :slave_name
+
+    # Hash mapping DB object => weight, for active (read) slaves. Default weight
+    # is 100. Safe to leave at default if your app framework doesn't support
+    # different weights for individual read slaves. Weights have no effect inside
+    # Jetpants, but any asset tracker / config generator plugin can carry them
+    # through to the config file.
+    attr_reader :active_slave_weights
+
+    # If the master also receives read queries, this stores its weight. Set to 0
+    # if the master does not receive read queries (which is the default). This
+    # has no effect inside of Jetpants, but can be used by an asset tracker /
+    # config generator plugin to carry the value through to the config file.
+    attr_accessor :master_read_weight
+
+    def initialize(name, master)
+      @name = name
+      @slave_name = false
+      @aliases = []
+      @master = master.to_db
+      @master_read_weight = 0
+      @active_slave_weights = {}
+    end
+
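+    # A minimal usage sketch (hypothetical hostnames; assumes 10.42.0.2 is
+    # already replicating from the master, since the slave lists below are
+    # derived from @master.slaves):
+    #
+    #   pool = Pool.new('user-db', '10.42.0.1')  # master as IP string or DB object
+    #   pool.has_active_slave '10.42.0.2', 50    # register a read slave at weight 50
+    #   pool.slaves(:active)                     # => [<DB 10.42.0.2>]
+    #   pool.nodes                               # => master plus all slaves
+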
+    # Returns all slaves, or pass in :active, :standby, or :backup to receive
+    # only slaves of that particular type.
+    def slaves(type=false)
+      case type
+      when :active  then active_slaves
+      when :standby then standby_slaves
+      when :backup  then backup_slaves
+      when false    then @master.slaves
+      else []
+      end
+    end
+
+    # Returns an array of Jetpants::DB objects.
+    # Active slaves are ones that receive read queries from your application.
+    def active_slaves
+      @master.slaves.select {|sl| @active_slave_weights[sl]}
+    end
+
+    # Returns an array of Jetpants::DB objects.
+    # Standby slaves do not receive queries from your application. These are for
+    # high availability. They can be turned into active slaves or even the master,
+    # and can also be used for cloning additional slaves.
+    def standby_slaves
+      @master.slaves.reject {|sl| @active_slave_weights[sl] || sl.for_backups?}
+    end
+
+    # Returns an array of Jetpants::DB objects.
+    # Backup slaves are never promoted to active or master. They are for dedicated
+    # backup purposes. They may be a different/cheaper hardware spec than other slaves.
+    def backup_slaves
+      @master.slaves.reject {|sl| @active_slave_weights[sl] || !sl.for_backups?}
+    end
+
+    # Returns a flat array of all Jetpants::DB objects in the pool: the master and
+    # all slaves of all types.
+    def nodes
+      [master, slaves].flatten
+    end
+
+    # Informs Jetpants that slave_db is an active slave. Potentially used by
+    # plugins, such as in Topology at start-up time.
+    def has_active_slave(slave_db, weight=100)
+      slave_db = slave_db.to_db
+      raise "Attempt to mark a DB as its own active slave" if slave_db == @master
+      @active_slave_weights[slave_db] = weight
+    end
+
+    # Turns a standby slave into an active slave, giving it the specified read
+    # weight. Syncs the pool's configuration afterwards. It's up to your asset
+    # tracker plugin to actually do something with this information.
+    def mark_slave_active(slave_db, weight=100)
+      raise "Attempt to make a backup slave be an active slave" if slave_db.for_backups?
+      has_active_slave slave_db, weight
+      sync_configuration
+    end
+
+    # Turns an active slave into a standby slave. Syncs the pool's configuration
+    # afterwards. It's up to your asset tracker plugin to actually do something
+    # with this information.
+    def mark_slave_standby(slave_db)
+      slave_db = slave_db.to_db
+      raise "Cannot call mark_slave_standby on a master" if slave_db == @master
+      @active_slave_weights.delete(slave_db)
+      sync_configuration
+    end
+
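+    # Sketch of toggling a slave's role. Both methods end by calling
+    # sync_configuration, so an asset tracker plugin can pick up the change:
+    #
+    #   slave = pool.standby_slaves.first
+    #   pool.mark_slave_active slave, 100  # standby -> active at weight 100
+    #   pool.mark_slave_standby slave      # active -> standby again
+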
+    # Removes a slave from the pool entirely. This is destructive, i.e. it does a
+    # RESET SLAVE on the db.
+    # Note that a plugin may want to override this (or implement after_remove_slave!)
+    # to actually sync the change to an asset tracker, depending on how the plugin
+    # implements Pool#sync_configuration. (If the implementation makes sync_configuration
+    # work by iterating over the pool's current slaves, it won't see any slaves that
+    # have been removed.)
+    def remove_slave!(slave_db)
+      raise "Slave is not in this pool" unless slave_db.pool == self
+      slave_db.disable_monitoring
+      slave_db.stop_replication
+      slave_db.repl_binlog_coordinates # displays how far we replicated, in case you need to roll back this change manually
+      slave_db.disable_replication!
+    end
+
+    # Informs this pool that it has an alias. A pool may have any number of aliases.
+    def add_alias(name)
+      if @aliases.include? name
+        false
+      else
+        @aliases << name
+        true
+      end
+    end
+
+    # Displays a summary of the pool's members. This outputs immediately instead
+    # of returning a string, so that you can invoke something like:
+    #   Jetpants.topology.pools.each &:summary
+    # to easily display a summary.
+    def summary
+      probe
+      if @aliases.count > 0
+        alias_text = ' (aliases: ' + @aliases.join(', ') + ')'
+      end
+      print "#{name}#{alias_text} [#{master.data_set_size(true)}GB]\n"
+      print "\tmaster = %-13s #{master.hostname}\n" % @master.ip
+      [:active, :standby, :backup].each do |type|
+        slave_list = slaves(type)
+        slave_list.each_with_index do |s, i|
+          print "\t%-7s slave #{i + 1} = %-13s #{s.hostname}\n" % [type, s.ip]
+        end
+      end
+      true
+    end
+
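+    # The printed output looks roughly like this (values illustrative only):
+    #
+    #   user-db (aliases: users) [120GB]
+    #       master = 10.42.0.1     db1.example.com
+    #       active  slave 1 = 10.42.0.2     db2.example.com
+    #       standby slave 1 = 10.42.0.3     db3.example.com
+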
+    # Performs the last steps of the master promotion process. Do not use this
+    # as a stand-alone method; there's other necessary logic, such as setting
+    # the old master to read-only mode, and doing a STOP SLAVE on all slaves.
+    # Use the "jetpants promotion" task instead to do an interactive promotion.
+    # (In a future release, this will be refactored to be fully scriptable.)
+    def master_promotion!(promoted)
+      demoted = @master
+      raise "Promoted host is not in the right pool!" unless @master.slaves.include? promoted
+      user, password = promoted.replication_credentials.values
+      log, position = promoted.binlog_coordinates
+
+      # reset slave on promoted
+      if demoted.available?
+        promoted.disable_replication!
+      else
+        promoted.mysql_root_cmd "STOP SLAVE; RESET SLAVE"
+      end
+
+      # gather our new replicas
+      replicas = demoted.slaves.select {|replica| replica != promoted}
+      replicas << demoted if demoted.available?
+      replicas.flatten!
+
+      # perform promotion
+      replicas.each do |replica|
+        replica.change_master_to promoted,
+          :user     => user,
+          :password => password,
+          :log_file => log,
+          :log_pos  => position
+      end
+
+      # ensure our replicas are configured correctly by comparing our staged
+      # values to the current values on the replicas
+      promoted_replication_config = {
+        master_host:         promoted.ip,
+        master_user:         user,
+        master_log_file:     log,
+        exec_master_log_pos: position.to_s
+      }
+      replicas.each do |r|
+        promoted_replication_config.each do |option, value|
+          raise "Unexpected slave status value for #{option} in replica #{r} after promotion" unless r.slave_status[option] == value
+        end
+        r.resume_replication unless r.replicating?
+      end
+
+      # Update the pool
+      # Note: if the demoted machine is offline, a plugin may need to implement an
+      # after_master_promotion! method which handles this case in the configuration tracker.
+      @active_slave_weights.delete promoted # if promoting an active slave, remove it from the read pool
+      @master = promoted
+      sync_configuration
+      Jetpants.topology.write_config
+
+      replicas.all? {|r| r.replicating?}
+    end
+
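+    # Direct-call sketch (normally reached via the interactive "jetpants promotion"
+    # task, as noted above; shown here only to illustrate the method's contract):
+    #
+    #   new_master = pool.standby_slaves.first
+    #   pool.master_promotion!(new_master)  # => true if every replica ends up replicating
+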
+    # Informs your asset tracker about any changes in the pool's state or members.
+    # Plugins should override this, or use before_sync_configuration /
+    # after_sync_configuration callbacks, to provide an implementation of this method.
+    def sync_configuration
+    end
+
+    # Callback to ensure that a sync'ed pool is already in Topology.pools
+    def before_sync_configuration
+      unless Jetpants.topology.pools.include? self
+        Jetpants.topology.pools << self
+      end
+    end
+
+    # Returns the name of the pool.
+    def to_s
+      @name
+    end
+
+    # Displays the provided output, prefixed with the current time and self
+    # (the name of this Pool).
+    def output(str)
+      str = str.to_s.strip
+      str = nil if str && str.length == 0
+      str ||= "Completed (no output)"
+      output = Time.now.strftime("%H:%M:%S") + " [#{self}] "
+      output << str
+      print output + "\n"
+      output
+    end
+
+    # Jetpants::Pool proxies missing methods to the pool's @master Jetpants::DB instance.
+    def method_missing(name, *args, &block)
+      if @master.respond_to? name
+        @master.send name, *args, &block
+      else
+        super
+      end
+    end
+
+    def respond_to?(name, include_private=false)
+      super || @master.respond_to?(name)
+    end
+
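+    # Because of the proxying above, a Pool can be treated like its master for
+    # most purposes, for example:
+    #
+    #   pool.ip                  # forwarded to the master DB object
+    #   pool.data_set_size(true) # likewise (the same method #summary calls on the master)
+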
+  end
+end
@@ -0,0 +1,311 @@
+require 'json'
+require 'db'
+require 'table'
+require 'pool'
+
+
+module Jetpants
+
+  # A Shard in Jetpants is a range-based Pool. All Shards have the exact same
+  # set of tables; they just contain only the data that falls within their
+  # ID range.
+  class Shard < Pool
+    include CallbackHandler
+
+    # min ID for this shard
+    attr_reader :min_id
+
+    # max ID for this shard, or the string "INFINITY"
+    attr_reader :max_id
+
+    # if this shard is being split, this is an array of "child" Shard objects.
+    attr_reader :children
+
+    # if this shard is a child of one being split, this links back to the parent Shard.
+    attr_accessor :parent
+
+    # A symbol representing the shard's state. Possible state values include:
+    #   :ready         -- Normal shard, online / in production, optimal condition, no current operation/maintenance.
+    #   :read_only     -- Online / in production but not currently writable due to maintenance or emergency.
+    #   :offline       -- In production but not currently readable or writable due to maintenance or emergency.
+    #   :initializing  -- New child shard, being created, not in production.
+    #   :exporting     -- Child shard that is exporting its portion of the data set. Shard not in production yet.
+    #   :importing     -- Child shard that is importing its portion of the data set. Shard not in production yet.
+    #   :replicating   -- Child shard that is being cloned to new replicas. Shard not in production yet.
+    #   :child         -- Child shard that is in production for reads, but still slaving from its parent for writes.
+    #   :needs_cleanup -- Child shard that is fully in production, but parent replication is not torn down yet, and redundant data (from the wrong range) is not removed yet.
+    #   :deprecated    -- Parent shard that has been split but whose children are still in the :child or :needs_cleanup state. Shard may still be in production for writes.
+    #   :recycle       -- Parent shard that has been split and whose children are now in the :ready state. Shard no longer in production.
+    attr_accessor :state
+
+    # Constructor for Shard --
+    # * min_id: int
+    # * max_id: int or the string "INFINITY"
+    # * master: string (IP address) or a Jetpants::DB object
+    # * state: one of the state symbols listed above
+    def initialize(min_id, max_id, master, state=:ready)
+      @min_id = min_id
+      @max_id = max_id
+      @state = state
+
+      @children = [] # array of shards being initialized by splitting this one
+      @parent = nil
+
+      super(generate_name, master)
+    end
+
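+    # Construction sketch (hypothetical values); the name comes from
+    # generate_name, defined below:
+    #
+    #   shard = Shard.new(1, 1_000_000, '10.42.0.5')
+    #   shard.name  # => "shard-1-1000000"
+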
+    # Generates a string containing the shard's min and max IDs. Plugins may want to override.
+    def generate_name
+      "shard-#{min_id}-#{max_id.to_s.downcase}"
+    end
+
+    # Returns true if the shard state is one of the values that indicates it's
+    # a live / in-production shard. These states include :ready, :child,
+    # :needs_cleanup, :read_only, and :offline.
+    def in_config?
+      [:ready, :child, :needs_cleanup, :read_only, :offline].include? @state
+    end
+
+    # In default Jetpants, we assume each Shard has 1 master and N standby slaves;
+    # we never have active (read) slaves for shards. So calling mark_slave_active
+    # on a Shard raises an exception. Plugins may override this behavior, which
+    # may be necessary for sites spanning two or more active data centers.
+    def mark_slave_active(slave_db, weight=100)
+      raise "Shards do not support active slaves"
+    end
+
+    # Returns an empty array, because we assume that shard pools have no active
+    # slaves. (If your read volume would require active slaves, think about
+    # splitting your shard instead...)
+    #
+    # Plugins may of course override this behavior.
+    def active_slaves
+      []
+    end
+
+    # Returns the master's standby slaves, ignoring any child shards, since they
+    # are a special case of slaves.
+    def standby_slaves
+      result = super
+      if @children.count > 0
+        is_child_master = {}
+        @children.each {|c| is_child_master[c.master] = true}
+        result.reject {|sl| is_child_master[sl]}
+      else
+        result
+      end
+    end
+
+    # Returns the Jetpants::DB object corresponding to the requested access
+    # mode (either :read or :write). Ordinarily this will be the shard's
+    # @master, unless this shard is still a child, in which case we send
+    # writes to the shard's parent's master instead.
+    def db(mode=:read)
+      (mode.to_sym == :write && @parent ? @parent.master : master)
+    end
+
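+    # Routing sketch: for a shard in the :child state that is still slaving from
+    # its parent, reads go to its own master while writes go upstream:
+    #
+    #   child.db(:read)   # => child's own master
+    #   child.db(:write)  # => parent's master, as long as child.parent is set
+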
+    # Adds a Jetpants::Shard to this shard's array of children, and sets
+    # the child's parent to be self.
+    def add_child(shard)
+      raise "Shard #{shard} already has a parent!" if shard.parent
+      @children << shard
+      shard.parent = self
+    end
+
+    # Removes a Jetpants::Shard from this shard's array of children, and sets
+    # the child's parent to nil.
+    def remove_child(shard)
+      raise "Shard #{shard} isn't a child of this shard!" unless shard.parent == self
+      @children.delete shard
+      shard.parent = nil
+    end
+
+    # Creates and returns <count> child shards, pulling boxes for masters from the spare list.
+    # You can optionally supply the ID ranges to use: pass in an array of arrays,
+    # where the outer array is of size <count> and each inner array is [min_id, max_id].
+    # If you omit id_ranges, the parent's ID range will be divided evenly amongst the
+    # children automatically.
+    def init_children(count, id_ranges=false)
+      # Make sure we have enough machines in the spare pool
+      raise "Not enough master role machines in spare pool!" if count > Jetpants.topology.count_spares(role: 'master')
+      raise "Not enough standby_slave role machines in spare pool!" if count * Jetpants.standby_slaves_per_pool > Jetpants.topology.count_spares(role: 'standby_slave')
+
+      # Make sure we have enough slaves of the shard being split
+      raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.count < Jetpants.standby_slaves_per_pool
+
+      # Make sure the right number of id_ranges were supplied, if any were
+      raise "Wrong number of id_ranges supplied" if id_ranges && id_ranges.count != count
+
+      unless id_ranges
+        id_ranges = []
+        ids_total = 1 + @max_id - @min_id
+        current_min_id = @min_id
+        count.times do |i|
+          ids_this_pool = (ids_total / count).floor
+          ids_this_pool += 1 if i < (ids_total % count)
+          id_ranges << [current_min_id, current_min_id + ids_this_pool - 1]
+          current_min_id += ids_this_pool
+        end
+      end
+
+      count.times do |i|
+        spare = Jetpants.topology.claim_spare(role: 'master')
+        spare.output "Using ID range of #{id_ranges[i][0]} to #{id_ranges[i][1]} (inclusive)"
+        s = Shard.new(id_ranges[i][0], id_ranges[i][1], spare, :initializing)
+        add_child(s)
+        Jetpants.topology.pools << s
+        s.sync_configuration
+      end
+
+      @children
+    end
+
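+    # Worked example of the automatic range division above: splitting a parent
+    # covering IDs 1..1000 into 3 children gives ids_total = 1000, and
+    # 1000 / 3 = 333 remainder 1, so the first child receives one extra ID:
+    #
+    #   [[1, 334], [335, 667], [668, 1000]]
+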
+    # Splits a shard into <pieces> child shards. The children will still be slaving
+    # from the parent after this point; you need to do additional things to fully
+    # complete the shard split. See the command suite tasks shard_split_move_reads,
+    # shard_split_move_writes, and shard_split_cleanup.
+    def split!(pieces=2)
+      raise "Cannot split a shard that is still a child!" if @parent
+
+      init_children(pieces) unless @children.count > 0
+
+      @children.concurrent_each {|c| c.stop_query_killer; c.disable_binary_logging}
+      clone_to_children!
+      @children.concurrent_each {|c| c.rebuild!}
+      @children.each {|c| c.sync_configuration}
+
+      @state = :deprecated
+      sync_configuration
+      output "Initial split complete."
+    end
+
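+    # End-to-end sketch of a split; the later steps live in the command suite
+    # tasks named above:
+    #
+    #   shard.split!(2)  # init children, clone data, rebuild, mark self :deprecated
+    #   # then run shard_split_move_reads, shard_split_move_writes (see
+    #   # move_writes_to_children below), and shard_split_cleanup (see cleanup!)
+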
+    # Transitions the shard's children into the :needs_cleanup state. It is the
+    # responsibility of an asset tracker plugin / config generator to implement
+    # config generation in a way that actually makes writes go to shards
+    # in the :needs_cleanup state.
+    def move_writes_to_children
+      @children.each do |c|
+        c.state = :needs_cleanup
+        c.sync_configuration
+      end
+    end
+
+    # Clones the current shard to its children. Uses a standby slave of self as
+    # the source for copying.
+    def clone_to_children!
+      # Figure out which slave(s) we can use for populating the new masters
+      sources = standby_slaves.dup
+      sources.shift
+      raise "Need to have at least 1 slave in order to create additional slaves" if sources.length < 1
+
+      # Figure out which machines we need to turn into slaves
+      targets = []
+      @children.each do |child_shard|
+        if child_shard.master.is_slave? && child_shard.master.master != @master
+          raise "Child shard master #{child_shard.master} is already a slave of another pool"
+        elsif child_shard.master.is_slave?
+          child_shard.output "Already slaving from parent shard master"
+          child_shard.restart_mysql # to make the previous disabling of binary logging take effect
+        else
+          targets << child_shard.master
+        end
+      end
+
+      while targets.count > 0 do
+        chain_length = (targets.count.to_f / sources.count.to_f).ceil
+        chain_length = 3 if chain_length > 3 # For sanity's sake, we only allow a copy pipeline that populates 3 instances at once.
+        sources.concurrent_each_with_index do |src, idx|
+          my_targets = targets[idx * chain_length, chain_length]
+          src.enslave_siblings! my_targets
+          chain_length.times {|n| targets[(idx * chain_length) + n] = nil}
+        end
+        targets.compact!
+      end
+    end
+
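+    # Illustration of the copy-pipeline math above: with 2 sources and 5 targets,
+    # chain_length = (5 / 2.0).ceil = 3, so on the first pass source 0 populates
+    # targets 0..2 and source 1 populates targets 3..4; every slot is then nil'ed,
+    # targets.compact! empties the array, and the while loop exits.
+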
+    # Exports data that should stay on this shard, drops and re-creates tables,
+    # re-imports the data, and then adds slaves to the shard pool as needed.
+    # The optional stage param lets you skip some steps, but this is only really
+    # useful if you're running this manually and it failed part-way.
+    def rebuild!(stage=0)
+      # Sanity check
+      raise "Cannot rebuild a shard that isn't still slaving from another shard" unless @master.is_slave?
+      raise "Cannot rebuild an active shard" if in_config?
+
+      tables = Table.from_config 'sharded_tables'
+
+      if stage <= 1
+        raise "Shard is not in the expected initializing or exporting states" unless [:initializing, :exporting].include? @state
+        @state = :exporting
+        sync_configuration
+        export_schemata tables
+        export_data tables, @min_id, @max_id
+      end
+
+      if stage <= 2
+        raise "Shard is not in the expected exporting or importing states" unless [:exporting, :importing].include? @state
+        @state = :importing
+        sync_configuration
+        import_schemata!
+        alter_schemata if respond_to? :alter_schemata
+        import_data tables, @min_id, @max_id
+        start_query_killer
+      end
+
+      if stage <= 3
+        raise "Shard is not in the expected importing or replicating states" unless [:importing, :replicating].include? @state
+        enable_binary_logging
+        restart_mysql
+        @state = :replicating
+        sync_configuration
+        my_slaves = Jetpants.topology.claim_spares(Jetpants.standby_slaves_per_pool, role: 'standby_slave')
+        enslave!(my_slaves)
+        my_slaves.each {|slv| slv.resume_replication}
+        [self, my_slaves].flatten.each {|db| db.catch_up_to_master}
+      end
+
+      @state = :child
+    end
+
+    # Run this on a parent shard after the rest of a shard split is complete.
+    # Sets this shard's master to read-only; removes the application user from
+    # self (without replicating this change to children); disables replication
+    # between the parent and the children; and then removes rows from the
+    # children that replicated to the wrong shard.
+    def cleanup!
+      raise "Can only run cleanup! on a parent shard in the deprecated state" unless @state == :deprecated
+      raise "Cannot call cleanup! on a child shard" if @parent
+
+      tables = Table.from_config 'sharded_tables'
+      @master.revoke_all_access!
+      @children.concurrent_each do |child_shard|
+        raise "Child state does not indicate cleanup is needed" unless child_shard.state == :needs_cleanup
+        raise "Child shard master should be a slave in order to clean up" unless child_shard.is_slave?
+        child_shard.master.disable_replication! # stop slaving from parent
+        child_shard.prune_data_to_range tables, child_shard.min_id, child_shard.max_id
+      end
+
+      # We have to iterate over a copy of the @children array, rather than the array
+      # directly, since Array#each skips elements when you remove elements in-place,
+      # which Shard#remove_child does...
+      @children.dup.each do |child_shard|
+        child_shard.state = :ready
+        remove_child child_shard
+        child_shard.sync_configuration
+      end
+      @state = :recycle
+      sync_configuration
+    end
+
+    # Displays information about the shard and, optionally, its children.
+    def summary(with_children=true)
+      super()
+      if with_children
+        children.each {|c| c.summary}
+      end
+      true
+    end
+
+  end
+end