jetpants 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,272 @@
require 'json'
require 'db'

module Jetpants

  # A Pool represents a group of database instances (Jetpants::DB objects).
  #
  # The default implementation assumes that a Pool contains:
  # * 1 master
  # * 0 or more slaves, falling into one of these categories:
  #   * active slaves (actively taking production read queries)
  #   * standby slaves (for HA, promotable if a master or active slave fails + used to clone new replacements)
  #   * backup slaves (dedicated for backups and background jobs, never put into prod, potentially different hardware spec)
  #
  # Plugins may of course override this extensively, to support different
  # topologies, such as master-master trees.
  #
  # Many of these methods are only useful in conjunction with an asset-tracker /
  # configuration-generator plugin.
  class Pool
    include CallbackHandler

    # Human-readable String name of the pool
    attr_reader :name

    # Jetpants::DB object that is the pool's master
    attr_reader :master

    # Array of strings containing other equivalent names for this pool
    attr_reader :aliases

    # Can be used to store a name that refers to just the active_slaves, for
    # instance if your framework isn't smart enough to know about master/slave
    # relationships. Safe to leave as nil otherwise. Has no effect in Jetpants,
    # but an asset tracker / config generator plugin may include this in the
    # generated config file.
    attr_accessor :slave_name

    # Hash mapping DB object => weight, for active (read) slaves. Default weight
    # is 100. Safe to leave at default if your app framework doesn't support
    # different weights for individual read slaves. Weights have no effect inside
    # Jetpants, but any asset tracker / config generator plugin can carry them
    # through to the config file.
    attr_reader :active_slave_weights

    # If the master also receives read queries, this stores its weight. Set to 0
    # if the master does not receive read queries (which is the default). This
    # has no effect inside of Jetpants, but can be used by an asset tracker /
    # config generator plugin to carry the value through to the config file.
    attr_accessor :master_read_weight

    def initialize(name, master)
      @name = name
      @slave_name = false
      @aliases = []
      @master = master.to_db
      @master_read_weight = 0
      @active_slave_weights = {}
    end
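
    # A minimal construction sketch (editor's illustration, not part of the gem
    # source; the pool name and IPs are hypothetical, and String#to_db assumes a
    # working Jetpants configuration that can resolve those hosts):
    #
    #   pool = Jetpants::Pool.new('user-db', '10.42.0.1')
    #   pool.has_active_slave '10.42.0.2', 100   # register a read slave at default weight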

    # Returns all slaves, or pass in :active, :standby, or :backup to receive
    # only slaves of a particular type.
    def slaves(type=false)
      case type
      when :active  then active_slaves
      when :standby then standby_slaves
      when :backup  then backup_slaves
      when false    then @master.slaves
      else []
      end
    end
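
    # Usage sketch for the type filter (editor's illustration, not part of the
    # gem source):
    #
    #   pool.slaves             # all slaves, regardless of role
    #   pool.slaves(:standby)   # only the standby slaves
    #   pool.slaves(:bogus)     # any unrecognized type returns []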

    # Returns an array of Jetpants::DB objects.
    # Active slaves are ones that receive read queries from your application.
    def active_slaves
      @master.slaves.select {|sl| @active_slave_weights[sl]}
    end

    # Returns an array of Jetpants::DB objects.
    # Standby slaves do not receive queries from your application. These are for
    # high availability. They can be turned into active slaves or even the master,
    # and can also be used for cloning additional slaves.
    def standby_slaves
      @master.slaves.reject {|sl| @active_slave_weights[sl] || sl.for_backups?}
    end

    # Returns an array of Jetpants::DB objects.
    # Backup slaves are never promoted to active or master. They are for dedicated
    # backup purposes. They may be a different/cheaper hardware spec than other
    # slaves.
    def backup_slaves
      @master.slaves.reject {|sl| @active_slave_weights[sl] || !sl.for_backups?}
    end

    # Returns a flat array of all Jetpants::DB objects in the pool: the master
    # and all slaves of all types.
    def nodes
      [master, slaves].flatten
    end

    # Informs Jetpants that slave_db is an active slave. Potentially used by
    # plugins, such as in Topology at start-up time.
    def has_active_slave(slave_db, weight=100)
      slave_db = slave_db.to_db
      raise "Attempt to mark a DB as its own active slave" if slave_db == @master
      @active_slave_weights[slave_db] = weight
    end

    # Turns a standby slave into an active slave, giving it the specified read
    # weight. Syncs the pool's configuration afterwards. It's up to your asset
    # tracker plugin to actually do something with this information.
    def mark_slave_active(slave_db, weight=100)
      raise "Attempt to make a backup slave be an active slave" if slave_db.for_backups?
      has_active_slave slave_db, weight
      sync_configuration
    end

    # Turns an active slave into a standby slave. Syncs the pool's configuration
    # afterwards. It's up to your asset tracker plugin to actually do something
    # with this information.
    def mark_slave_standby(slave_db)
      slave_db = slave_db.to_db
      raise "Cannot call mark_slave_standby on a master" if slave_db == @master
      @active_slave_weights.delete(slave_db)
      sync_configuration
    end
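
    # Role-change sketch (editor's illustration, not part of the gem source;
    # hypothetical IP, and assumes an asset tracker plugin has implemented
    # sync_configuration):
    #
    #   db = '10.42.0.2'.to_db
    #   pool.mark_slave_active db, 50   # standby -> active at half weight
    #   pool.mark_slave_standby db      # ...and back to standby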

    # Remove a slave from a pool entirely. This is destructive, i.e., it does a
    # RESET SLAVE on the db.
    # Note that a plugin may want to override this (or implement after_remove_slave!)
    # to actually sync the change to an asset tracker, depending on how the plugin
    # implements Pool#sync_configuration. (If the implementation makes sync_configuration
    # work by iterating over the pool's current slaves, it won't see any slaves that
    # have been removed.)
    def remove_slave!(slave_db)
      raise "Slave is not in this pool" unless slave_db.pool == self
      slave_db.disable_monitoring
      slave_db.stop_replication
      slave_db.repl_binlog_coordinates # displays how far we replicated, in case you need to roll back this change manually
      slave_db.disable_replication!
    end

    # Informs this pool that it has an alias. A pool may have any number of aliases.
    def add_alias(name)
      if @aliases.include? name
        false
      else
        @aliases << name
        true
      end
    end

    # Displays a summary of the pool's members. This outputs immediately instead
    # of returning a string, so that you can invoke something like:
    #   Jetpants.topology.pools.each &:summary
    # to easily display a summary.
    def summary
      probe
      if @aliases.count > 0
        alias_text = ' (aliases: ' + @aliases.join(', ') + ')'
      end
      print "#{name}#{alias_text} [#{master.data_set_size(true)}GB]\n"
      print "\tmaster = %-13s #{master.hostname}\n" % @master.ip
      [:active, :standby, :backup].each do |type|
        slave_list = slaves(type)
        slave_list.each_with_index do |s, i|
          print "\t%-7s slave #{i + 1} = %-13s #{s.hostname}\n" % [type, s.ip]
        end
      end
      true
    end

    # Performs the last steps of the master promotion process. Do not use this
    # as a stand-alone method; there's other necessary logic, such as setting
    # the old master to read-only mode, and doing a STOP SLAVE on all slaves.
    # Use the "jetpants promotion" task instead to do an interactive promotion.
    # (In a future release, this will be refactored to be fully scriptable.)
    def master_promotion!(promoted)
      demoted = @master
      raise "Promoted host is not in the right pool!" unless @master.slaves.include? promoted
      user, password = promoted.replication_credentials.values
      log, position = promoted.binlog_coordinates

      # reset slave on promoted
      if demoted.available?
        promoted.disable_replication!
      else
        promoted.mysql_root_cmd "STOP SLAVE; RESET SLAVE"
      end

      # gather our new replicas
      replicas = demoted.slaves.select {|replica| replica != promoted}
      replicas << demoted if demoted.available?
      replicas.flatten!

      # perform promotion
      replicas.each do |replica|
        replica.change_master_to promoted,
                                 :user     => user,
                                 :password => password,
                                 :log_file => log,
                                 :log_pos  => position
      end

      # ensure our replicas are configured correctly by comparing our staged values to current values of replicas
      promoted_replication_config = {
        master_host:         promoted.ip,
        master_user:         user,
        master_log_file:     log,
        exec_master_log_pos: position.to_s
      }
      replicas.each do |r|
        promoted_replication_config.each do |option, value|
          raise "Unexpected slave status value for #{option} in replica #{r} after promotion" unless r.slave_status[option] == value
        end
        r.resume_replication unless r.replicating?
      end

      # Update the pool.
      # Note: if the demoted machine is offline, the plugin may need to implement
      # an after_master_promotion! method which handles this case in the
      # configuration tracker.
      @active_slave_weights.delete promoted # if promoting an active slave, remove it from the read pool
      @master = promoted
      sync_configuration
      Jetpants.topology.write_config

      replicas.all? {|r| r.replicating?}
    end
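
    # Promotion flow sketch (editor's illustration, not part of the gem source).
    # In 0.7.0 this is normally driven by the interactive "jetpants promotion"
    # task, which is expected to handle the prerequisite steps mentioned above
    # (old master read-only, STOP SLAVE everywhere) before this method runs:
    #
    #   new_master = pool.standby_slaves.first
    #   pool.master_promotion! new_master   # => true if every replica ends up replicating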

    # Informs your asset tracker about any changes in the pool's state or members.
    # Plugins should override this, or use before_sync_configuration /
    # after_sync_configuration callbacks, to provide an implementation of this
    # method.
    def sync_configuration
    end

    # Callback to ensure that a sync'ed pool is already in Topology.pools
    def before_sync_configuration
      unless Jetpants.topology.pools.include? self
        Jetpants.topology.pools << self
      end
    end

    # Returns the name of the pool.
    def to_s
      @name
    end

    # Displays the provided output, along with information about the current time,
    # and self (the name of this Pool)
    def output(str)
      str = str.to_s.strip
      str = nil if str && str.length == 0
      str ||= "Completed (no output)"
      output = Time.now.strftime("%H:%M:%S") + " [#{self}] "
      output << str
      print output + "\n"
      output
    end

    # Jetpants::Pool proxies missing methods to the pool's @master Jetpants::DB instance.
    def method_missing(name, *args, &block)
      if @master.respond_to? name
        @master.send name, *args, &block
      else
        super
      end
    end

    def respond_to?(name, include_private=false)
      super || @master.respond_to?(name)
    end
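
    # Because of this proxying, a Pool can often be treated as its master for
    # DB operations -- a sketch (editor's illustration, not part of the gem
    # source; assumes a reachable master):
    #
    #   pool.mysql_root_cmd 'SHOW PROCESSLIST'   # actually runs on pool.master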

  end
end
@@ -0,0 +1,311 @@
require 'json'
require 'db'
require 'table'
require 'pool'


module Jetpants

  # A Shard in Jetpants is a range-based Pool. All Shards have the exact same
  # set of tables; each one just contains the subset of data that falls within
  # its range.
  class Shard < Pool
    include CallbackHandler

    # min ID for this shard
    attr_reader :min_id

    # max ID for this shard, or the string "INFINITY"
    attr_reader :max_id

    # if this shard is being split, this is an array of "child" Shard objects.
    attr_reader :children

    # if this shard is a child of one being split, this links back to the parent Shard.
    attr_accessor :parent

    # A symbol representing the shard's state. Possible state values include:
    #   :ready          -- Normal shard, online / in production, optimal condition, no current operation/maintenance.
    #   :read_only      -- Online / in production but not currently writable due to maintenance or emergency.
    #   :offline        -- In production but not currently readable or writable due to maintenance or emergency.
    #   :initializing   -- New child shard, being created, not in production.
    #   :exporting      -- Child shard that is exporting its portion of the data set. Shard not in production yet.
    #   :importing      -- Child shard that is importing its portion of the data set. Shard not in production yet.
    #   :replicating    -- Child shard that is being cloned to new replicas. Shard not in production yet.
    #   :child          -- Child shard that is in production for reads, but still slaving from its parent for writes.
    #   :needs_cleanup  -- Child shard that is fully in production, but parent replication is not torn down yet, and redundant data (from the wrong range) is not removed yet.
    #   :deprecated     -- Parent shard that has been split but whose children are still in the :child or :needs_cleanup state. Shard may still be in production for writes.
    #   :recycle        -- Parent shard that has been split and whose children are now in the :ready state. Shard no longer in production.
    attr_accessor :state

    # Constructor for Shard --
    # * min_id: int
    # * max_id: int or the string "INFINITY"
    # * master: string (IP address) or a Jetpants::DB object
    # * state: one of the above state symbols
    def initialize(min_id, max_id, master, state=:ready)
      @min_id = min_id
      @max_id = max_id
      @state = state

      @children = [] # array of shards being initialized by splitting this one
      @parent = nil

      super(generate_name, master)
    end

    # Generates a string containing the shard's min and max IDs. Plugins may want to override.
    def generate_name
      "shard-#{min_id}-#{max_id.to_s.downcase}"
    end

    # Returns true if the shard state is one of the values that indicates it's
    # a live / in-production shard. These states include :ready, :child,
    # :needs_cleanup, :read_only, and :offline.
    def in_config?
      [:ready, :child, :needs_cleanup, :read_only, :offline].include? @state
    end

    # In default Jetpants, we assume each Shard has 1 master and N standby slaves;
    # we never have active (read) slaves for shards. So calling mark_slave_active
    # on a Shard raises an exception. Plugins may override this behavior, which
    # may be necessary for sites spanning two or more active data centers.
    def mark_slave_active(slave_db, weight=100)
      raise "Shards do not support active slaves"
    end

    # Returns an empty array, because we assume that shard pools have no active
    # slaves. (If your read volume would require active slaves, think about
    # splitting your shard instead...)
    #
    # Plugins may of course override this behavior.
    def active_slaves
      []
    end

    # Returns the master's standby slaves, ignoring any child shards since they
    # are a special case of slaves.
    def standby_slaves
      result = super
      if @children.count > 0
        is_child_master = {}
        @children.each {|c| is_child_master[c.master] = true}
        result.reject {|sl| is_child_master[sl]}
      else
        result
      end
    end

    # Returns the Jetpants::DB object corresponding to the requested access
    # mode (either :read or :write). Ordinarily this will be the shard's
    # @master, unless this shard is still a child, in which case we send
    # writes to the shard's parent's master instead.
    def db(mode=:read)
      (mode.to_sym == :write && @parent ? @parent.master : master)
    end
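
    # Routing sketch (editor's illustration, not part of the gem source): while
    # a child shard is still slaving from its parent, reads stay local but
    # writes route to the parent.
    #
    #   shard.db(:read)    # => this shard's master
    #   shard.db(:write)   # => parent's master while @parent is set, else this shard's master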

    # Adds a Jetpants::Shard to this shard's array of children, and sets
    # the child's parent to be self.
    def add_child(shard)
      raise "Shard #{shard} already has a parent!" if shard.parent
      @children << shard
      shard.parent = self
    end

    # Removes a Jetpants::Shard from this shard's array of children, and sets
    # the child's parent to nil.
    def remove_child(shard)
      raise "Shard #{shard} isn't a child of this shard!" unless shard.parent == self
      @children.delete shard
      shard.parent = nil
    end

    # Creates and returns <count> child shards, pulling boxes for masters from the
    # spare list. You can optionally supply the ID ranges to use: pass in an array
    # of arrays, where the outer array is of size <count> and each inner array is
    # [min_id, max_id]. If you omit id_ranges, the parent's ID range will be
    # divided evenly amongst the children automatically.
    def init_children(count, id_ranges=false)
      # Make sure we have enough machines in the spare pool
      raise "Not enough master role machines in spare pool!" if count > Jetpants.topology.count_spares(role: 'master')
      raise "Not enough standby_slave role machines in spare pool!" if count * Jetpants.standby_slaves_per_pool > Jetpants.topology.count_spares(role: 'standby_slave')

      # Make sure the shard being split has enough slaves
      raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.count < Jetpants.standby_slaves_per_pool

      # Make sure the right number of id_ranges were supplied, if any were
      raise "Wrong number of id_ranges supplied" if id_ranges && id_ranges.count != count

      unless id_ranges
        id_ranges = []
        ids_total = 1 + @max_id - @min_id
        current_min_id = @min_id
        count.times do |i|
          ids_this_pool = (ids_total / count).floor
          ids_this_pool += 1 if i < (ids_total % count)
          id_ranges << [current_min_id, current_min_id + ids_this_pool - 1]
          current_min_id += ids_this_pool
        end
      end

      count.times do |i|
        spare = Jetpants.topology.claim_spare(role: 'master')
        spare.output "Using ID range of #{id_ranges[i][0]} to #{id_ranges[i][1]} (inclusive)"
        s = Shard.new(id_ranges[i][0], id_ranges[i][1], spare, :initializing)
        add_child(s)
        Jetpants.topology.pools << s
        s.sync_configuration
      end

      @children
    end
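
    # Worked example of the automatic range division (editor's illustration):
    # splitting min_id=1, max_id=1_000_000 into count=3 children gives
    # ids_total = 1_000_000 and ids_total % count = 1, so the first child gets
    # one extra ID: [1, 333334], [333335, 666667], [666668, 1000000].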

    # Splits a shard into <pieces> child shards. The children will still be slaving
    # from the parent after this point; you need to do additional things to fully
    # complete the shard split. See the command suite tasks shard_split_move_reads,
    # shard_split_move_writes, and shard_split_cleanup.
    def split!(pieces=2)
      raise "Cannot split a shard that is still a child!" if @parent

      init_children(pieces) unless @children.count > 0

      @children.concurrent_each {|c| c.stop_query_killer; c.disable_binary_logging}
      clone_to_children!
      @children.concurrent_each {|c| c.rebuild!}
      @children.each {|c| c.sync_configuration}

      @state = :deprecated
      sync_configuration
      output "Initial split complete."
    end
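
    # End-to-end sketch of a two-way split (editor's illustration, not part of
    # the gem source):
    #
    #   shard.split!(2)   # children built and rebuilt, but still slaving from parent
    #   # then finish via the command suite tasks noted above:
    #   # shard_split_move_reads, shard_split_move_writes, shard_split_cleanup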

    # Transitions the shard's children into the :needs_cleanup state. It is the
    # responsibility of an asset tracker plugin / config generator to implement
    # config generation in a way that actually makes writes go to shards
    # in the :needs_cleanup state.
    def move_writes_to_children
      @children.each do |c|
        c.state = :needs_cleanup
        c.sync_configuration
      end
    end

    # Clones the current shard to its children. Uses a standby slave of self as
    # the source for copying.
    def clone_to_children!
      # Figure out which slave(s) we can use for populating the new masters
      sources = standby_slaves.dup
      sources.shift
      raise "Need to have at least 1 slave in order to create additional slaves" if sources.length < 1

      # Figure out which machines we need to turn into slaves
      targets = []
      @children.each do |child_shard|
        if child_shard.master.is_slave? && child_shard.master.master != @master
          raise "Child shard master #{child_shard.master} is already a slave of another pool"
        elsif child_shard.master.is_slave?
          child_shard.output "Already slaving from parent shard master"
          child_shard.restart_mysql # to make previous disable of binary logging take effect
        else
          targets << child_shard.master
        end
      end

      while targets.count > 0 do
        chain_length = (targets.count.to_f / sources.count.to_f).ceil
        chain_length = 3 if chain_length > 3 # For sanity's sake, we only allow a copy pipeline that populates 3 instances at once.
        sources.concurrent_each_with_index do |src, idx|
          my_targets = targets[idx * chain_length, chain_length]
          src.enslave_siblings! my_targets
          chain_length.times {|n| targets[(idx * chain_length) + n] = nil}
        end
        targets.compact!
      end
    end
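
    # Worked example of the copy-chaining math (editor's illustration): with 2
    # usable sources and 5 targets, chain_length = (5 / 2.0).ceil = 3 (also the
    # cap), so the first source populates targets 0-2 while the second populates
    # targets 3-4 concurrently, and the while loop finishes in a single pass.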

    # Exports data that should stay on this shard, drops and re-creates tables,
    # re-imports the data, and then adds slaves to the shard pool as needed.
    # The optional stage param lets you skip some steps, but this is only really
    # useful if you're running this manually and it failed part-way.
    def rebuild!(stage=0)
      # Sanity check
      raise "Cannot rebuild a shard that isn't still slaving from another shard" unless @master.is_slave?
      raise "Cannot rebuild an active shard" if in_config?

      tables = Table.from_config 'sharded_tables'

      if stage <= 1
        raise "Shard is not in the expected initializing or exporting states" unless [:initializing, :exporting].include? @state
        @state = :exporting
        sync_configuration
        export_schemata tables
        export_data tables, @min_id, @max_id
      end

      if stage <= 2
        raise "Shard is not in the expected exporting or importing states" unless [:exporting, :importing].include? @state
        @state = :importing
        sync_configuration
        import_schemata!
        alter_schemata if respond_to? :alter_schemata
        import_data tables, @min_id, @max_id
        start_query_killer
      end

      if stage <= 3
        raise "Shard is not in the expected importing or replicating states" unless [:importing, :replicating].include? @state
        enable_binary_logging
        restart_mysql
        @state = :replicating
        sync_configuration
        my_slaves = Jetpants.topology.claim_spares(Jetpants.standby_slaves_per_pool, role: 'standby_slave')
        enslave!(my_slaves)
        my_slaves.each {|slv| slv.resume_replication}
        [self, my_slaves].flatten.each {|db| db.catch_up_to_master}
      end

      @state = :child
    end
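
    # Resumption sketch (editor's illustration, not part of the gem source): if
    # a rebuild died after the export finished, stage 2 redoes just the import
    # and the replication setup, without re-exporting:
    #
    #   shard.rebuild!(2)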

    # Run this on a parent shard after the rest of a shard split is complete.
    # Sets this shard's master to read-only; removes the application user from
    # self (without replicating this change to children); disables replication
    # between the parent and the children; and then removes rows from the
    # children that replicated to the wrong shard.
    def cleanup!
      raise "Can only run cleanup! on a parent shard in the deprecated state" unless @state == :deprecated
      raise "Cannot call cleanup! on a child shard" if @parent

      tables = Table.from_config 'sharded_tables'
      @master.revoke_all_access!
      @children.concurrent_each do |child_shard|
        raise "Child state does not indicate cleanup is needed" unless child_shard.state == :needs_cleanup
        raise "Child shard master should be a slave in order to clean up" unless child_shard.is_slave?
        child_shard.master.disable_replication! # stop slaving from parent
        child_shard.prune_data_to_range tables, child_shard.min_id, child_shard.max_id
      end

      # We have to iterate over a copy of the @children array, rather than the array
      # directly, since Array#each skips elements when you remove elements in-place,
      # which Shard#remove_child does...
      @children.dup.each do |child_shard|
        child_shard.state = :ready
        remove_child child_shard
        child_shard.sync_configuration
      end
      @state = :recycle
      sync_configuration
    end

    # Displays information about the shard
    def summary(with_children=true)
      super()
      if with_children
        children.each {|c| c.summary}
      end
      true
    end

  end
end