jetpants 0.8.0 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.rdoc +4 -9
- data/bin/jetpants +7 -6
- data/doc/capacity_plan.rdoc +77 -0
- data/doc/commands.rdoc +1 -1
- data/doc/jetpants_collins.rdoc +2 -1
- data/doc/online_schema_change.rdoc +45 -0
- data/doc/plugins.rdoc +7 -1
- data/doc/requirements.rdoc +1 -1
- data/doc/upgrade_helper.rdoc +68 -0
- data/lib/jetpants/db/client.rb +2 -1
- data/lib/jetpants/db/import_export.rb +12 -3
- data/lib/jetpants/db/replication.rb +6 -2
- data/lib/jetpants/db/schema.rb +40 -0
- data/lib/jetpants/db/server.rb +2 -2
- data/lib/jetpants/host.rb +12 -1
- data/lib/jetpants/pool.rb +41 -0
- data/lib/jetpants/shard.rb +201 -124
- data/lib/jetpants/table.rb +80 -10
- data/plugins/capacity_plan/capacity_plan.rb +353 -0
- data/plugins/capacity_plan/commandsuite.rb +19 -0
- data/plugins/capacity_plan/monkeypatch.rb +20 -0
- data/plugins/jetpants_collins/db.rb +45 -6
- data/plugins/jetpants_collins/jetpants_collins.rb +32 -21
- data/plugins/jetpants_collins/pool.rb +22 -1
- data/plugins/jetpants_collins/shard.rb +9 -2
- data/plugins/jetpants_collins/topology.rb +8 -9
- data/plugins/online_schema_change/commandsuite.rb +56 -0
- data/plugins/online_schema_change/db.rb +33 -0
- data/plugins/online_schema_change/online_schema_change.rb +5 -0
- data/plugins/online_schema_change/pool.rb +105 -0
- data/plugins/online_schema_change/topology.rb +56 -0
- data/plugins/simple_tracker/shard.rb +1 -1
- data/plugins/upgrade_helper/commandsuite.rb +212 -0
- data/plugins/upgrade_helper/db.rb +78 -0
- data/plugins/upgrade_helper/host.rb +22 -0
- data/plugins/upgrade_helper/pool.rb +259 -0
- data/plugins/upgrade_helper/shard.rb +61 -0
- data/plugins/upgrade_helper/upgrade_helper.rb +21 -0
- data/scripts/global_rowcount.rb +75 -0
- metadata +28 -15
data/lib/jetpants/host.rb
CHANGED
@@ -142,7 +142,7 @@ module Jetpants
|
|
142
142
|
# Confirm that something is listening on the given port. The timeout param
|
143
143
|
# indicates how long to wait (in seconds) for a process to be listening.
|
144
144
|
def confirm_listening_on_port(port, timeout=10)
|
145
|
-
checker_th = Thread.new { ssh_cmd "while [[ `netstat -ln | grep
|
145
|
+
checker_th = Thread.new { ssh_cmd "while [[ `netstat -ln | grep :#{port} | wc -l` -lt 1 ]] ; do sleep 1; done" }
|
146
146
|
raise "Nothing is listening on #{@ip}:#{port} after #{timeout} seconds" unless checker_th.join(timeout)
|
147
147
|
true
|
148
148
|
end
|
@@ -384,6 +384,17 @@ module Jetpants
|
|
384
384
|
@cores = (count ? count.to_i : 1)
|
385
385
|
end
|
386
386
|
|
387
|
+
# Returns the amount of memory on machine, either in bytes (default) or in GB.
|
388
|
+
# Linux-specific.
|
389
|
+
def memory(in_gb=false)
|
390
|
+
line = ssh_cmd 'cat /proc/meminfo | grep MemTotal'
|
391
|
+
matches = line.match /(?<size>\d+)\s+(?<unit>kB|mB|gB|B)/
|
392
|
+
size = matches[:size].to_i
|
393
|
+
multipliers = {kB: 1024, mB: 1024**2, gB: 1024**3, B: 1}
|
394
|
+
size *= multipliers[matches[:unit].to_sym]
|
395
|
+
in_gb ? size / 1024**3 : size
|
396
|
+
end
|
397
|
+
|
387
398
|
# Returns the machine's hostname
|
388
399
|
def hostname
|
389
400
|
return 'unknown' unless available?
|
data/lib/jetpants/pool.rb
CHANGED
@@ -56,6 +56,7 @@ module Jetpants
|
|
56
56
|
@master = master.to_db
|
57
57
|
@master_read_weight = 0
|
58
58
|
@active_slave_weights = {}
|
59
|
+
@tables = nil
|
59
60
|
end
|
60
61
|
|
61
62
|
# Returns all slaves, or pass in :active, :standby, or :backup to receive slaves
|
@@ -96,7 +97,47 @@ module Jetpants
|
|
96
97
|
def nodes
|
97
98
|
[master, slaves].flatten.compact
|
98
99
|
end
|
100
|
+
|
101
|
+
# Look at a database in the pool (preferably a standby slave, but will check
|
102
|
+
# active slave or master if nothing else is available) and retrieve a list of
|
103
|
+
# tables, detecting their schema
|
104
|
+
def probe_tables
|
105
|
+
master.probe
|
106
|
+
db = standby_slaves.last || active_slaves.last || master
|
107
|
+
if db && db.running?
|
108
|
+
output "Probing tables via #{db}"
|
109
|
+
else
|
110
|
+
output "Warning: unable to probe tables"
|
111
|
+
return
|
112
|
+
end
|
113
|
+
|
114
|
+
@tables = []
|
115
|
+
sql = "SHOW TABLES"
|
116
|
+
db.query_return_array(sql).each do |tbl|
|
117
|
+
table_name = tbl.values.first
|
118
|
+
@tables << db.detect_table_schema(table_name)
|
119
|
+
end
|
120
|
+
end
|
99
121
|
|
122
|
+
# Returns a list of table objects for this pool
|
123
|
+
def tables
|
124
|
+
self.probe_tables unless @tables
|
125
|
+
@tables
|
126
|
+
end
|
127
|
+
|
128
|
+
# Queries whether a pool has a table with a given name
|
129
|
+
# note that this is the string name of the table and not an object
|
130
|
+
def has_table?(table)
|
131
|
+
tables.map(&:to_s).include?(table)
|
132
|
+
end
|
133
|
+
|
134
|
+
# Retrieve the table object for a given table name
|
135
|
+
def get_table(table)
|
136
|
+
raise "Pool #{self} does not have table #{table}" unless has_table? table
|
137
|
+
|
138
|
+
@tables.select{|tb| tb.to_s == table}.first
|
139
|
+
end
|
140
|
+
|
100
141
|
# Informs Jetpants that slave_db is an active slave. Potentially used by
|
101
142
|
# plugins, such as in Topology at start-up time.
|
102
143
|
def has_active_slave(slave_db, weight=100)
|
data/lib/jetpants/shard.rb
CHANGED
@@ -32,8 +32,8 @@ module Jetpants
|
|
32
32
|
# :exporting -- Child shard that is exporting its portion of the data set. Shard not in production yet.
|
33
33
|
# :importing -- Child shard that is importing its portion of the data set. Shard not in production yet.
|
34
34
|
# :replicating -- Child shard that is being cloned to new replicas. Shard not in production yet.
|
35
|
-
# :child --
|
36
|
-
# :needs_cleanup -- Child shard that is fully in production, but parent replication not torn down yet, and redundant data (from wrong range) not removed yet
|
35
|
+
# :child -- In-production shard whose master is slaving from another shard. Reads go to this shard's master, but writes go to the master of this shard's master and replicate down.
|
36
|
+
# :needs_cleanup -- Child shard that is fully in production, but parent replication not torn down yet, and potentially has redundant data (from wrong range) not removed yet
|
37
37
|
# :deprecated -- Parent shard that has been split but children are still in :child or :needs_cleanup state. Shard may still be in production for writes / replication not torn down yet.
|
38
38
|
# :recycle -- Parent shard that has been split and children are now in the :ready state. Shard no longer in production, replication to children has been torn down.
|
39
39
|
attr_accessor :state
|
@@ -103,7 +103,27 @@ module Jetpants
|
|
103
103
|
def db(mode=:read)
|
104
104
|
(mode.to_sym == :write && @parent ? @parent.master : master)
|
105
105
|
end
|
106
|
-
|
106
|
+
|
107
|
+
# Override the probe_tables method to accommodate shard topology -
|
108
|
+
# delegate everything to the first shard.
|
109
|
+
def probe_tables
|
110
|
+
if Jetpants.topology.shards.first == self
|
111
|
+
super
|
112
|
+
else
|
113
|
+
Jetpants.topology.shards.first.probe_tables
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# Override the tables accessor to accommodate shard topology - delegate
|
118
|
+
# everything to the first shard
|
119
|
+
def tables
|
120
|
+
if Jetpants.topology.shards.first == self
|
121
|
+
super
|
122
|
+
else
|
123
|
+
Jetpants.topology.shards.first.tables
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
107
127
|
# Adds a Jetpants::Shard to this shard's array of children, and sets
|
108
128
|
# the child's parent to be self.
|
109
129
|
def add_child(shard)
|
@@ -120,59 +140,38 @@ module Jetpants
|
|
120
140
|
shard.parent = nil
|
121
141
|
end
|
122
142
|
|
123
|
-
#
|
143
|
+
# Splits a shard into <pieces> child shards. The children will still be slaving
|
144
|
+
# from the parent after this point; you need to do additional things to fully
|
145
|
+
# complete the shard split. See the command suite tasks shard_split_move_reads,
|
146
|
+
# shard_split_move_writes, and shard_split_cleanup.
|
147
|
+
#
|
124
148
|
# You can optionally supply the ID ranges to use: pass in an array of arrays,
|
125
|
-
# where the outer array is of size <
|
149
|
+
# where the outer array is of size <pieces> and each inner array is [min_id, max_id].
|
126
150
|
# If you omit id_ranges, the parent's ID range will be divided evenly amongst the
|
127
151
|
# children automatically.
|
128
|
-
def
|
129
|
-
|
130
|
-
raise "
|
131
|
-
raise "Not enough standby_slave role machines in spare pool!" if count * Jetpants.standby_slaves_per_pool > Jetpants.topology.count_spares(role: :standby_slave, like: slaves.first)
|
132
|
-
|
133
|
-
# Make sure enough slaves of shard being split
|
134
|
-
raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.count < Jetpants.standby_slaves_per_pool
|
135
|
-
|
136
|
-
# Make sure right number of id_ranges were supplied, if any were
|
137
|
-
raise "Wrong number of id_ranges supplied" if id_ranges && id_ranges.count != count
|
152
|
+
def split!(pieces=2, id_ranges=false)
|
153
|
+
raise "Cannot split a shard that is still a child!" if @parent
|
154
|
+
raise "Cannot split a shard into #{pieces} pieces!" if pieces < 2
|
138
155
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
ids_this_pool += 1 if i < (ids_total % count)
|
146
|
-
id_ranges << [current_min_id, current_min_id + ids_this_pool - 1]
|
147
|
-
current_min_id += ids_this_pool
|
148
|
-
end
|
156
|
+
# We can resume partially-failed shard splits if all children made it past
|
157
|
+
# the :initializing stage. (note: some manual cleanup may be required first,
|
158
|
+
# depending on where/how the split failed though.)
|
159
|
+
num_children_post_init = @children.count {|c| c.state != :initializing}
|
160
|
+
if (@children.size > 0 && @children.size != pieces) || (num_children_post_init > 0 && num_children_post_init != pieces)
|
161
|
+
raise "Previous shard split died at an unrecoverable stage, cannot automatically restart"
|
149
162
|
end
|
150
163
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
add_child(s)
|
157
|
-
Jetpants.topology.pools << s
|
158
|
-
s.sync_configuration
|
164
|
+
# Set up the child shard masters, unless we're resuming a partially-failed
|
165
|
+
# shard split
|
166
|
+
if num_children_post_init == 0
|
167
|
+
id_ranges ||= even_split_id_range(pieces)
|
168
|
+
init_child_shard_masters(id_ranges)
|
159
169
|
end
|
160
170
|
|
161
|
-
@children
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
# from the parent after this point; you need to do additional things to fully
|
166
|
-
# complete the shard split. See the command suite tasks shard_split_move_reads,
|
167
|
-
# shard_split_move_writes, and shard_split_cleanup.
|
168
|
-
def split!(pieces=2)
|
169
|
-
raise "Cannot split a shard that is still a child!" if @parent
|
170
|
-
|
171
|
-
init_children(pieces) unless @children.count > 0
|
172
|
-
|
173
|
-
clone_to_children!
|
174
|
-
@children.concurrent_each {|c| c.rebuild!}
|
175
|
-
@children.each {|c| c.sync_configuration}
|
171
|
+
@children.concurrent_each do |c|
|
172
|
+
c.prune_data! if [:initializing, :exporting, :importing].include? c.state
|
173
|
+
c.clone_slaves_from_master
|
174
|
+
end
|
176
175
|
|
177
176
|
@state = :deprecated
|
178
177
|
sync_configuration
|
@@ -190,113 +189,135 @@ module Jetpants
|
|
190
189
|
end
|
191
190
|
end
|
192
191
|
|
193
|
-
# Clones the current shard to its children. Uses a standby slave of self as
|
194
|
-
# the source for copying.
|
195
|
-
def clone_to_children!
|
196
|
-
# Figure out which slave(s) we can use for populating the new masters
|
197
|
-
sources = standby_slaves.dup
|
198
|
-
raise "Need to have at least 1 slave in order to create additional slaves" if sources.length < 1
|
199
|
-
|
200
|
-
# If we have 2 or more slaves, keep 1 replicating for safety's sake; don't use it for spinning up children
|
201
|
-
sources.shift if sources.length > 1
|
202
|
-
|
203
|
-
# Figure out which machines we need to turn into slaves
|
204
|
-
targets = []
|
205
|
-
@children.each do |child_shard|
|
206
|
-
if child_shard.master.is_slave? && child_shard.master.master != @master
|
207
|
-
raise "Child shard master #{child_shard.master} is already a slave of another pool"
|
208
|
-
elsif child_shard.master.is_slave?
|
209
|
-
child_shard.output "Already slaving from parent shard master"
|
210
|
-
else
|
211
|
-
targets << child_shard.master
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
while targets.count > 0 do
|
216
|
-
chain_length = (targets.count.to_f / sources.count.to_f).ceil
|
217
|
-
chain_length = 3 if chain_length > 3 # For sanity's sake, we only allow a copy pipeline that populates 3 instances at once.
|
218
|
-
sources.concurrent_each_with_index do |src, idx|
|
219
|
-
my_targets = targets[idx * chain_length, chain_length]
|
220
|
-
src.enslave_siblings! my_targets
|
221
|
-
chain_length.times {|n| targets[(idx * chain_length) + n] = nil}
|
222
|
-
end
|
223
|
-
targets.compact!
|
224
|
-
end
|
225
|
-
end
|
226
|
-
|
227
192
|
# Exports data that should stay on this shard, drops and re-creates tables,
|
228
|
-
# re-imports the data
|
229
|
-
def
|
230
|
-
|
231
|
-
|
232
|
-
|
193
|
+
# and then re-imports the data
|
194
|
+
def prune_data!
|
195
|
+
raise "Cannot prune a shard that isn't still slaving from another shard" unless @master.is_slave?
|
196
|
+
unless [:initializing, :exporting, :importing].include? @state
|
197
|
+
raise "Shard #{self} is not in a state compatible with calling prune_data! (current state=#{@state})"
|
198
|
+
end
|
233
199
|
|
234
200
|
tables = Table.from_config 'sharded_tables'
|
235
201
|
|
236
|
-
if
|
202
|
+
if @state == :initializing
|
237
203
|
@state = :exporting
|
238
204
|
sync_configuration
|
205
|
+
end
|
206
|
+
|
207
|
+
if @state == :exporting
|
239
208
|
stop_query_killer
|
240
209
|
export_schemata tables
|
241
210
|
export_data tables, @min_id, @max_id
|
242
|
-
end
|
243
|
-
|
244
|
-
if [:exporting, :importing].include? @state
|
245
211
|
@state = :importing
|
246
212
|
sync_configuration
|
213
|
+
end
|
214
|
+
|
215
|
+
if @state == :importing
|
216
|
+
stop_query_killer
|
247
217
|
import_schemata!
|
248
218
|
alter_schemata if respond_to? :alter_schemata
|
219
|
+
disable_monitoring
|
249
220
|
restart_mysql '--skip-log-bin', '--skip-log-slave-updates', '--innodb-autoinc-lock-mode=2', '--skip-slave-start'
|
250
221
|
import_data tables, @min_id, @max_id
|
251
222
|
restart_mysql # to clear out previous options '--skip-log-bin', '--skip-log-slave-updates', '--innodb-autoinc-lock-mode=2'
|
223
|
+
enable_monitoring
|
252
224
|
start_query_killer
|
253
225
|
end
|
226
|
+
end
|
227
|
+
|
228
|
+
# Creates standby slaves for a shard by cloning the master.
|
229
|
+
# Only call this on a child shard that isn't in production yet, or on
|
230
|
+
# a production shard that's been marked as offline.
|
231
|
+
def clone_slaves_from_master
|
232
|
+
# If shard is already in state :child, it may already have slaves
|
233
|
+
slaves_needed = Jetpants.standby_slaves_per_pool
|
234
|
+
slaves_needed -= standby_slaves.size if @state == :child
|
235
|
+
if slaves_needed < 1
|
236
|
+
output "Shard already has enough standby slaves, skipping step of cloning more"
|
237
|
+
return
|
238
|
+
end
|
239
|
+
|
240
|
+
slaves_available = Jetpants.topology.count_spares(role: :standby_slave, like: master)
|
241
|
+
raise "Not enough standby_slave role machines in spare pool!" if slaves_needed > slaves_available
|
254
242
|
|
255
|
-
|
243
|
+
# Handle state transitions
|
244
|
+
if @state == :child || @state == :importing
|
256
245
|
@state = :replicating
|
257
246
|
sync_configuration
|
258
|
-
|
259
|
-
|
260
|
-
enslave!(my_slaves)
|
261
|
-
my_slaves.each {|slv| slv.resume_replication}
|
262
|
-
[self, my_slaves].flatten.each {|db| db.catch_up_to_master}
|
263
|
-
else
|
264
|
-
catch_up_to_master
|
265
|
-
end
|
247
|
+
elsif @state == :offline || @state == :replicating
|
248
|
+
# intentional no-op, no need to change state
|
266
249
|
else
|
267
|
-
raise "Shard not in a state compatible with calling
|
250
|
+
raise "Shard #{self} is not in a state compatible with calling clone_slaves_from_master! (current state=#{@state})"
|
268
251
|
end
|
269
252
|
|
270
|
-
|
253
|
+
my_slaves = Jetpants.topology.claim_spares(slaves_needed, role: :standby_slave, like: master)
|
254
|
+
enslave!(my_slaves)
|
255
|
+
my_slaves.each &:resume_replication
|
256
|
+
[self, my_slaves].flatten.each {|db| db.catch_up_to_master}
|
257
|
+
|
258
|
+
# Update state, if relevant
|
259
|
+
if @state == :replicating
|
260
|
+
@state = :child
|
261
|
+
sync_configuration
|
262
|
+
end
|
263
|
+
@children
|
271
264
|
end
|
272
265
|
|
273
|
-
#
|
274
|
-
#
|
275
|
-
#
|
276
|
-
#
|
277
|
-
# children
|
266
|
+
# Cleans up the state of a shard. This has two use-cases:
|
267
|
+
# A. Run this on a parent shard after the rest of a shard split is complete.
|
268
|
+
# Sets this shard's master to read-only; removes the application user from
|
269
|
+
# self (without replicating this change to children); disables replication
|
270
|
+
# between the parent and the children; and then removes rows from the
|
271
|
+
# children that replicated to the wrong shard.
|
272
|
+
# B. Run this on a shard that just underwent a two-step promotion process which
|
273
|
+
# moved all reads, and then all writes, to a slave that has slaves of its own.
|
274
|
+
# For example, if upgrading MySQL on a shard by creating a newer-version slave
|
275
|
+
# and then adding slaves of its own to it (temp hierarchical replication setup).
|
276
|
+
# You can use this method to then "eject" the older-version master and its
|
277
|
+
# older-version slaves from the pool.
|
278
278
|
def cleanup!
|
279
|
-
raise "Can only run cleanup! on a parent shard in the deprecated state" unless @state == :deprecated
|
280
279
|
raise "Cannot call cleanup! on a child shard" if @parent
|
280
|
+
|
281
|
+
# situation A - clean up after a shard split
|
282
|
+
if @state == :deprecated && @children.size > 0
|
283
|
+
tables = Table.from_config 'sharded_tables'
|
284
|
+
@master.revoke_all_access!
|
285
|
+
@children.concurrent_each do |child_shard|
|
286
|
+
raise "Child state does not indicate cleanup is needed" unless child_shard.state == :needs_cleanup
|
287
|
+
raise "Child shard master should be a slave in order to clean up" unless child_shard.is_slave?
|
288
|
+
child_shard.master.disable_replication! # stop slaving from parent
|
289
|
+
child_shard.prune_data_to_range tables, child_shard.min_id, child_shard.max_id
|
290
|
+
end
|
281
291
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
292
|
+
# We have to iterate over a copy of the @children array, rather than the array
|
293
|
+
# directly, since Array#each skips elements when you remove elements in-place,
|
294
|
+
# which Shard#remove_child does...
|
295
|
+
@children.dup.each do |child_shard|
|
296
|
+
child_shard.state = :ready
|
297
|
+
remove_child child_shard
|
298
|
+
child_shard.sync_configuration
|
299
|
+
end
|
300
|
+
@state = :recycle
|
290
301
|
|
291
|
-
#
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
302
|
+
# situation B - clean up after a two-step shard master promotion
|
303
|
+
elsif @state == :needs_cleanup && @master.master && !@parent
|
304
|
+
eject_master = @master.master
|
305
|
+
eject_slaves = @master.slaves.reject {|s| s == @master}
|
306
|
+
eject_master.revoke_all_access!
|
307
|
+
@master.disable_replication!
|
308
|
+
|
309
|
+
# We need to update the asset tracker to no longer consider the ejected
|
310
|
+
# nodes as part of this pool. This includes ejecting the old master, which
|
311
|
+
# might be handled by Pool#after_master_promotion! instead
|
312
|
+
# of Shard#sync_configuration.
|
313
|
+
after_master_promotion!(@master, false) if respond_to? :after_master_promotion!
|
314
|
+
|
315
|
+
@state = :ready
|
316
|
+
|
317
|
+
else
|
318
|
+
raise "Shard #{self} is not in a state compatible with calling cleanup! (state=#{state}, child count=#{@children.size}"
|
298
319
|
end
|
299
|
-
|
320
|
+
|
300
321
|
sync_configuration
|
301
322
|
end
|
302
323
|
|
@@ -309,6 +330,62 @@ module Jetpants
|
|
309
330
|
true
|
310
331
|
end
|
311
332
|
|
333
|
+
|
334
|
+
###### Private methods #####################################################
|
335
|
+
private
|
336
|
+
|
337
|
+
# Splits self's ID range into num_children pieces
|
338
|
+
# Returns an array of [low_id, high_id] arrays, suitable for
|
339
|
+
# passing to Shard#init_child_shard_masters
|
340
|
+
def even_split_id_range(num_children)
|
341
|
+
raise "Cannot calculate an even split of last shard" if @max_id == 'INFINITY'
|
342
|
+
id_ranges = []
|
343
|
+
ids_total = 1 + @max_id - @min_id
|
344
|
+
current_min_id = @min_id
|
345
|
+
num_children.times do |i|
|
346
|
+
ids_this_pool = (ids_total / num_children).floor
|
347
|
+
ids_this_pool += 1 if i < (ids_total % num_children)
|
348
|
+
id_ranges << [current_min_id, current_min_id + ids_this_pool - 1]
|
349
|
+
current_min_id += ids_this_pool
|
350
|
+
end
|
351
|
+
id_ranges
|
352
|
+
end
|
353
|
+
|
354
|
+
# Early step of shard split process: initialize child shard pools, pull boxes from
|
355
|
+
# spare list to use as masters for these new shards, and then populate them with the
|
356
|
+
# full data set from self (the shard being split).
|
357
|
+
#
|
358
|
+
# Supply an array of [min_id, max_id] arrays, specifying the ID ranges to use for each
|
359
|
+
# child. For example, if self has @min_id = 1001 and @max_id = 4000, and you're splitting
|
360
|
+
# into 3 evenly-sized child shards, you'd supply [[1001,2000], [2001,3000], [3001, 4000]]
|
361
|
+
def init_child_shard_masters(id_ranges)
|
362
|
+
# Validations: make sure enough machines in spare pool; enough slaves of shard being split;
|
363
|
+
# no existing children of shard being split
|
364
|
+
# TODO: fix the first check to separately account for :role, ie check master and standby_slave counts separately
|
365
|
+
# (this is actually quite difficult since we can't provide a :like node in a sane way)
|
366
|
+
spares_needed = id_ranges.size * (1 + Jetpants.standby_slaves_per_pool)
|
367
|
+
raise "Not enough machines in spare pool!" if spares_needed > Jetpants.topology.count_spares(role: :master, like: master)
|
368
|
+
raise 'Shard split functionality requires Jetpants config setting "standby_slaves_per_pool" is at least 1' if Jetpants.standby_slaves_per_pool < 1
|
369
|
+
raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.size < Jetpants.standby_slaves_per_pool
|
370
|
+
raise "Shard #{self} already has #{@children.size} child shards" if @children.size > 0
|
371
|
+
|
372
|
+
# Set up the child shards, and give them masters
|
373
|
+
id_ranges.each do |my_range|
|
374
|
+
spare = Jetpants.topology.claim_spare(role: :master, like: master)
|
375
|
+
spare.disable_read_only! if (spare.running? && spare.read_only?)
|
376
|
+
spare.output "Will be master for new shard with ID range of #{my_range.first} to #{my_range.last} (inclusive)"
|
377
|
+
s = Shard.new(my_range.first, my_range.last, spare, :initializing)
|
378
|
+
add_child(s)
|
379
|
+
Jetpants.topology.pools << s
|
380
|
+
s.sync_configuration
|
381
|
+
end
|
382
|
+
|
383
|
+
# We'll clone the full parent data set from a standby slave of the shard being split
|
384
|
+
source = standby_slaves.first
|
385
|
+
targets = @children.map &:master
|
386
|
+
source.enslave_siblings! targets
|
387
|
+
end
|
388
|
+
|
312
389
|
end
|
313
390
|
end
|
314
391
|
|