jetpants 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +4 -9
- data/bin/jetpants +7 -6
- data/doc/capacity_plan.rdoc +77 -0
- data/doc/commands.rdoc +1 -1
- data/doc/jetpants_collins.rdoc +2 -1
- data/doc/online_schema_change.rdoc +45 -0
- data/doc/plugins.rdoc +7 -1
- data/doc/requirements.rdoc +1 -1
- data/doc/upgrade_helper.rdoc +68 -0
- data/lib/jetpants/db/client.rb +2 -1
- data/lib/jetpants/db/import_export.rb +12 -3
- data/lib/jetpants/db/replication.rb +6 -2
- data/lib/jetpants/db/schema.rb +40 -0
- data/lib/jetpants/db/server.rb +2 -2
- data/lib/jetpants/host.rb +12 -1
- data/lib/jetpants/pool.rb +41 -0
- data/lib/jetpants/shard.rb +201 -124
- data/lib/jetpants/table.rb +80 -10
- data/plugins/capacity_plan/capacity_plan.rb +353 -0
- data/plugins/capacity_plan/commandsuite.rb +19 -0
- data/plugins/capacity_plan/monkeypatch.rb +20 -0
- data/plugins/jetpants_collins/db.rb +45 -6
- data/plugins/jetpants_collins/jetpants_collins.rb +32 -21
- data/plugins/jetpants_collins/pool.rb +22 -1
- data/plugins/jetpants_collins/shard.rb +9 -2
- data/plugins/jetpants_collins/topology.rb +8 -9
- data/plugins/online_schema_change/commandsuite.rb +56 -0
- data/plugins/online_schema_change/db.rb +33 -0
- data/plugins/online_schema_change/online_schema_change.rb +5 -0
- data/plugins/online_schema_change/pool.rb +105 -0
- data/plugins/online_schema_change/topology.rb +56 -0
- data/plugins/simple_tracker/shard.rb +1 -1
- data/plugins/upgrade_helper/commandsuite.rb +212 -0
- data/plugins/upgrade_helper/db.rb +78 -0
- data/plugins/upgrade_helper/host.rb +22 -0
- data/plugins/upgrade_helper/pool.rb +259 -0
- data/plugins/upgrade_helper/shard.rb +61 -0
- data/plugins/upgrade_helper/upgrade_helper.rb +21 -0
- data/scripts/global_rowcount.rb +75 -0
- metadata +28 -15
data/lib/jetpants/host.rb
CHANGED
@@ -142,7 +142,7 @@ module Jetpants
|
|
142
142
|
# Confirm that something is listening on the given port. The timeout param
|
143
143
|
# indicates how long to wait (in seconds) for a process to be listening.
|
144
144
|
def confirm_listening_on_port(port, timeout=10)
|
145
|
-
checker_th = Thread.new { ssh_cmd "while [[ `netstat -ln | grep
|
145
|
+
checker_th = Thread.new { ssh_cmd "while [[ `netstat -ln | grep :#{port} | wc -l` -lt 1 ]] ; do sleep 1; done" }
|
146
146
|
raise "Nothing is listening on #{@ip}:#{port} after #{timeout} seconds" unless checker_th.join(timeout)
|
147
147
|
true
|
148
148
|
end
|
@@ -384,6 +384,17 @@ module Jetpants
|
|
384
384
|
@cores = (count ? count.to_i : 1)
|
385
385
|
end
|
386
386
|
|
387
|
+
# Returns the amount of memory on machine, either in bytes (default) or in GB.
|
388
|
+
# Linux-specific.
|
389
|
+
def memory(in_gb=false)
  # Ask the host for the MemTotal line of /proc/meminfo
  line = ssh_cmd 'cat /proc/meminfo | grep MemTotal'
  matches = line.to_s.match(/(?<size>\d+)\s+(?<unit>kB|mB|gB|B)/)

  # Fail with a descriptive error rather than a NoMethodError on nil when the
  # command produced no output or the line is not in the expected format
  raise "Unable to parse MemTotal from /proc/meminfo output: #{line.inspect}" unless matches

  # Normalize the reported size to bytes, based on the matched unit suffix
  multipliers = {kB: 1024, mB: 1024**2, gB: 1024**3, B: 1}
  size = matches[:size].to_i * multipliers[matches[:unit].to_sym]

  # Integer division: the GB figure is rounded down to a whole number
  in_gb ? size / 1024**3 : size
end
|
397
|
+
|
387
398
|
# Returns the machine's hostname
|
388
399
|
def hostname
|
389
400
|
return 'unknown' unless available?
|
data/lib/jetpants/pool.rb
CHANGED
@@ -56,6 +56,7 @@ module Jetpants
|
|
56
56
|
@master = master.to_db
|
57
57
|
@master_read_weight = 0
|
58
58
|
@active_slave_weights = {}
|
59
|
+
@tables = nil
|
59
60
|
end
|
60
61
|
|
61
62
|
# Returns all slaves, or pass in :active, :standby, or :backup to receive slaves
|
@@ -96,7 +97,47 @@ module Jetpants
|
|
96
97
|
def nodes
|
97
98
|
[master, slaves].flatten.compact
|
98
99
|
end
|
100
|
+
|
101
|
+
# Look at a database in the pool (preferably a standby slave, but will check
|
102
|
+
# active slave or master if nothing else is available) and retrieve a list of
|
103
|
+
# tables, detecting their schema
|
104
|
+
def probe_tables
  master.probe

  # Pick the node to inspect: last standby slave, else last active slave,
  # else the master itself
  source = standby_slaves.last || active_slaves.last || master
  unless source && source.running?
    output "Warning: unable to probe tables"
    return
  end
  output "Probing tables via #{source}"

  # Rebuild the cached table list from scratch, one schema lookup per table
  @tables = []
  source.query_return_array("SHOW TABLES").each do |row|
    @tables << source.detect_table_schema(row.values.first)
  end
end
|
99
121
|
|
122
|
+
# Returns a list of table objects for this pool
|
123
|
+
def tables
  # Serve the cached list when a previous probe already populated it
  return @tables if @tables

  # Otherwise probe now; @tables stays nil if the probe could not run
  probe_tables
  @tables
end
|
127
|
+
|
128
|
+
# Queries whether a pool has a table with a given name
|
129
|
+
# note that this is the string name of the table and not an object
|
130
|
+
def has_table?(table)
  known = tables

  # The tables accessor yields nil when the pool could not be probed; report
  # that explicitly instead of failing with a NoMethodError on nil
  raise "Unable to determine the tables of pool #{self}" if known.nil?

  # Compare by name, since the argument is a table name rather than a table object
  known.any? { |tbl| tbl.to_s == table }
end
|
133
|
+
|
134
|
+
# Retrieve the table object for a given table name
|
135
|
+
def get_table(table)
  raise "Pool #{self} does not have table #{table}" unless has_table? table

  # Read through the tables accessor rather than @tables directly: a subclass
  # may override the accessor to delegate to another pool, in which case this
  # object's own @tables is never populated.
  tables.find { |tb| tb.to_s == table }
end
|
140
|
+
|
100
141
|
# Informs Jetpants that slave_db is an active slave. Potentially used by
|
101
142
|
# plugins, such as in Topology at start-up time.
|
102
143
|
def has_active_slave(slave_db, weight=100)
|
data/lib/jetpants/shard.rb
CHANGED
@@ -32,8 +32,8 @@ module Jetpants
|
|
32
32
|
# :exporting -- Child shard that is exporting its portion of the data set. Shard not in production yet.
|
33
33
|
# :importing -- Child shard that is importing its portion of the data set. Shard not in production yet.
|
34
34
|
# :replicating -- Child shard that is being cloned to new replicas. Shard not in production yet.
|
35
|
-
# :child --
|
36
|
-
# :needs_cleanup -- Child shard that is fully in production, but parent replication not torn down yet, and redundant data (from wrong range) not removed yet
|
35
|
+
# :child -- In-production shard whose master is slaving from another shard. Reads go to this shard's master, but writes go to the master of this shard's master and replicate down.
|
36
|
+
# :needs_cleanup -- Child shard that is fully in production, but parent replication not torn down yet, and potentially has redundant data (from wrong range) not removed yet
|
37
37
|
# :deprecated -- Parent shard that has been split but children are still in :child or :needs_cleanup state. Shard may still be in production for writes / replication not torn down yet.
|
38
38
|
# :recycle -- Parent shard that has been split and children are now in the :ready state. Shard no longer in production, replication to children has been torn down.
|
39
39
|
attr_accessor :state
|
@@ -103,7 +103,27 @@ module Jetpants
|
|
103
103
|
def db(mode=:read)
  # Writes on a shard that still has a parent are routed to the parent's master
  return @parent.master if mode.to_sym == :write && @parent

  # Every other case uses this shard's own master
  master
end
|
106
|
-
|
106
|
+
|
107
|
+
# Override the probe_tables method to accommodate shard topology -
|
108
|
+
# delegate everything to the first shard.
|
109
|
+
def probe_tables
  first_shard = Jetpants.topology.shards.first

  # Only the first shard in the topology runs the inherited probe itself
  return super if first_shard == self

  # Every other shard hands the work to the first shard
  first_shard.probe_tables
end
|
116
|
+
|
117
|
+
# Override the tables accessor to accommodate shard topology - delegate
|
118
|
+
# everything to the first shard
|
119
|
+
def tables
  first_shard = Jetpants.topology.shards.first

  # The first shard in the topology answers from the inherited accessor
  return super if first_shard == self

  # Every other shard returns whatever the first shard reports
  first_shard.tables
end
|
126
|
+
|
107
127
|
# Adds a Jetpants::Shard to this shard's array of children, and sets
|
108
128
|
# the child's parent to be self.
|
109
129
|
def add_child(shard)
|
@@ -120,59 +140,38 @@ module Jetpants
|
|
120
140
|
shard.parent = nil
|
121
141
|
end
|
122
142
|
|
123
|
-
#
|
143
|
+
# Splits a shard into <pieces> child shards. The children will still be slaving
|
144
|
+
# from the parent after this point; you need to do additional things to fully
|
145
|
+
# complete the shard split. See the command suite tasks shard_split_move_reads,
|
146
|
+
# shard_split_move_writes, and shard_split_cleanup.
|
147
|
+
#
|
124
148
|
# You can optionally supply the ID ranges to use: pass in an array of arrays,
|
125
|
-
# where the outer array is of size <
|
149
|
+
# where the outer array is of size <pieces> and each inner array is [min_id, max_id].
|
126
150
|
# If you omit id_ranges, the parent's ID range will be divided evenly amongst the
|
127
151
|
# children automatically.
|
128
|
-
def
|
129
|
-
|
130
|
-
raise "
|
131
|
-
raise "Not enough standby_slave role machines in spare pool!" if count * Jetpants.standby_slaves_per_pool > Jetpants.topology.count_spares(role: :standby_slave, like: slaves.first)
|
132
|
-
|
133
|
-
# Make sure enough slaves of shard being split
|
134
|
-
raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.count < Jetpants.standby_slaves_per_pool
|
135
|
-
|
136
|
-
# Make sure right number of id_ranges were supplied, if any were
|
137
|
-
raise "Wrong number of id_ranges supplied" if id_ranges && id_ranges.count != count
|
152
|
+
def split!(pieces=2, id_ranges=false)
|
153
|
+
raise "Cannot split a shard that is still a child!" if @parent
|
154
|
+
raise "Cannot split a shard into #{pieces} pieces!" if pieces < 2
|
138
155
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
ids_this_pool += 1 if i < (ids_total % count)
|
146
|
-
id_ranges << [current_min_id, current_min_id + ids_this_pool - 1]
|
147
|
-
current_min_id += ids_this_pool
|
148
|
-
end
|
156
|
+
# We can resume partially-failed shard splits if all children made it past
|
157
|
+
# the :initializing stage. (note: some manual cleanup may be required first,
|
158
|
+
# depending on where/how the split failed though.)
|
159
|
+
num_children_post_init = @children.count {|c| c.state != :initializing}
|
160
|
+
if (@children.size > 0 && @children.size != pieces) || (num_children_post_init > 0 && num_children_post_init != pieces)
|
161
|
+
raise "Previous shard split died at an unrecoverable stage, cannot automatically restart"
|
149
162
|
end
|
150
163
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
add_child(s)
|
157
|
-
Jetpants.topology.pools << s
|
158
|
-
s.sync_configuration
|
164
|
+
# Set up the child shard masters, unless we're resuming a partially-failed
|
165
|
+
# shard split
|
166
|
+
if num_children_post_init == 0
|
167
|
+
id_ranges ||= even_split_id_range(pieces)
|
168
|
+
init_child_shard_masters(id_ranges)
|
159
169
|
end
|
160
170
|
|
161
|
-
@children
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
# from the parent after this point; you need to do additional things to fully
|
166
|
-
# complete the shard split. See the command suite tasks shard_split_move_reads,
|
167
|
-
# shard_split_move_writes, and shard_split_cleanup.
|
168
|
-
def split!(pieces=2)
|
169
|
-
raise "Cannot split a shard that is still a child!" if @parent
|
170
|
-
|
171
|
-
init_children(pieces) unless @children.count > 0
|
172
|
-
|
173
|
-
clone_to_children!
|
174
|
-
@children.concurrent_each {|c| c.rebuild!}
|
175
|
-
@children.each {|c| c.sync_configuration}
|
171
|
+
@children.concurrent_each do |c|
|
172
|
+
c.prune_data! if [:initializing, :exporting, :importing].include? c.state
|
173
|
+
c.clone_slaves_from_master
|
174
|
+
end
|
176
175
|
|
177
176
|
@state = :deprecated
|
178
177
|
sync_configuration
|
@@ -190,113 +189,135 @@ module Jetpants
|
|
190
189
|
end
|
191
190
|
end
|
192
191
|
|
193
|
-
# Clones the current shard to its children. Uses a standby slave of self as
|
194
|
-
# the source for copying.
|
195
|
-
def clone_to_children!
|
196
|
-
# Figure out which slave(s) we can use for populating the new masters
|
197
|
-
sources = standby_slaves.dup
|
198
|
-
raise "Need to have at least 1 slave in order to create additional slaves" if sources.length < 1
|
199
|
-
|
200
|
-
# If we have 2 or more slaves, keep 1 replicating for safety's sake; don't use it for spinning up children
|
201
|
-
sources.shift if sources.length > 1
|
202
|
-
|
203
|
-
# Figure out which machines we need to turn into slaves
|
204
|
-
targets = []
|
205
|
-
@children.each do |child_shard|
|
206
|
-
if child_shard.master.is_slave? && child_shard.master.master != @master
|
207
|
-
raise "Child shard master #{child_shard.master} is already a slave of another pool"
|
208
|
-
elsif child_shard.master.is_slave?
|
209
|
-
child_shard.output "Already slaving from parent shard master"
|
210
|
-
else
|
211
|
-
targets << child_shard.master
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
while targets.count > 0 do
|
216
|
-
chain_length = (targets.count.to_f / sources.count.to_f).ceil
|
217
|
-
chain_length = 3 if chain_length > 3 # For sanity's sake, we only allow a copy pipeline that populates 3 instances at once.
|
218
|
-
sources.concurrent_each_with_index do |src, idx|
|
219
|
-
my_targets = targets[idx * chain_length, chain_length]
|
220
|
-
src.enslave_siblings! my_targets
|
221
|
-
chain_length.times {|n| targets[(idx * chain_length) + n] = nil}
|
222
|
-
end
|
223
|
-
targets.compact!
|
224
|
-
end
|
225
|
-
end
|
226
|
-
|
227
192
|
# Exports data that should stay on this shard, drops and re-creates tables,
|
228
|
-
# re-imports the data
|
229
|
-
def
|
230
|
-
|
231
|
-
|
232
|
-
|
193
|
+
# and then re-imports the data
|
194
|
+
def prune_data!
|
195
|
+
raise "Cannot prune a shard that isn't still slaving from another shard" unless @master.is_slave?
|
196
|
+
unless [:initializing, :exporting, :importing].include? @state
|
197
|
+
raise "Shard #{self} is not in a state compatible with calling prune_data! (current state=#{@state})"
|
198
|
+
end
|
233
199
|
|
234
200
|
tables = Table.from_config 'sharded_tables'
|
235
201
|
|
236
|
-
if
|
202
|
+
if @state == :initializing
|
237
203
|
@state = :exporting
|
238
204
|
sync_configuration
|
205
|
+
end
|
206
|
+
|
207
|
+
if @state == :exporting
|
239
208
|
stop_query_killer
|
240
209
|
export_schemata tables
|
241
210
|
export_data tables, @min_id, @max_id
|
242
|
-
end
|
243
|
-
|
244
|
-
if [:exporting, :importing].include? @state
|
245
211
|
@state = :importing
|
246
212
|
sync_configuration
|
213
|
+
end
|
214
|
+
|
215
|
+
if @state == :importing
|
216
|
+
stop_query_killer
|
247
217
|
import_schemata!
|
248
218
|
alter_schemata if respond_to? :alter_schemata
|
219
|
+
disable_monitoring
|
249
220
|
restart_mysql '--skip-log-bin', '--skip-log-slave-updates', '--innodb-autoinc-lock-mode=2', '--skip-slave-start'
|
250
221
|
import_data tables, @min_id, @max_id
|
251
222
|
restart_mysql # to clear out previous options '--skip-log-bin', '--skip-log-slave-updates', '--innodb-autoinc-lock-mode=2'
|
223
|
+
enable_monitoring
|
252
224
|
start_query_killer
|
253
225
|
end
|
226
|
+
end
|
227
|
+
|
228
|
+
# Creates standby slaves for a shard by cloning the master.
|
229
|
+
# Only call this on a child shard that isn't in production yet, or on
|
230
|
+
# a production shard that's been marked as offline.
|
231
|
+
def clone_slaves_from_master
|
232
|
+
# If shard is already in state :child, it may already have slaves
|
233
|
+
slaves_needed = Jetpants.standby_slaves_per_pool
|
234
|
+
slaves_needed -= standby_slaves.size if @state == :child
|
235
|
+
if slaves_needed < 1
|
236
|
+
output "Shard already has enough standby slaves, skipping step of cloning more"
|
237
|
+
return
|
238
|
+
end
|
239
|
+
|
240
|
+
slaves_available = Jetpants.topology.count_spares(role: :standby_slave, like: master)
|
241
|
+
raise "Not enough standby_slave role machines in spare pool!" if slaves_needed > slaves_available
|
254
242
|
|
255
|
-
|
243
|
+
# Handle state transitions
|
244
|
+
if @state == :child || @state == :importing
|
256
245
|
@state = :replicating
|
257
246
|
sync_configuration
|
258
|
-
|
259
|
-
|
260
|
-
enslave!(my_slaves)
|
261
|
-
my_slaves.each {|slv| slv.resume_replication}
|
262
|
-
[self, my_slaves].flatten.each {|db| db.catch_up_to_master}
|
263
|
-
else
|
264
|
-
catch_up_to_master
|
265
|
-
end
|
247
|
+
elsif @state == :offline || @state == :replicating
|
248
|
+
# intentional no-op, no need to change state
|
266
249
|
else
|
267
|
-
raise "Shard not in a state compatible with calling
|
250
|
+
raise "Shard #{self} is not in a state compatible with calling clone_slaves_from_master! (current state=#{@state})"
|
268
251
|
end
|
269
252
|
|
270
|
-
|
253
|
+
my_slaves = Jetpants.topology.claim_spares(slaves_needed, role: :standby_slave, like: master)
|
254
|
+
enslave!(my_slaves)
|
255
|
+
my_slaves.each &:resume_replication
|
256
|
+
[self, my_slaves].flatten.each {|db| db.catch_up_to_master}
|
257
|
+
|
258
|
+
# Update state, if relevant
|
259
|
+
if @state == :replicating
|
260
|
+
@state = :child
|
261
|
+
sync_configuration
|
262
|
+
end
|
263
|
+
@children
|
271
264
|
end
|
272
265
|
|
273
|
-
#
|
274
|
-
#
|
275
|
-
#
|
276
|
-
#
|
277
|
-
# children
|
266
|
+
# Cleans up the state of a shard. This has two use-cases:
|
267
|
+
# A. Run this on a parent shard after the rest of a shard split is complete.
|
268
|
+
# Sets this shard's master to read-only; removes the application user from
|
269
|
+
# self (without replicating this change to children); disables replication
|
270
|
+
# between the parent and the children; and then removes rows from the
|
271
|
+
# children that replicated to the wrong shard.
|
272
|
+
# B. Run this on a shard that just underwent a two-step promotion process which
|
273
|
+
# moved all reads, and then all writes, to a slave that has slaves of its own.
|
274
|
+
# For example, if upgrading MySQL on a shard by creating a newer-version slave
|
275
|
+
# and then adding slaves of its own to it (temp hierarchical replication setup).
|
276
|
+
# You can use this method to then "eject" the older-version master and its
|
277
|
+
# older-version slaves from the pool.
|
278
278
|
def cleanup!
|
279
|
-
raise "Can only run cleanup! on a parent shard in the deprecated state" unless @state == :deprecated
|
280
279
|
raise "Cannot call cleanup! on a child shard" if @parent
|
280
|
+
|
281
|
+
# situation A - clean up after a shard split
|
282
|
+
if @state == :deprecated && @children.size > 0
|
283
|
+
tables = Table.from_config 'sharded_tables'
|
284
|
+
@master.revoke_all_access!
|
285
|
+
@children.concurrent_each do |child_shard|
|
286
|
+
raise "Child state does not indicate cleanup is needed" unless child_shard.state == :needs_cleanup
|
287
|
+
raise "Child shard master should be a slave in order to clean up" unless child_shard.is_slave?
|
288
|
+
child_shard.master.disable_replication! # stop slaving from parent
|
289
|
+
child_shard.prune_data_to_range tables, child_shard.min_id, child_shard.max_id
|
290
|
+
end
|
281
291
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
292
|
+
# We have to iterate over a copy of the @children array, rather than the array
|
293
|
+
# directly, since Array#each skips elements when you remove elements in-place,
|
294
|
+
# which Shard#remove_child does...
|
295
|
+
@children.dup.each do |child_shard|
|
296
|
+
child_shard.state = :ready
|
297
|
+
remove_child child_shard
|
298
|
+
child_shard.sync_configuration
|
299
|
+
end
|
300
|
+
@state = :recycle
|
290
301
|
|
291
|
-
#
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
302
|
+
# situation B - clean up after a two-step shard master promotion
|
303
|
+
elsif @state == :needs_cleanup && @master.master && !@parent
|
304
|
+
eject_master = @master.master
|
305
|
+
eject_slaves = @master.slaves.reject {|s| s == @master}
|
306
|
+
eject_master.revoke_all_access!
|
307
|
+
@master.disable_replication!
|
308
|
+
|
309
|
+
# We need to update the asset tracker to no longer consider the ejected
|
310
|
+
# nodes as part of this pool. This includes ejecting the old master, which
|
311
|
+
# might be handled by Pool#after_master_promotion! instead
|
312
|
+
# of Shard#sync_configuration.
|
313
|
+
after_master_promotion!(@master, false) if respond_to? :after_master_promotion!
|
314
|
+
|
315
|
+
@state = :ready
|
316
|
+
|
317
|
+
else
|
318
|
+
raise "Shard #{self} is not in a state compatible with calling cleanup! (state=#{state}, child count=#{@children.size}"
|
298
319
|
end
|
299
|
-
|
320
|
+
|
300
321
|
sync_configuration
|
301
322
|
end
|
302
323
|
|
@@ -309,6 +330,62 @@ module Jetpants
|
|
309
330
|
true
|
310
331
|
end
|
311
332
|
|
333
|
+
|
334
|
+
###### Private methods #####################################################
|
335
|
+
private
|
336
|
+
|
337
|
+
# Splits self's ID range into num_children pieces
|
338
|
+
# Returns an array of [low_id, high_id] arrays, suitable for
|
339
|
+
# passing to Shard#init_child_shard_masters
|
340
|
+
def even_split_id_range(num_children)
  raise "Cannot calculate an even split of last shard" if @max_id == 'INFINITY'
  raise "Cannot split an ID range into #{num_children} pieces" if num_children < 1

  ids_total = 1 + @max_id - @min_id

  # With fewer IDs than pieces, some children would get an inverted (empty)
  # range such as [5, 4]; refuse rather than return unusable ranges.
  raise "Cannot split #{ids_total} IDs into #{num_children} non-empty ranges" if ids_total < num_children

  # Every child gets base_size IDs; the first `remainder` children get one extra
  base_size, remainder = ids_total.divmod(num_children)
  current_min_id = @min_id
  (0...num_children).map do |i|
    ids_this_pool = base_size + (i < remainder ? 1 : 0)
    range = [current_min_id, current_min_id + ids_this_pool - 1]
    current_min_id += ids_this_pool
    range
  end
end
|
353
|
+
|
354
|
+
# Early step of shard split process: initialize child shard pools, pull boxes from
|
355
|
+
# spare list to use as masters for these new shards, and then populate them with the
|
356
|
+
# full data set from self (the shard being split).
|
357
|
+
#
|
358
|
+
# Supply an array of [min_id, max_id] arrays, specifying the ID ranges to use for each
|
359
|
+
# child. For example, if self has @min_id = 1001 and @max_id = 4000, and you're splitting
|
360
|
+
# into 3 evenly-sized child shards, you'd supply [[1001,2000], [2001,3000], [3001, 4000]]
|
361
|
+
def init_child_shard_masters(id_ranges)
  # Validations: make sure enough machines in spare pool; enough slaves of shard being split;
  # no existing children of shard being split
  # TODO: fix the first check to separately account for :role, ie check master and standby_slave counts separately
  # (this is actually quite difficult since we can't provide a :like node in a sane way)
  spares_needed = id_ranges.size * (1 + Jetpants.standby_slaves_per_pool)
  raise "Not enough machines in spare pool!" if spares_needed > Jetpants.topology.count_spares(role: :master, like: master)
  raise 'Shard split functionality requires Jetpants config setting "standby_slaves_per_pool" is at least 1' if Jetpants.standby_slaves_per_pool < 1
  raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.size < Jetpants.standby_slaves_per_pool
  raise "Shard #{self} already has #{@children.size} child shards" if @children.size > 0

  # We'll clone the full parent data set from a standby slave of the shard being split.
  # Resolve it before claiming any spares: the slave-count check above counts all
  # slaves of the master, so there may still be no standby slave to copy from, and
  # failing after the children were registered would leave a half-initialized split.
  source = standby_slaves.first
  raise "Shard #{self} has no standby slave to clone child shard masters from" unless source

  # Set up the child shards, and give them masters
  id_ranges.each do |my_range|
    spare = Jetpants.topology.claim_spare(role: :master, like: master)
    spare.disable_read_only! if (spare.running? && spare.read_only?)
    spare.output "Will be master for new shard with ID range of #{my_range.first} to #{my_range.last} (inclusive)"
    s = Shard.new(my_range.first, my_range.last, spare, :initializing)
    add_child(s)
    Jetpants.topology.pools << s
    s.sync_configuration
  end

  # Populate every new child master from the chosen standby slave
  targets = @children.map(&:master)
  source.enslave_siblings! targets
end
|
388
|
+
|
312
389
|
end
|
313
390
|
end
|
314
391
|
|