jetpants 0.7.0 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -66,6 +66,12 @@ Other recommended uses of plugins include integration with your site's monitorin
66
66
 
67
67
  For more information on how to write plugins and use the Jetpants::CallbackHandler system, please see doc/plugins.rdoc ({view on GitHub}[https://github.com/tumblr/jetpants/blob/master/doc/plugins.rdoc])
68
68
 
69
+ == FREQUENTLY ASKED QUESTIONS:
70
+
71
+ Please see doc/faq.rdoc ({view on GitHub}[https://github.com/tumblr/jetpants/blob/master/doc/faq.rdoc]) for answers to common questions.
72
+
73
+ If you have a question that isn't covered here, please feel free to email the authors at the addresses listed in jetpants.gemspec.
74
+
69
75
  == CREDITS:
70
76
 
71
77
  * <b>Evan Elias</b>: Lead developer. Core class implementations, shard split logic, plugin system
@@ -1,9 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  jetpants_base_dir = File.expand_path(File.dirname(__FILE__) + '/..')
3
3
  $:.unshift File.join(jetpants_base_dir, 'lib')
4
- %w[thor pry state_machine highline/import terminal-table colored].each {|g| require g}
5
- # load tasks
6
- Dir[File.join jetpants_base_dir, 'tasks', '**'].each {|f| require f}
4
+ %w[thor pry highline/import terminal-table colored].each {|g| require g}
7
5
 
8
6
  module Jetpants
9
7
 
@@ -28,6 +26,7 @@ module Jetpants
28
26
  self.send "after_#{task_name}" if self.respond_to? "after_#{task_name}"
29
27
  end
30
28
 
29
+
31
30
  desc 'console', 'Jetpants interactive console'
32
31
  def console
33
32
  Jetpants.pry
@@ -42,13 +41,82 @@ module Jetpants
42
41
  print "\n#{message}\n\n"
43
42
  end
44
43
 
45
- desc 'promotion', 'perform a master promotion'
44
+
45
+ desc 'promotion', 'perform a master promotion, changing which node is the master of a pool'
46
46
  method_option :demote, :desc => 'node to demote'
47
47
  method_option :promote, :desc => 'node to promote'
48
48
  def promotion
49
- Tasks::Promotion.new(options)
49
+ # It's not uncommon for the demoted master to be an offline/unavailable node, so relax Jetpants' normal
50
+ # checks regarding replication threads being in different states.
51
+ Jetpants.verify_replication = false
52
+
53
+ promoted = options[:promote] ? options[:promote].to_db : nil
54
+ demoted = options[:demote] ? options[:demote].to_db : nil
55
+
56
+ if promoted && !demoted
57
+ error "Node to promote #{promoted} is not a slave" unless promoted.is_slave?
58
+ demoted = promoted.master
59
+ inform "Will demote #{demoted}, the master of specified promoted node #{promoted}."
60
+ end
61
+
62
+ if demoted
63
+ demoted.probe
64
+ else
65
+ demoted = ask_node 'Please enter the IP address of the node to demote:'
66
+ if demoted.running?
67
+ error 'Cannot demote a node that has no slaves!' unless demoted.has_slaves?
68
+ else
69
+ inform "Unable to connect to node #{demoted} to demote"
70
+ error "Unable to perform promotion" unless agree "Please confirm that #{demoted} is offline [yes/no]: "
71
+
72
+ # An asset-tracker plugin may have populated the slave list anyway
73
+ if demoted.slaves && demoted.slaves.count > 0
74
+ demoted.slaves.each {|s| s.probe}
75
+ else
76
+ replicas = ask("Please enter a comma-separated list of IP addresses of all current replicas of #{demoted}: ").split /\s*,\s*/
77
+ error "No replicas entered" unless replicas && replicas.count > 0
78
+ error "User supplied list of replicas appears to be invalid - #{replicas}" unless replicas.all? {|replica| is_ip? replica}
79
+ demoted.instance_eval {@slaves = replicas.map &:to_db}
80
+ demoted.slaves.each do |replica|
81
+ # Validate that they are really slaves of demoted
82
+ error "#{replica} does not appear to be a valid replica of #{demoted}" unless replica.master == demoted
83
+ end
84
+ end
85
+ end
86
+ end
87
+
88
+ puts
89
+ inform "Summary of affected pool"
90
+ inform "Binary log positions and slave lag shown below are just a snapshot taken at the current time." if demoted.running?
91
+ puts
92
+ demoted.pool(true).summary(true)
93
+ puts
94
+
95
+ unless promoted
96
+ if demoted.running?
97
+ inform "Recommendation: promote the standby slave with the highest binary log coordinates"
98
+ else
99
+ inform "Recommendation: promote the standby slave or active slave with the highest binary log coordinates"
100
+ end
101
+ promoted = ask_node 'Please enter the IP address of the node to promote: '
102
+ end
103
+
104
+ error "Unable to determine a node to demote and a node to promote" unless demoted.kind_of?(Jetpants::DB) && promoted.kind_of?(Jetpants::DB)
105
+ error "Node to promote #{promoted} is not a slave of node to demote #{demoted}" unless promoted.master == demoted
106
+ error "Cannot promote a backup slave. Please choose another." if promoted.for_backups?
107
+
108
+ inform "Going to DEMOTE existing master #{demoted} and PROMOTE new master #{promoted}."
109
+ error "Aborting." unless agree "Proceed? [yes/no]: "
110
+ demoted.pool(true).master_promotion! promoted
50
111
  end
51
-
112
+ def self.after_promotion
113
+ reminders(
114
+ 'Commit/push the configuration in version control.',
115
+ 'Deploy the configuration to all machines.',
116
+ )
117
+ end
118
+
119
+
52
120
  desc 'show_slaves', 'show the current slaves of a master'
53
121
  method_option :node, :desc => 'node to query for slaves'
54
122
  def show_slaves
@@ -70,7 +138,8 @@ module Jetpants
70
138
  inform "node (#{node}) currently has no slaves."
71
139
  end
72
140
  end
73
-
141
+
142
+
74
143
  desc 'show_master', 'show the current master of a node'
75
144
  method_option :node, :desc => 'node to query for master'
76
145
  method_option :siblings, :desc => 'show nodes current slave siblings'
@@ -97,7 +166,8 @@ module Jetpants
97
166
  inform "node (#{node}) does not appear to be a slave"
98
167
  end
99
168
  end
100
-
169
+
170
+
101
171
  desc 'node_info', 'show information about a given node'
102
172
  method_option :node, :desc => 'node to query for information'
103
173
  def node_info
@@ -146,11 +216,13 @@ module Jetpants
146
216
  end
147
217
  end
148
218
 
219
+
149
220
  desc 'regen_config', 'regenerate the application configuration'
150
221
  def regen_config
151
222
  Jetpants.topology.write_config
152
223
  end
153
224
 
225
+
154
226
  desc 'clone_slave', 'clone a standby slave'
155
227
  method_option :source, :desc => 'IP of node to clone from'
156
228
  method_option :target, :desc => 'IP of node to clone to'
@@ -180,6 +252,7 @@ module Jetpants
180
252
  )
181
253
  end
182
254
 
255
+
183
256
  desc 'activate_slave', 'turn a standby slave into an active slave'
184
257
  method_option :node, :desc => 'IP of standby slave to activate'
185
258
  def activate_slave
@@ -195,9 +268,11 @@ module Jetpants
195
268
  Jetpants.topology.write_config
196
269
  end
197
270
 
271
+
198
272
  desc 'weigh_slave', 'change the weight of an active slave'
199
273
  alias :weigh_slave :activate_slave
200
274
 
275
+
201
276
  desc 'pull_slave', 'turn an active slave into a standby slave'
202
277
  method_option :node, :desc => 'IP of active slave to pull'
203
278
  def pull_slave
@@ -209,6 +284,7 @@ module Jetpants
209
284
  Jetpants.topology.write_config
210
285
  end
211
286
 
287
+
212
288
  desc 'destroy_slave', 'remove a standby slave from its pool'
213
289
  method_option :node, :desc => 'IP of standby slave to remove'
214
290
  def destroy_slave
@@ -221,6 +297,7 @@ module Jetpants
221
297
  node.pool.remove_slave!(node)
222
298
  end
223
299
 
300
+
224
301
  desc 'rebuild_slave', 'export and re-import data set on a standby slave'
225
302
  method_option :node, :desc => 'IP of standby slave to rebuild'
226
303
  def rebuild_slave
@@ -233,6 +310,7 @@ module Jetpants
233
310
  node.rebuild!
234
311
  end
235
312
 
313
+
236
314
  desc 'shard_read_only', 'mark a shard as read-only'
237
315
  method_option :min_id, :desc => 'Minimum ID of shard to mark as read-only'
238
316
  def shard_read_only
@@ -244,6 +322,7 @@ module Jetpants
244
322
  Jetpants.topology.write_config
245
323
  end
246
324
 
325
+
247
326
  desc 'shard_offline', 'mark a shard as offline (not readable or writable)'
248
327
  method_option :min_id, :desc => 'Minimum ID of shard to mark as offline'
249
328
  def shard_offline
@@ -255,6 +334,7 @@ module Jetpants
255
334
  Jetpants.topology.write_config
256
335
  end
257
336
 
337
+
258
338
  desc 'shard_online', 'mark a shard as fully online (readable and writable)'
259
339
  method_option :min_id, :desc => 'Minimum ID of shard to mark as fully online'
260
340
  def shard_online
@@ -266,6 +346,7 @@ module Jetpants
266
346
  Jetpants.topology.write_config
267
347
  end
268
348
 
349
+
269
350
  desc 'shard_split', 'shard split step 1 of 4: spin up child pools with different portions of data set'
270
351
  method_option :min_id, :desc => 'Minimum ID of parent shard to split'
271
352
  method_option :max_id, :desc => 'Maximum ID of parent shard to split'
@@ -315,6 +396,7 @@ module Jetpants
315
396
  )
316
397
  end
317
398
 
399
+
318
400
  # This step is only really necessary if asset-tracker changes don't immediately reflect in application configuration.
319
401
  # (ie, if app configuration is a static file that needs to be deployed to webs.)
320
402
  desc 'shard_split_child_reads', 'shard split step 2 of 4: move reads to child shards'
@@ -330,6 +412,7 @@ module Jetpants
330
412
  )
331
413
  end
332
414
 
415
+
333
416
  desc 'shard_split_child_writes', 'shard split step 3 of 4: move writes to child shards'
334
417
  method_option :min_id, :desc => 'Minimum ID of parent shard being split'
335
418
  method_option :max_id, :desc => 'Maximum ID of parent shard being split'
@@ -351,6 +434,7 @@ module Jetpants
351
434
  )
352
435
  end
353
436
 
437
+
354
438
  desc 'shard_split_cleanup', 'shard split step 4 of 4: clean up data that replicated to wrong shard'
355
439
  method_option :min_id, :desc => 'Minimum ID of parent shard being split'
356
440
  method_option :max_id, :desc => 'Maximum ID of parent shard being split'
@@ -368,6 +452,7 @@ module Jetpants
368
452
  )
369
453
  end
370
454
 
455
+
371
456
  desc 'shard_cutover', 'truncate the current last shard range, and add a new shard after it'
372
457
  method_option :cutover_id, :desc => 'Minimum ID of new last shard being created'
373
458
  def shard_cutover
@@ -412,6 +497,7 @@ module Jetpants
412
497
  )
413
498
  end
414
499
 
500
+
415
501
  no_tasks do
416
502
  def is_ip? address
417
503
  address =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/
@@ -424,6 +510,12 @@ module Jetpants
424
510
  def inform message
425
511
  puts message.blue
426
512
  end
513
+
514
+ def ask_node(prompt)
515
+ node = ask prompt
516
+ error "Node (#{node}) does not appear to be an IP address." unless is_ip? node
517
+ node.to_db
518
+ end
427
519
  end
428
520
 
429
521
  def self.reminders(*strings)
@@ -23,7 +23,9 @@ These commands change the type of a slave, or promote a slave to be a master. <
23
23
 
24
24
  <b><tt>jetpants promotion</tt></b> changes which node in a pool is the master by performing a full MySQL master promotion. This is usable even if the old master is offline or unavailable. All nodes in the pool will now slave off of the new master. If the old master is online/available, it will become a standby slave of the new master.
25
25
 
26
- Please note that the master promotion process enables global READ_ONLY mode on the old master. This is a required step of the standard MySQL master promotion technique. After doing a promotion in \Jetpants, you'll need to update/deploy your application's configuration as quickly as possible, if a plugin doesn't do it automatically for you.
26
+ Please note that the master promotion process enables global READ_ONLY mode on the old master. This is a required step of the most generic MySQL master promotion technique. After doing a promotion in \Jetpants, you'll need to update/deploy your application's configuration as quickly as possible, if a plugin doesn't do it automatically for you.
27
+
28
+ Be aware that if the old master is offline/unavailable and the pool's slaves have replicated different amounts of data (ie, their relay logs progressed to different points at the exact moment the old master died), <tt>jetpants promotion</tt> may result in minor data inconsistencies (a couple seconds of writes) because these lost transactions are not automatically replayed on slaves that missed them. You can manually replay them using <tt>mysqlbinlog</tt>; this process is difficult to automate, which is why Jetpants and many other promotion tools do not do so. This may be implemented in a future release.
27
29
 
28
30
  <b><tt>jetpants activate_slave</tt></b> turns a standby slave into an active slave. Use this if you want to generate an app configuration that now sends read queries to a slave that formerly did not receive them.
29
31
 
@@ -0,0 +1,117 @@
1
+ = Frequently Asked Questions
2
+
3
+ == Is \Jetpants a server? How do I connect to it?
4
+
5
+ \Jetpants is an automation toolkit, not a server. In this way it differs from most other large-scale MySQL sharding solutions, which tend to be middleware/proxy servers.
6
+
7
+ The benefit of a toolkit is that you can still leverage standard MySQL replication, still use InnoDB/XtraDB as a robust storage engine choice, etc. \Jetpants largely doesn't interfere with any of that, and instead just provides tools to help you manage a large MySQL topology and support a range-based sharding scheme.
8
+
9
+
10
+ == Is \Jetpants still useful if my architecture isn't sharded?
11
+
12
+ Potentially, since \Jetpants fully supports "global" pools, also known as "functional partitions". You can even use \Jetpants to help manage a standard single-pool MySQL topology (1 master and some number of slaves) for handling common operations like slave cloning and master promotions. That said, there are other tools that may be easier to use if your MySQL footprint is smaller than, say, a dozen machines.
13
+
14
+ However, \Jetpants is also very useful as a Ruby library for performing arbitrary data migrations. It comes with methods for quickly importing and exporting large amounts of data, so it can be used for this purpose regardless of what your database topology looks like.
15
+
16
+
17
+ == What is a sharding key?
18
+
19
+ A sharding key is a core foreign key column that is present in most of your large tables, which can be used to group your data into shards. For many sites this could be <tt>user_id</tt> or <tt>customer_id</tt>, but it depends entirely on your data model and access patterns.
20
+
21
+ For example, on a blogging site the sharding key might be <tt>blog_id</tt>. Most tables that contain a <tt>blog_id</tt> column can be sharded, which will mean that all data related to a particular blog (posts, comments on those posts, authors, etc) is found on the same shard. By organizing data this way, you can continue to use relational operations such as JOIN when querying data that lives on the same shard.
22
+
23
+ Regardless of sharding key, some tables will not be shardable. This includes any "global" table that doesn't contain your sharding key column, as well as any tables that have global lookup patterns. For this reason you might not be able to shard the core table which has your sharding key as its primary key!
24
+
25
+ In other words: if your sharding key is <tt>user_id</tt>, you might not actually be able to shard your <tt>users</tt> table because you need to do global lookups (ie, by email address) on this table. Denormalization is a common work-around; you could split your users table into a "global lookup" portion in a global pool and an "extended data" portion that lives on shards.
26
+
27
+
28
+ == What is range-based sharding? Why use it, and what are the alternatives?
29
+
30
+ Range-based sharding groups data based on ranges of your sharding key. For example, with a sharding key of <tt>user_id</tt>, all sharded data for users 1-1000 may be on the first shard, users 1001-3000 on the second shard, and users 3001-infinity on the third and final shard.
31
+
32
+ The main benefit of range-based sharding is simplicity. You can express the shard ranges in a language-neutral format like YAML or JSON, and the code to route queries to the correct DB can be implemented in a trivially small amount of code. There's no need for a lookup service, so we avoid a single point of failure. It's also easy for a human to look at the ranges and figure out which DB to query when debugging a problem by hand.
33
+
34
+ Rebalancing range-based shards can be accomplished quickly as long as the primary key of each table begins with the sharding key. InnoDB stores data in order of its primary key, which means it is extremely fast and efficient to dump out a portion of your data set based on a range of your sharding key.
35
+
36
+ The main downside to the range-based approach is lack of even distribution of "hot" data. If a small handful of users on a given shard are using a disproportionate amount of resources, there's no way to move _only_ those users to a different shard. For this reason, range-based sharding can work best for "long-tail" sites where the majority of activity is created by the majority of common users.
37
+
38
+ Some alternatives to the range-based approach include:
39
+
40
+ * <b>Modulus or hash</b>: Apply a function to your sharding key to determine which shard the data lives on.
41
+
42
+ This approach helps to distribute data very evenly. Many sites find that their latest users behave differently than their oldest users, so grouping users together by ranges of ID (essentially ranges of account creation date) can be problematic. Using a modulus or hash avoids this problem.
43
+
44
+ The main issue with this approach is how to rebalance shards that are too large. A simple modulus can't do this unless you want to simultaneously split all of your shards in half, which leads to painful exponential growth. A hash function can be more versatile but can still lead to great complexity. Worse yet, there's no way to rebalance _quickly_ because data is not stored on disk in sorted order based on the hash function.
45
+
46
+ * <b>Lookup table</b>: Use a separate service or data store which takes a sharding key value as an input and returns the appropriate shard as an output.
47
+
48
+ This scheme allows you to very specifically allocate particular data to shards, and works well for sites that have a lot of "hot" data from celebrity users. However, the lookup service is essentially a single point of failure, which counteracts many of the attractive features of sharded architectures. Rebalancing can also be slow and tricky, since you need a notion of "locking" a sharding key value while its rows are being migrated.
49
+
50
+
51
+ == How does \Jetpants perform slave-cloning?
52
+
53
+ \Jetpants clones slaves by stopping replication, shutting down the MySQL daemon, and then copying the raw files to the destination(s). This is the fastest way to get a consistent clone of a data set in MySQL. After the copy operation is complete, we start MySQL back up on the source and destinations, and then make the destination instances start slaving at the appropriate binlog coordinates.
54
+
55
+ We perform the copy operation using a combination of tar (for archiving), pigz (for fast compression), and nc (for transferring the data over the network). If there are multiple destinations, we create a serial "copy chain" using tee and a fifo.
56
+
57
+ Please note that we don't encrypt the data in this process, so we assume you are using it on a private LAN or over a VPN tunnel.
58
+
59
+ Because this process shuts down MySQL, you can only use it on a standby slave. Never use it on a machine that is actively taking queries from your application. If you need to do that, use a hot-copy solution instead.
60
+
61
+
62
+ == What are standby slaves? Why run two of them per pool?
63
+
64
+ Standby slaves are standard MySQL replicas that your application doesn't send queries to. We recommend maintaining exactly 2 standby slaves in every single pool/shard for high availability reasons:
65
+
66
+ * If a pool's master fails, you promote one standby slave to be the new master, and use the second standby slave to clone a replacement for the first standby slave.
67
+
68
+ * If an active slave fails, promote one standby slave to be a new active slave in its place, and use the second standby slave to clone a replacement for the first.
69
+
70
+ * If a standby slave fails, use the other standby slave to clone a replacement.
71
+
72
+ In other words: as long as you have two standbys, you can recover from a single failure quickly, without needing to do a hot-copy (which is much slower). Faster recovery time = less time in a degraded state = lower chance that a second failure will occur while the pool is already degraded.
73
+
74
+ Resist the temptation to send any queries from your application to your standby slaves. If your application's read requirements are high enough to require additional nodes, create more active slaves as needed, but don't repurpose the standbys without replacing them. Otherwise, if a machine fails, you'd no longer have enough capacity to serve normal traffic load or no longer have a way to quickly spin up replacement nodes.
75
+
76
+ You can, however, use your standby slaves for creating backups, running ad-hoc batch/analytic queries, etc. You can also make _one_ of your standby slaves be a weaker class of hardware if desired, and just take care to only use that node for cloning slaves, never for directly promoting. \Jetpants supports this, and considers this type of slave to be a "backup slave".
77
+
78
+
79
+ == When should I split a shard?
80
+
81
+ Typically when some individual component on the shard's master is getting close to being full/saturated:
82
+
83
+ * Disk is getting full, in terms of capacity -- 80%+ impacts performance for SSDs and eventually for most filesystems as well
84
+ * Disk utilization (ie, what <tt>iostat</tt> shows you) is reaching 90%+
85
+ * Network utilization is approaching your link's saturation point
86
+
87
+ Depending on your type of disk and amount of RAM, you may find that the first two may happen at roughly the same time. An increasingly large data set usually means your working set will exceed your amount of memory, so InnoDB's cache hit rate starts to drop, and your disk utilization starts creeping upwards.
88
+
89
+
90
+ == Why does so much of the command suite functionality require an asset tracker plugin?
91
+
92
+ For any given operation that requires an asset tracker, there's one of two reasons:
93
+
94
+ * The operation involves juggling a lot of servers. For example, a shard split needs to be able to obtain a minimum of 6 spare MySQL instances, and eventually turns the original shard's 3 MySQL instances into spares. Doing this kind of operation without an automated asset tracker can easily lead to major human error.
95
+
96
+ * The operation inherently involves generating a new configuration for your application -- for example, setting a shard to read-only or promoting a standby slave to an active slave. These operations are meaningless outside of your application, since MySQL has no notion of "standby slave" or "degraded shard". \Jetpants has a notion of these things, but needs to persist the information somewhere, and it makes more sense to have \Jetpants relay this information to an external hardware management tool rather than maintain a separate (and potentially conflicting) source of truth.
97
+
98
+ If you have enough servers to be using a sharded architecture, you hopefully already have some sort of hardware management / asset tracker system in place. \Jetpants is designed to be integrated with this system, but since every site runs something different, this requires that you write some custom plugin code to achieve.
99
+
100
+
101
+ == Can I use \Jetpants with PostgreSQL?
102
+
103
+ The core functionality is currently very MySQL-specific. In theory a plugin could override a bunch of methods to target Postgres, and maybe even Redis or other persistent data stores with replication and import/export functionality. This would be a substantial effort though.
104
+
105
+ At present, several methods have "mysql" in the name. These may change to more generic names in an upcoming release; in this case the old names will still be available as aliases to the new ones.
106
+
107
+
108
+ == In the shard split process, why create the standby slaves AFTER doing the export / re-import?
109
+
110
+ We do this to avoid replicating the LOAD DATA INFILE statements. Because MySQL replication is single-threaded, these statements won't execute in parallel on slaves, so the import process would be substantially slower. Instead, we create the new shard masters, do the export/import dance on those instances, and THEN clone their final data set to 2 new standby slaves each.
111
+
112
+ This also allows us to disable binary logging during the import process, which is a very noticeable speed enhancement.
113
+
114
+
115
+ == In the cleanup stage of a shard split, why not just remove unwanted data with a single DELETE statement?
116
+
117
+ Because MySQL replication is single-threaded, it's a bad idea to execute single write queries that impact thousands of rows, since these will cause slaves to lag. Giant transactions are also not ideal in general due to how MVCC and rollbacks work in InnoDB.
@@ -6,10 +6,11 @@ Plugins may freely override these assumptions, and upstream patches are very wel
6
6
 
7
7
  == Environment
8
8
 
9
- * Using MySQL (or Percona Server), specifically version 5.1 or higher.
10
- * Using a RHEL/CentOS distribution of Linux.
9
+ * Ruby 1.9.2 or higher
10
+ * MySQL (or Percona Server), specifically version 5.1 or higher.
11
+ * a RHEL/CentOS distribution of Linux.
11
12
  * It should be easy to write a plugin supporting another distribution. The main change might be overriding Jetpants::Host#service, if your distribution doesn't have <tt>/sbin/service</tt>.
12
- * Using InnoDB / Percona XtraDB for storage engine. \Jetpants has not been tested with MyISAM, since \Jetpants is geared towards huge tables, and MyISAM is generally a bad fit.
13
+ * InnoDB / Percona XtraDB for storage engine. \Jetpants has not been tested with MyISAM, since \Jetpants is geared towards huge tables, and MyISAM is generally a bad fit.
13
14
  * All MySQL instances run on port 3306, with only one instance per logical machine.
14
15
  * A plugin could override this easily, but would require you to use the --report-host option on all slaves, so that crawling the replication topology is possible. It would also have to override various methods that specify the MySQL init script location, config file location, data directory, etc.
15
16
  * Since there's no "standard" layout for multi-instance MySQL, this won't ever be part of the \Jetpants core, but we may include one implementation as a bundled plugin in a future release.
@@ -17,21 +17,21 @@ module Jetpants
17
17
  # Establish default configuration values, and then merge in whatever we find globally
18
18
  # in /etc/jetpants.yaml and per-user in ~/.jetpants.yaml
19
19
  @config = {
20
- 'max_concurrency' => 40, # max threads/conns per database
21
- 'standby_slaves_per_pool' => 2, # number of standby slaves in every pool
22
- 'mysql_schema' => 'test', # database name
23
- 'mysql_app_user' => false, # mysql user for application
24
- 'mysql_app_password' => false, # mysql password for application
25
- 'mysql_repl_user' => false, # mysql user for replication
26
- 'mysql_repl_password' => false, # mysql password for replication
27
- 'mysql_root_password' => false, # mysql root password. omit if specified in /root/.my.cnf instead.
20
+ 'max_concurrency' => 40, # max threads/conns per database
21
+ 'standby_slaves_per_pool' => 2, # number of standby slaves in every pool
22
+ 'mysql_schema' => 'test', # database name
23
+ 'mysql_app_user' => 'appuser', # mysql user for application
24
+ 'mysql_app_password' => '', # mysql password for application
25
+ 'mysql_repl_user' => 'repluser', # mysql user for replication
26
+ 'mysql_repl_password' => '', # mysql password for replication
27
+ 'mysql_root_password' => false, # mysql root password. omit if specified in /root/.my.cnf instead.
28
28
  'mysql_grant_ips' => ['192.168.%'], # mysql user manipulations are applied to these IPs
29
- 'mysql_grant_privs' => ['ALL'], # mysql user manipulations grant this set of privileges by default
30
- 'export_location' => '/tmp', # directory to use for data dumping
31
- 'verify_replication' => true, # raise exception if the 2 repl threads are in different states, or if actual repl topology differs from Jetpants' understanding of it
32
- 'plugins' => {}, # hash of plugin name => arbitrary plugin data (usually a nested hash of settings)
33
- 'ssh_keys' => nil, # array of SSH key file locations
34
- 'sharded_tables' => [], # array of name => {sharding_key=>X, chunks=>Y} hashes
29
+ 'mysql_grant_privs' => ['ALL'], # mysql user manipulations grant this set of privileges by default
30
+ 'export_location' => '/tmp', # directory to use for data dumping
31
+ 'verify_replication' => true, # raise exception if the 2 repl threads are in different states, or if actual repl topology differs from Jetpants' understanding of it
32
+ 'plugins' => {}, # hash of plugin name => arbitrary plugin data (usually a nested hash of settings)
33
+ 'ssh_keys' => nil, # array of SSH key file locations
34
+ 'sharded_tables' => [], # array of name => {sharding_key=>X, chunks=>Y} hashes
35
35
  }
36
36
  %w(/etc/jetpants.yaml ~/.jetpants.yml ~/.jetpants.yaml).each do |path|
37
37
  overrides = YAML.load_file(File.expand_path path) rescue {}
@@ -68,19 +68,21 @@ module Jetpants
68
68
  # been split.
69
69
  def revoke_all_access!
70
70
  user_name = Jetpants.app_credentials[:user]
71
- output("Revoking access for user #{user_name} and setting global read-only.")
72
- read_only!
71
+ enable_read_only!
72
+ output "Revoking access for user #{user_name}."
73
73
  output(drop_user(user_name, true)) # drop the user without replicating the drop statement to slaves
74
74
  end
75
75
 
76
76
  # Enables global read-only mode on the database.
77
- def read_only!
77
+ def enable_read_only!
78
+ output "Enabling global read_only mode"
78
79
  mysql_root_cmd 'SET GLOBAL read_only = 1' unless read_only?
79
80
  read_only?
80
81
  end
81
82
 
82
83
  # Disables global read-only mode on the database.
83
84
  def disable_read_only!
85
+ output "Disabling global read_only mode"
84
86
  mysql_root_cmd 'SET GLOBAL read_only = 0' if read_only?
85
87
  not read_only?
86
88
  end
@@ -37,7 +37,7 @@ module Jetpants
37
37
  "MASTER_USER='#{repl_user}', " +
38
38
  "MASTER_PASSWORD='#{repl_pass}'"
39
39
 
40
- output "Changing master to #{new_master} with coordinates (#{logfile}, #{pos}): #{result}"
40
+ output "Changing master to #{new_master} with coordinates (#{logfile}, #{pos}). #{result}"
41
41
  @master.slaves.delete(self) if @master rescue nil
42
42
  @master = new_master
43
43
  @repl_paused = true
@@ -67,7 +67,7 @@ module Jetpants
67
67
  def disable_replication!
68
68
  raise "This DB object has no master" unless master
69
69
  output "Disabling replication; this db is no longer a slave."
70
- output mysql_root_cmd "STOP SLAVE; RESET SLAVE"
70
+ output mysql_root_cmd "STOP SLAVE; CHANGE MASTER TO master_host=''; RESET SLAVE"
71
71
  @master.slaves.delete(self) rescue nil
72
72
  @master = nil
73
73
  @repl_paused = nil
@@ -84,6 +84,7 @@ module Jetpants
84
84
  def enslave!(targets, repl_user=false, repl_pass=false)
85
85
  repl_user ||= (Jetpants.replication_credentials[:user] || replication_credentials[:user])
86
86
  repl_pass ||= (Jetpants.replication_credentials[:pass] || replication_credentials[:pass])
87
+ disable_monitoring
87
88
  pause_replication if master && ! @repl_paused
88
89
  file, pos = binlog_coordinates
89
90
  clone_to!(targets)
@@ -95,6 +96,7 @@ module Jetpants
95
96
  password: repl_pass )
96
97
  end
97
98
  resume_replication if @master # should already have happened from the clone_to! restart anyway, but just to be explicit
99
+ enable_monitoring
98
100
  end
99
101
 
100
102
  # Wipes out the target instances and turns them into slaves of self's master.
@@ -138,10 +140,10 @@ module Jetpants
138
140
  # database. Only useful when called on a master. This is the current
139
141
  # instance's own binlog coordinates, NOT the coordinates of replication
140
142
  # progress on a slave!
141
- def binlog_coordinates
143
+ def binlog_coordinates(display_info=true)
142
144
  hash = mysql_root_cmd('SHOW MASTER STATUS', :parse=>true)
143
145
  raise "Cannot obtain binlog coordinates of this master becaues binary logging is not enabled" unless hash[:file]
144
- output "Own binlog coordinates are (#{hash[:file]}, #{hash[:position].to_i})."
146
+ output "Own binlog coordinates are (#{hash[:file]}, #{hash[:position].to_i})." if display_info
145
147
  [hash[:file], hash[:position].to_i]
146
148
  end
147
149
 
@@ -149,7 +151,8 @@ module Jetpants
149
151
  # as reported by SHOW SLAVE STATUS.
150
152
  def seconds_behind_master
151
153
  raise "This instance is not a slave" unless master
152
- slave_status[:seconds_behind_master].to_i
154
+ lag = slave_status[:seconds_behind_master]
155
+ lag == 'NULL' ? nil : lag.to_i
153
156
  end
154
157
 
155
158
  # Waits for this instance's SECONDS_BEHIND_MASTER to reach 0 and stay at
@@ -175,6 +178,10 @@ module Jetpants
175
178
  return true
176
179
  end
177
180
  sleep poll_frequency
181
+ elsif lag.nil?
182
+ resume_replication
183
+ sleep 1
184
+ raise "Unable to restart replication" if seconds_behind_master.nil?
178
185
  else
179
186
  output "Currently #{lag} seconds behind master."
180
187
  times_at_zero = 0
@@ -61,6 +61,7 @@ module Jetpants
61
61
  probe_running
62
62
  probe_master
63
63
  probe_slaves
64
+ self
64
65
  end
65
66
 
66
67
  # Alias for probe(true)
@@ -144,10 +145,34 @@ module Jetpants
144
145
  end
145
146
 
146
147
  # Returns the Jetpants::Pool that this instance belongs to, if any.
147
- def pool
148
- Jetpants.topology.pool(self) || Jetpants.topology.pool(master)
148
+ # Can optionally create an anonymous pool if no pool was found. This anonymous
149
+ # pool intentionally has a blank sync_configuration implementation.
150
+ def pool(create_if_missing=false)
151
+ result = Jetpants.topology.pool(self) || Jetpants.topology.pool(master)
152
+ if !result && create_if_missing
153
+ pool_master = master || self
154
+ result = Pool.new('anon_pool_' + pool_master.ip.tr('.', ''), pool_master)
155
+ def result.sync_configuration; end
156
+ end
157
+ return result
149
158
  end
150
159
 
160
+ # Determines the DB's role in its pool. Returns either :master,
161
+ # :active_slave, :standby_slave, or :backup_slave.
162
+ #
163
+ # Note that we consider a node with no master and no slaves to be
164
+ # a :master, since we can't determine if it had slaves but they're
165
+ # just offline/dead, vs it being an orphaned machine.
166
+ def role
167
+ p = pool
168
+ case
169
+ when !@master then :master
170
+ when for_backups? then :backup_slave
171
+ when p && p.active_slave_weights[self] then :active_slave # if pool in topology, determine based on expected/ideal state
172
+ when !p && !is_standby? then :active_slave # if pool missing from topology, determine based on actual state
173
+ else :standby_slave
174
+ end
175
+ end
151
176
 
152
177
  ###### Private methods #####################################################
153
178
 
@@ -178,8 +203,9 @@ module Jetpants
178
203
  raise "#{self}: #{message}" if Jetpants.verify_replication
179
204
  output message
180
205
  pause_replication
206
+ else
207
+ @repl_paused = (status[:slave_io_running].downcase == 'no')
181
208
  end
182
- @repl_paused = (status[:slave_io_running].downcase == 'no')
183
209
  end
184
210
  end
185
211
 
@@ -202,7 +228,7 @@ module Jetpants
202
228
  processes.grep(/Binlog Dump/).concurrent_each do |p|
203
229
  tokens = p.split
204
230
  ip, dummy = tokens[2].split ':'
205
- db = self.class.new(ip)
231
+ db = ip.to_db
206
232
  db.probe
207
233
  slaves_mutex.synchronize {@slaves << db if db.master == self}
208
234
  end
@@ -25,6 +25,8 @@ module Jetpants
25
25
  end
26
26
 
27
27
  def initialize(ip)
28
+ # Only supporting ipv4 for now
29
+ raise "Invalid IP address: #{ip}" unless ip =~ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
28
30
  @ip = ip
29
31
  @connection_pool = [] # array of idle Net::SSH::Connection::Session objects
30
32
  @lock = Mutex.new
@@ -94,7 +94,7 @@ module Jetpants
94
94
  # returns a flat array of all Jetpants::DB objects in the pool: the master and
95
95
  # all slaves of all types.
96
96
  def nodes
97
- [master, slaves].flatten
97
+ [master, slaves].flatten.compact
98
98
  end
99
99
 
100
100
  # Informs Jetpants that slave_db is an active slave. Potentially used by
@@ -152,52 +152,96 @@ module Jetpants
152
152
  # of returning a string, so that you can invoke something like:
153
153
  # Jetpants.topology.pools.each &:summary
154
154
  # to easily display a summary.
155
- def summary
155
+ def summary(extended_info=false)
156
156
  probe
157
- if @aliases.count > 0
158
- alias_text = ' (aliases: ' + @aliases.join(', ') + ')'
157
+
158
+ alias_text = @aliases.count > 0 ? ' (aliases: ' + @aliases.join(', ') + ')' : ''
159
+ data_size = @master.running? ? "[#{master.data_set_size(true)}GB]" : ''
160
+ print "#{name}#{alias_text} #{data_size}\n"
161
+
162
+ if extended_info
163
+ details = {}
164
+ nodes.concurrent_each do |s|
165
+ if !s.running?
166
+ details[s] = {coordinates: ['unknown'], lag: 'N/A'}
167
+ elsif s == @master
168
+ details[s] = {coordinates: s.binlog_coordinates(false), lag: 'N/A'}
169
+ else
170
+ details[s] = {coordinates: s.repl_binlog_coordinates(false), lag: s.seconds_behind_master.to_s + 's'}
171
+ end
172
+ end
159
173
  end
160
- print "#{name}#{alias_text} [#{master.data_set_size(true)}GB]\n"
161
- print "\tmaster = %-13s #{master.hostname}\n" % @master.ip
174
+
175
+ binlog_pos = extended_info ? details[@master][:coordinates].join(':') : ''
176
+ print "\tmaster = %-13s %-30s %s\n" % [@master.ip, @master.hostname, binlog_pos]
177
+
162
178
  [:active, :standby, :backup].each do |type|
163
179
  slave_list = slaves(type)
164
- slave_list.each_with_index do |s, i|
165
- print "\t%-7s slave #{i + 1} = %-13s #{s.hostname}\n" % [type, s.ip]
180
+ slave_list.sort.each_with_index do |s, i|
181
+ binlog_pos = extended_info ? details[s][:coordinates].join(':') : ''
182
+ slave_lag = extended_info ? "lag=#{details[s][:lag]}" : ''
183
+ print "\t%-7s slave #{i + 1} = %-13s %-30s %-26s %s\n" % [type, s.ip, s.hostname, binlog_pos, slave_lag]
166
184
  end
167
185
  end
168
186
  true
169
187
  end
170
188
 
171
- # Performs the last steps of the master promotion process. Do not use this
172
- # as a stand-alone method; there's other necessary logic, such as setting
173
- # the old master to read-only mode, and doing a STOP SLAVE on all slaves.
174
- # Use the "jetpants promotion" task instead to do an interactive promotion.
175
- # (In a future release, this will be refactored to be fully scriptable.)
189
+ # Demotes the pool's existing master, promoting a slave in its place.
176
190
  def master_promotion!(promoted)
177
191
  demoted = @master
178
- raise "Promoted host is not in the right pool!" unless @master.slaves.include? promoted
179
- user, password = promoted.replication_credentials.values
180
- log, position = promoted.binlog_coordinates
181
-
182
- # reset slave on promoted
183
- if demoted.available?
184
- promoted.disable_replication!
192
+ raise "Demoted node is already the master of this pool!" if demoted == promoted
193
+ raise "Promoted host is not in the right pool!" unless demoted.slaves.include?(promoted)
194
+
195
+ output "Preparing to demote master #{demoted} and promote #{promoted} in its place."
196
+
197
+ # If demoted machine is available, confirm it is read-only and binlog isn't moving,
198
+ # and then wait for slaves to catch up to this position
199
+ if demoted.running?
200
+ demoted.enable_read_only! unless demoted.read_only?
201
+ raise "Unable to enable global read-only mode on demoted machine" unless demoted.read_only?
202
+ coordinates = demoted.binlog_coordinates
203
+ raise "Demoted machine still taking writes (from superuser or replication?) despite being read-only" unless coordinates == demoted.binlog_coordinates
204
+ demoted.slaves.concurrent_each do |s|
205
+ while true do
206
+ sleep 1
207
+ break if s.repl_binlog_coordinates == coordinates
208
+ output "Still catching up to coordinates of demoted master"
209
+ end
210
+ end
211
+
212
+ # Demoted machine not available -- wait for slaves' binlogs to stop moving
185
213
  else
186
- promoted.mysql_root_cmd "STOP SLAVE; RESET SLAVE"
214
+ demoted.slaves.concurrent_each do |s|
215
+ progress = s.repl_binlog_coordinates
216
+ while true do
217
+ sleep 1
218
+ break if s.repl_binlog_coordinates == progress
219
+ s.output "Still catching up on replication"
220
+ end
221
+ end
222
+ end
223
+
224
+ # Stop replication on all slaves
225
+ replicas = demoted.slaves.dup
226
+ replicas.each do |s|
227
+ s.pause_replication if s.replicating?
187
228
  end
229
+ raise "Unable to stop replication on all slaves" if replicas.any? {|s| s.replicating?}
230
+
231
+ user, password = promoted.replication_credentials.values
232
+ log, position = promoted.binlog_coordinates
233
+
234
+ # reset slave on promoted, and make sure read_only is disabled
235
+ promoted.disable_replication!
236
+ promoted.disable_read_only!
188
237
 
189
238
  # gather our new replicas
190
- replicas = demoted.slaves.select {|replica| replica != promoted}
191
- replicas << demoted if demoted.available?
192
- replicas.flatten!
193
-
239
+ replicas.delete promoted
240
+ replicas << demoted if demoted.running?
241
+
194
242
  # perform promotion
195
- replicas.each do |replica|
196
- replica.change_master_to promoted,
197
- :user => user,
198
- :password => password,
199
- :log_file => log,
200
- :log_pos => position
243
+ replicas.each do |r|
244
+ r.change_master_to promoted, user: user, password: password, log_file: log, log_pos: position
201
245
  end
202
246
 
203
247
  # ensure our replicas are configured correctly by comparing our staged values to current values of replicas
@@ -215,13 +259,15 @@ module Jetpants
215
259
  end
216
260
 
217
261
  # Update the pool
218
- # Note: if the demoted machine is offline, plugin may need to implement an
262
+ # Note: if the demoted machine is not available, plugin may need to implement an
219
263
  # after_master_promotion! method which handles this case in configuration tracker
220
264
  @active_slave_weights.delete promoted # if promoting an active slave, remove it from read pool
221
265
  @master = promoted
222
266
  sync_configuration
223
267
  Jetpants.topology.write_config
224
268
 
269
+ output "Promotion complete. Pool master is now #{promoted}."
270
+
225
271
  replicas.all? {|r| r.replicating?}
226
272
  end
227
273
 
@@ -169,7 +169,7 @@ module Jetpants
169
169
 
170
170
  init_children(pieces) unless @children.count > 0
171
171
 
172
- @children.concurrent_each {|c| c.stop_query_killer; c.disable_binary_logging}
172
+ @children.concurrent_each {|c| c.disable_binary_logging}
173
173
  clone_to_children!
174
174
  @children.concurrent_each {|c| c.rebuild!}
175
175
  @children.each {|c| c.sync_configuration}
@@ -225,25 +225,22 @@ module Jetpants
225
225
 
226
226
  # Exports data that should stay on this shard, drops and re-creates tables,
227
227
  # re-imports the data, and then adds slaves to the shard pool as needed.
228
- # The optional stage param lets you skip some steps, but this is only really
229
- # useful if you're running this manually and it failed part-way.
230
- def rebuild!(stage=0)
228
+ def rebuild!
231
229
  # Sanity check
232
230
  raise "Cannot rebuild a shard that isn't still slaving from another shard" unless @master.is_slave?
233
231
  raise "Cannot rebuild an active shard" if in_config?
234
232
 
233
+ stop_query_killer
235
234
  tables = Table.from_config 'sharded_tables'
236
235
 
237
- if stage <= 1
238
- raise "Shard is not in the expected initializing or exporting states" unless [:initializing, :exporting].include? @state
236
+ if [:initializing, :exporting].include? @state
239
237
  @state = :exporting
240
238
  sync_configuration
241
239
  export_schemata tables
242
240
  export_data tables, @min_id, @max_id
243
241
  end
244
242
 
245
- if stage <= 2
246
- raise "Shard is not in the expected exporting or importing states" unless [:exporting, :importing].include? @state
243
+ if [:exporting, :importing].include? @state
247
244
  @state = :importing
248
245
  sync_configuration
249
246
  import_schemata!
@@ -252,8 +249,7 @@ module Jetpants
252
249
  start_query_killer
253
250
  end
254
251
 
255
- if stage <= 3
256
- raise "Shard is not in the expected importing or replicating states" unless [:importing, :replicating].include? @state
252
+ if [:importing, :replicating].include? @state
257
253
  enable_binary_logging
258
254
  restart_mysql
259
255
  @state = :replicating
@@ -262,6 +258,8 @@ module Jetpants
262
258
  enslave!(my_slaves)
263
259
  my_slaves.each {|slv| slv.resume_replication}
264
260
  [self, my_slaves].flatten.each {|db| db.catch_up_to_master}
261
+ else
262
+ raise "Shard not in a state compatible with calling rebuild! (current state=#{@state})"
265
263
  end
266
264
 
267
265
  @state = :child
@@ -298,8 +296,8 @@ module Jetpants
298
296
  end
299
297
 
300
298
  # Displays information about the shard
301
- def summary(with_children=true)
302
- super()
299
+ def summary(extended_info=false, with_children=true)
300
+ super(extended_info)
303
301
  if with_children
304
302
  children.each {|c| c.summary}
305
303
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: jetpants
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.7.0
5
+ version: 0.7.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - Evan Elias
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2012-06-07 00:00:00 Z
14
+ date: 2012-06-18 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: mysql2
@@ -47,7 +47,7 @@ dependencies:
47
47
  type: :runtime
48
48
  version_requirements: *id003
49
49
  - !ruby/object:Gem::Dependency
50
- name: state_machine
50
+ name: pry
51
51
  prerelease: false
52
52
  requirement: &id004 !ruby/object:Gem::Requirement
53
53
  none: false
@@ -58,7 +58,7 @@ dependencies:
58
58
  type: :runtime
59
59
  version_requirements: *id004
60
60
  - !ruby/object:Gem::Dependency
61
- name: pry
61
+ name: thor
62
62
  prerelease: false
63
63
  requirement: &id005 !ruby/object:Gem::Requirement
64
64
  none: false
@@ -69,7 +69,7 @@ dependencies:
69
69
  type: :runtime
70
70
  version_requirements: *id005
71
71
  - !ruby/object:Gem::Dependency
72
- name: thor
72
+ name: highline
73
73
  prerelease: false
74
74
  requirement: &id006 !ruby/object:Gem::Requirement
75
75
  none: false
@@ -80,7 +80,7 @@ dependencies:
80
80
  type: :runtime
81
81
  version_requirements: *id006
82
82
  - !ruby/object:Gem::Dependency
83
- name: highline
83
+ name: terminal-table
84
84
  prerelease: false
85
85
  requirement: &id007 !ruby/object:Gem::Requirement
86
86
  none: false
@@ -91,7 +91,7 @@ dependencies:
91
91
  type: :runtime
92
92
  version_requirements: *id007
93
93
  - !ruby/object:Gem::Dependency
94
- name: terminal-table
94
+ name: colored
95
95
  prerelease: false
96
96
  requirement: &id008 !ruby/object:Gem::Requirement
97
97
  none: false
@@ -101,17 +101,6 @@ dependencies:
101
101
  version: "0"
102
102
  type: :runtime
103
103
  version_requirements: *id008
104
- - !ruby/object:Gem::Dependency
105
- name: colored
106
- prerelease: false
107
- requirement: &id009 !ruby/object:Gem::Requirement
108
- none: false
109
- requirements:
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- version: "0"
113
- type: :runtime
114
- version_requirements: *id009
115
104
  description: Jetpants is an automation toolkit for handling monstrously large MySQL database topologies. It is geared towards common operational tasks like cloning slaves, rebalancing shards, and performing master promotions. It features a command suite for easy use by operations staff, though it's also a full Ruby library for use in developing custom migration scripts and database automation.
116
105
  email:
117
106
  - me@evanelias.com
@@ -122,39 +111,40 @@ extensions: []
122
111
 
123
112
  extra_rdoc_files:
124
113
  - README.rdoc
125
- - doc/plugins.rdoc
126
114
  - doc/configuration.rdoc
127
- - doc/commands.rdoc
115
+ - doc/faq.rdoc
128
116
  - doc/requirements.rdoc
117
+ - doc/commands.rdoc
118
+ - doc/plugins.rdoc
129
119
  files:
130
120
  - Gemfile
131
121
  - README.rdoc
132
- - doc/plugins.rdoc
133
122
  - doc/configuration.rdoc
134
- - doc/commands.rdoc
123
+ - doc/faq.rdoc
135
124
  - doc/requirements.rdoc
136
- - lib/jetpants/callback.rb
137
- - lib/jetpants/topology.rb
138
- - lib/jetpants/db/server.rb
139
- - lib/jetpants/db/state.rb
125
+ - doc/commands.rdoc
126
+ - doc/plugins.rdoc
127
+ - lib/jetpants/monkeypatch.rb
140
128
  - lib/jetpants/db/import_export.rb
141
129
  - lib/jetpants/db/privileges.rb
142
130
  - lib/jetpants/db/client.rb
143
131
  - lib/jetpants/db/replication.rb
144
- - lib/jetpants/shard.rb
132
+ - lib/jetpants/db/server.rb
133
+ - lib/jetpants/db/state.rb
145
134
  - lib/jetpants/db.rb
146
- - lib/jetpants/host.rb
135
+ - lib/jetpants/shard.rb
147
136
  - lib/jetpants/pool.rb
148
- - lib/jetpants/monkeypatch.rb
149
137
  - lib/jetpants/table.rb
138
+ - lib/jetpants/topology.rb
139
+ - lib/jetpants/callback.rb
140
+ - lib/jetpants/host.rb
150
141
  - lib/jetpants.rb
151
142
  - bin/jetpants
152
- - plugins/simple_tracker/topology.rb
153
- - plugins/simple_tracker/shard.rb
154
- - plugins/simple_tracker/simple_tracker.rb
155
143
  - plugins/simple_tracker/db.rb
144
+ - plugins/simple_tracker/shard.rb
156
145
  - plugins/simple_tracker/pool.rb
157
- - tasks/promotion.rb
146
+ - plugins/simple_tracker/simple_tracker.rb
147
+ - plugins/simple_tracker/topology.rb
158
148
  - etc/jetpants.yaml.sample
159
149
  homepage: https://github.com/tumblr/jetpants/
160
150
  licenses: []
@@ -1,260 +0,0 @@
1
- module Jetpants
2
- module Tasks
3
- class Promotion
4
-
5
- def initialize nodes = {}
6
- @demoted = nodes['demote']
7
- @promoted = nodes['promote']
8
- super
9
- Jetpants.verify_replication = false # since master may be offline
10
- advise
11
- establish_roles
12
- prepare
13
- end
14
-
15
- def error message
16
- abort ['ERROR:'.red, message].join ' '
17
- end
18
-
19
- def inform message
20
- puts message.blue
21
- end
22
-
23
- def is_ip? address
24
- address =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/
25
- end
26
-
27
- def establish_roles
28
- establish_demoted
29
- establish_replicas
30
- establish_promoted
31
- end
32
-
33
- def establish_demoted
34
- # derive demoted from promoted if possible
35
- if @promoted and not @demoted
36
- error "invalid ip address #{@promoted}" unless is_ip? @promoted
37
- @promoted = Jetpants::DB.new @promoted
38
-
39
- # bail the promoted node isn't a slave or we can't connect
40
- unless @promoted.is_slave?
41
- error "node (#{@promoted}) does not appear to be a replica of another node"
42
- end rescue error("unable to connect to node #{@promoted} to promote")
43
-
44
- # recommend a node to demote
45
- agreed = agree [
46
- "Would you like to demote the following node?",
47
- "address: #{@promoted.master}",
48
- "slaves : #{@promoted.master.slaves.join(', ')}",
49
- "- yes/no -"
50
- ].join "\n"
51
- error "unable to promote #{@promoted} unless you demote #{@promoted.master}" unless agreed
52
-
53
- @demoted = @promoted.master.ip
54
- end
55
-
56
- # unable to derive demoted, so ask and convert to a DB object
57
- unless @demoted.kind_of? Jetpants::DB
58
- @demoted = ask 'Please enter the node to demote:' unless @demoted
59
- error "Invalid IP address #{@demoted}" unless is_ip? @demoted
60
- @demoted = @demoted.to_db
61
- end
62
-
63
- # connect and ensure node is a master; handle offline nodes appropriately
64
- if @demoted.available?
65
- error 'Cannot demote a node that has no slaves!' unless @demoted.has_slaves?
66
- else
67
- inform "unable to connect to node #{@demoted} to demote"
68
- error "unable to perform promotion" unless agree "please confirm that #{@demoted} is offline: yes/no "
69
- @replicas = @demoted.slaves # An asset-tracker plugin may have been populated the slave list anyway
70
- if !@replicas || @replicas.count < 1
71
- replicas = ask "please provide a comma seperated list of current replicas of #{@demoted}: ", lambda {|replicas| replicas.split /,\s*/}
72
- error "user supplied list of replicas appears to be invalid - #{replicas}" unless replicas.all? {|replica| is_ip? replica}
73
- @replicas = replicas.collect {|replica| replica.to_db}
74
-
75
- # ensure they were replicas of @demoted
76
- @replicas.each do |replica|
77
- error "#{replica} does not appear to be a valid replica of #{@demoted}" unless replica.master == @demoted
78
- end
79
- end
80
- end
81
-
82
- error 'unable to establish demoteable node' unless @demoted.kind_of? Jetpants::DB
83
- end
84
-
85
- def establish_replicas
86
- @replicas ||= @demoted.slaves
87
- error 'no replicas to promote' if @replicas.empty?
88
- error 'replicas appear to be invalid' unless @replicas.all? {|replica| replica.kind_of? Jetpants::DB}
89
- inform "#{@demoted} has the following replicas: #{@replicas.join(', ')}"
90
- end
91
-
92
- def establish_promoted
93
- # user supplied node to promote
94
- if @promoted and not @promoted.kind_of? Jetpants::DB
95
- error "invalid ip address #{@promoted}" unless is_ip? @promoted
96
- @promoted = Jetpants::DB.new @promoted
97
- end
98
-
99
- # user hasn't supplied a valid node to promote
100
- unless @replicas.include? @promoted
101
- inform "unable to promote node (#{@promoted}) that is not a replica of #{@demoted}" if @promoted
102
-
103
- # recommend a node
104
- puts "\nREPLICA LIST:"
105
- @replicas.sort_by {|replica| replica.seconds_behind_master}.each do |node|
106
- file, pos = node.repl_binlog_coordinates(false)
107
- puts " * %-13s %-30s lag: %2ds coordinates: (%-13s, %d)" % [node.ip, node.hostname, node.seconds_behind_master, file, pos]
108
- end
109
- puts
110
- recommended = @replicas.sort_by {|replica| replica.seconds_behind_master}.reject {|r| r.for_backups?}.first
111
- agreed = agree [
112
- "Would you like to promote the following replica?",
113
- "#{recommended.ip} (#{recommended.hostname})",
114
- "- yes/no -"
115
- ].join "\n"
116
- @promoted = recommended if agreed
117
-
118
- # choose a new node if they disagreed with our recommendation
119
- unless agreed
120
- choose do |promote|
121
- promote.prompt = 'Please choose a replica to promote:'
122
- @replicas.each do |replica|
123
- promote.choice "#{replica} - replication lag: #{replica.seconds_behind_master} seconds" do
124
- @promoted = replica
125
- end
126
- end
127
- end
128
- raise "You chose a backup slave. These are not suitable for promotion. Please try again." if @promoted.for_backups?
129
- end
130
- end
131
-
132
- error "unable to establish node to promote" unless @promoted.kind_of? Jetpants::DB
133
- end
134
-
135
- def advise
136
- @states = {
137
- preparing: "processing promotion requirements",
138
- prepared: "preparing to disable writes on #{@demoted}",
139
- read_only: "writes have been disabled on #{@demoted}, preparing to demote #{@demoted} and promote #{@promoted}",
140
- promoted: "#{@promoted} has been promoted, please prepare database config for deploy.",
141
- deployable: "promotion is complete, please commit and deploy.",
142
- }
143
- inform @states[@state.to_sym]
144
- end
145
-
146
- state_machine :initial => :preparing do
147
- after_transition any => any, :do => :advise
148
-
149
- event :prepare do
150
- transition :preparing => :prepared, :if => :roles_populated?
151
- end
152
- after_transition :preparing => :prepared, :do => :disable_writes
153
-
154
- event :disable_writes do
155
- transition :prepared => :read_only, :if => :read_only!
156
- end
157
- after_transition :prepared => :read_only, :do => :promote
158
-
159
- event :promote do
160
- transition :read_only => :promoted, :if => :execute_promotion
161
- end
162
- after_transition :read_only => :promoted, :do => :prepare_config
163
-
164
- event :prepare_config do
165
- transition :promoted => :deployable, :if => :nodes_consistent?
166
- end
167
- after_transition :promoted => :deployable, :do => :summarize_promotion
168
-
169
- state :preparing, :prepared do
170
- def is_db? node
171
- node.kind_of? Jetpants::DB
172
- end
173
-
174
- def roles_populated?
175
- # ensure our roles are populated with dbs
176
- [@demoted, @promoted, @replicas].all? do |role|
177
- is_db? role or role.all? do |node|
178
- is_db? node
179
- end
180
- end
181
- end
182
-
183
- def read_only!
184
- unless @demoted.available?
185
- status = @promoted.slave_status
186
- @log, @position = status[:master_log_file], status[:exec_master_log_pos].to_i
187
- return true
188
- end
189
-
190
- # set read_only if needed
191
- @demoted.read_only! unless @demoted.read_only?
192
- # bail if we're unable to set read_only
193
- error "unable to set 'read_only' on #{@demoted}" unless @demoted.read_only?
194
- # record the current log possition to ensure writes are not taking place later.
195
- @log, @position = @demoted.binlog_coordinates
196
- error "#{@demoted} is still taking writes, unable to promote #{@promoted}" unless writes_disabled?
197
- @demoted.read_only?
198
- end
199
-
200
- def writes_disabled?
201
- return true unless @demoted.available?
202
-
203
- # ensure no writes have been logged since read_only!
204
- [@log, @position] == @demoted.binlog_coordinates
205
- end
206
-
207
- end
208
-
209
- state :read_only, :promoted, :promoted, :deployable do
210
- def nodes_consistent?
211
- return true unless @demoted.available?
212
- @replicas.all? {|replica| replica.slave_status[:exec_master_log_pos].to_i == @position}
213
- end
214
-
215
- def ensure_nodes_consistent?
216
- inform "ensuring replicas are in a consistent state"
217
- until nodes_consistent? do
218
- print '.'
219
- sleep 0.5
220
- end
221
- nodes_consistent?
222
- end
223
-
224
- def promotable?
225
- disable_replication if ensure_nodes_consistent? and @promoted.disable_read_only!
226
- end
227
-
228
- def execute_promotion
229
- error 'nodes are not in a promotable state.' unless promotable?
230
- error 'replicas are not in a consistent state' unless nodes_consistent?
231
-
232
- @demoted.pool.master_promotion! @promoted
233
- end
234
-
235
- def replicas_replicating? replicas = @replicas
236
- replicas.all? {|replica| replica.replicating?}
237
- end
238
-
239
- def disable_replication replicas = @replicas
240
- replicas.each do |replica|
241
- replica.pause_replication if replica.replicating?
242
- end
243
- not replicas_replicating? replicas
244
- end
245
-
246
- def summarize_promotion transition
247
- summary = Terminal::Table.new :title => 'Promotion Summary:' do |rows|
248
- rows << ['demoted', @demoted]
249
- rows << ['promoted', @promoted]
250
- rows << ["replicas of #{@promoted}", @promoted.slaves.join(', ')]
251
- end
252
- puts summary
253
- exit
254
- end
255
- end
256
- end
257
-
258
- end
259
- end
260
- end