jetpants 0.7.0 → 0.7.2
- data/README.rdoc +6 -0
- data/bin/jetpants +100 -8
- data/doc/commands.rdoc +3 -1
- data/doc/faq.rdoc +117 -0
- data/doc/requirements.rdoc +4 -3
- data/lib/jetpants.rb +14 -14
- data/lib/jetpants/db/privileges.rb +5 -3
- data/lib/jetpants/db/replication.rb +12 -5
- data/lib/jetpants/db/state.rb +30 -4
- data/lib/jetpants/host.rb +2 -0
- data/lib/jetpants/pool.rb +78 -32
- data/lib/jetpants/shard.rb +10 -12
- metadata +23 -33
- data/tasks/promotion.rb +0 -260
data/README.rdoc
CHANGED
@@ -66,6 +66,12 @@ Other recommended uses of plugins include integration with your site's monitorin
 
 For more information on how to write plugins and use the Jetpants::CallbackHandler system, please see doc/plugins.rdoc ({view on GitHub}[https://github.com/tumblr/jetpants/blob/master/doc/plugins.rdoc])
 
+== FREQUENTLY ASKED QUESTIONS:
+
+Please see doc/faq.rdoc ({view on GitHub}[https://github.com/tumblr/jetpants/blob/master/doc/faq.rdoc]) for answers to common questions.
+
+If you have a question that isn't covered here, please feel free to email the authors at the addresses listed in jetpants.gemspec.
+
 == CREDITS:
 
 * <b>Evan Elias</b>: Lead developer. Core class implementations, shard split logic, plugin system
data/bin/jetpants
CHANGED
@@ -1,9 +1,7 @@
 #!/usr/bin/env ruby
 jetpants_base_dir = File.expand_path(File.dirname(__FILE__) + '/..')
 $:.unshift File.join(jetpants_base_dir, 'lib')
-%w[thor pry
-# load tasks
-Dir[File.join jetpants_base_dir, 'tasks', '**'].each {|f| require f}
+%w[thor pry highline/import terminal-table colored].each {|g| require g}
 
 module Jetpants
 
@@ -28,6 +26,7 @@ module Jetpants
       self.send "after_#{task_name}" if self.respond_to? "after_#{task_name}"
     end
 
+
     desc 'console', 'Jetpants interactive console'
     def console
       Jetpants.pry
@@ -42,13 +41,82 @@ module Jetpants
       print "\n#{message}\n\n"
     end
 
-
+
+    desc 'promotion', 'perform a master promotion, changing which node is the master of a pool'
     method_option :demote,  :desc => 'node to demote'
     method_option :promote, :desc => 'node to promote'
     def promotion
-
+      # It's not uncommon for the demoted master to be an offline/unavailable node, so relax Jetpants' normal
+      # checks regarding replication threads being in different states.
+      Jetpants.verify_replication = false
+
+      promoted = options[:promote] ? options[:promote].to_db : nil
+      demoted  = options[:demote]  ? options[:demote].to_db  : nil
+
+      if promoted && !demoted
+        error "Node to promote #{promoted} is not a slave" unless promoted.is_slave?
+        demoted = promoted.master
+        inform "Will demote #{demoted}, the master of specified promoted node #{promoted}."
+      end
+
+      if demoted
+        demoted.probe
+      else
+        demoted = ask_node 'Please enter the IP address of the node to demote:'
+        if demoted.running?
+          error 'Cannot demote a node that has no slaves!' unless demoted.has_slaves?
+        else
+          inform "Unable to connect to node #{demoted} to demote"
+          error "Unable to perform promotion" unless agree "Please confirm that #{demoted} is offline [yes/no]: "
+
+          # An asset-tracker plugin may have populated the slave list anyway
+          if demoted.slaves && demoted.slaves.count > 0
+            demoted.slaves.each {|s| s.probe}
+          else
+            replicas = ask("Please enter a comma-separated list of IP addresses of all current replicas of #{demoted}: ").split /\s*,\s*/
+            error "No replicas entered" unless replicas && replicas.count > 0
+            error "User supplied list of replicas appears to be invalid - #{replicas}" unless replicas.all? {|replica| is_ip? replica}
+            demoted.instance_eval {@slaves = replicas.map &:to_db}
+            demoted.slaves.each do |replica|
+              # Validate that they are really slaves of demoted
+              error "#{replica} does not appear to be a valid replica of #{demoted}" unless replica.master == demoted
+            end
+          end
+        end
+      end
+
+      puts
+      inform "Summary of affected pool"
+      inform "Binary log positions and slave lag shown below are just a snapshot taken at the current time." if demoted.running?
+      puts
+      demoted.pool(true).summary(true)
+      puts
+
+      unless promoted
+        if demoted.running?
+          inform "Recommendation: promote the standby slave with the highest binary log coordinates"
+        else
+          inform "Recommendation: promote the standby slave or active slave with the highest binary log coordinates"
+        end
+        promoted = ask_node 'Please enter the IP address of the node to promote: '
+      end
+
+      error "Unable to determine a node to demote and a node to promote" unless demoted.kind_of?(Jetpants::DB) && promoted.kind_of?(Jetpants::DB)
+      error "Node to promote #{promoted} is not a slave of node to demote #{demoted}" unless promoted.master == demoted
+      error "Cannot promote a backup slave. Please choose another." if promoted.for_backups?
+
+      inform "Going to DEMOTE existing master #{demoted} and PROMOTE new master #{promoted}."
+      error "Aborting." unless agree "Proceed? [yes/no]: "
+      demoted.pool(true).master_promotion! promoted
     end
-
+    def self.after_promotion
+      reminders(
+        'Commit/push the configuration in version control.',
+        'Deploy the configuration to all machines.',
+      )
+    end
+
+
     desc 'show_slaves', 'show the current slaves of a master'
     method_option :node, :desc => 'node to query for slaves'
     def show_slaves
@@ -70,7 +138,8 @@ module Jetpants
         inform "node (#{node}) currently has no slaves."
       end
     end
-
+
+
     desc 'show_master', 'show the current master of a node'
     method_option :node, :desc => 'node to query for master'
     method_option :siblings, :desc => 'show nodes current slave siblings'
@@ -97,7 +166,8 @@ module Jetpants
         inform "node (#{node}) does not appear to be a slave"
       end
     end
-
+
+
     desc 'node_info', 'show information about a given node'
     method_option :node, :desc => 'node to query for information'
    def node_info
@@ -146,11 +216,13 @@ module Jetpants
       end
     end
 
+
     desc 'regen_config', 'regenerate the application configuration'
     def regen_config
       Jetpants.topology.write_config
     end
 
+
     desc 'clone_slave', 'clone a standby slave'
     method_option :source, :desc => 'IP of node to clone from'
     method_option :target, :desc => 'IP of node to clone to'
@@ -180,6 +252,7 @@ module Jetpants
       )
     end
 
+
     desc 'activate_slave', 'turn a standby slave into an active slave'
     method_option :node, :desc => 'IP of standby slave to activate'
     def activate_slave
@@ -195,9 +268,11 @@ module Jetpants
       Jetpants.topology.write_config
     end
 
+
     desc 'weigh_slave', 'change the weight of an active slave'
     alias :weigh_slave :activate_slave
 
+
     desc 'pull_slave', 'turn an active slave into a standby slave'
     method_option :node, :desc => 'IP of active slave to pull'
     def pull_slave
@@ -209,6 +284,7 @@ module Jetpants
       Jetpants.topology.write_config
     end
 
+
     desc 'destroy_slave', 'remove a standby slave from its pool'
     method_option :node, :desc => 'IP of standby slave to remove'
     def destroy_slave
@@ -221,6 +297,7 @@ module Jetpants
       node.pool.remove_slave!(node)
     end
 
+
     desc 'rebuild_slave', 'export and re-import data set on a standby slave'
     method_option :node, :desc => 'IP of standby slave to rebuild'
     def rebuild_slave
@@ -233,6 +310,7 @@ module Jetpants
       node.rebuild!
     end
 
+
     desc 'shard_read_only', 'mark a shard as read-only'
     method_option :min_id, :desc => 'Minimum ID of shard to mark as read-only'
     def shard_read_only
@@ -244,6 +322,7 @@ module Jetpants
       Jetpants.topology.write_config
     end
 
+
     desc 'shard_offline', 'mark a shard as offline (not readable or writable)'
     method_option :min_id, :desc => 'Minimum ID of shard to mark as offline'
     def shard_offline
@@ -255,6 +334,7 @@ module Jetpants
       Jetpants.topology.write_config
     end
 
+
     desc 'shard_online', 'mark a shard as fully online (readable and writable)'
     method_option :min_id, :desc => 'Minimum ID of shard to mark as fully online'
     def shard_online
@@ -266,6 +346,7 @@ module Jetpants
       Jetpants.topology.write_config
     end
 
+
     desc 'shard_split', 'shard split step 1 of 4: spin up child pools with different portions of data set'
     method_option :min_id, :desc => 'Minimum ID of parent shard to split'
     method_option :max_id, :desc => 'Maximum ID of parent shard to split'
@@ -315,6 +396,7 @@ module Jetpants
       )
     end
 
+
     # This step is only really necessary if asset-tracker changes don't immediately reflect in application configuration.
     # (ie, if app configuration is a static file that needs to be deployed to webs.)
     desc 'shard_split_child_reads', 'shard split step 2 of 4: move reads to child shards'
@@ -330,6 +412,7 @@ module Jetpants
       )
     end
 
+
     desc 'shard_split_child_writes', 'shard split step 3 of 4: move writes to child shards'
     method_option :min_id, :desc => 'Minimum ID of parent shard being split'
     method_option :max_id, :desc => 'Maximum ID of parent shard being split'
@@ -351,6 +434,7 @@ module Jetpants
       )
     end
 
+
     desc 'shard_split_cleanup', 'shard split step 4 of 4: clean up data that replicated to wrong shard'
     method_option :min_id, :desc => 'Minimum ID of parent shard being split'
     method_option :max_id, :desc => 'Maximum ID of parent shard being split'
@@ -368,6 +452,7 @@ module Jetpants
       )
     end
 
+
     desc 'shard_cutover', 'truncate the current last shard range, and add a new shard after it'
     method_option :cutover_id, :desc => 'Minimum ID of new last shard being created'
     def shard_cutover
@@ -412,6 +497,7 @@ module Jetpants
       )
     end
 
+
     no_tasks do
       def is_ip? address
        address =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/
@@ -424,6 +510,12 @@ module Jetpants
       def inform message
         puts message.blue
       end
+
+      def ask_node(prompt)
+        node = ask prompt
+        error "Node (#{node}) does not appear to be an IP address." unless is_ip? node
+        node.to_db
+      end
     end
 
    def self.reminders(*strings)
data/doc/commands.rdoc
CHANGED
@@ -23,7 +23,9 @@ These commands change the type of a slave, or promote a slave to be a master. <
 
 <b><tt>jetpants promotion</tt></b> changes which node in a pool is the master by performing a full MySQL master promotion. This is usable even if the old master is offline or unavailable. All nodes in the pool will now slave off of the new master. If the old master is online/available, it will become a standby slave of the new master.
 
-Please note that the master promotion process enables global READ_ONLY mode on the old master. This is a required step of the
+Please note that the master promotion process enables global READ_ONLY mode on the old master. This is a required step of the most generic MySQL master promotion technique. After doing a promotion in \Jetpants, you'll need to update/deploy your application's configuration as quickly as possible, if a plugin doesn't do it automatically for you.
+
+Be aware that if the old master is offline/unavailable and the pool's slaves have replicated different amounts of data (ie, their relay logs progressed to different points at the exact moment the old master died), <tt>jetpants promotion</tt> may result in minor data inconsistencies (a couple seconds of writes) because these lost transactions are not automatically replayed on slaves that missed them. You can manually replay them using <tt>mysqlbinlog</tt>; this process is difficult to automate, which is why Jetpants and many other promotion tools do not do so. This may be implemented in a future release.
 
 <b><tt>jetpants activate_slave</tt></b> turns a standby slave into an active slave. Use this if you want to generate an app configuration that now sends read queries to a slave that formerly did not receive them.
 
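
As a hedged sketch of the manual <tt>mysqlbinlog</tt> replay mentioned above -- the positions, file name, and slave IP are placeholders you would read from the lagging slave's SHOW SLAVE STATUS and the old master's binlog directory:

  # Decode the old master's binary log from the byte offset the lagging
  # slave last executed, and apply the missed writes to that slave.
  start_pos = 107
  binlog    = '/var/lib/mysql/mysql-bin.000123'
  slave_ip  = '10.42.0.12'

  system("mysqlbinlog --start-position=#{start_pos} #{binlog} | mysql -h #{slave_ip}")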
data/doc/faq.rdoc
ADDED
@@ -0,0 +1,117 @@
+= Frequently Asked Questions
+
+== Is \Jetpants a server? How do I connect to it?
+
+\Jetpants is an automation toolkit, not a server. In this way it differs from most other large-scale MySQL sharding solutions, which tend to be middleware/proxy servers.
+
+The benefit of a toolkit is that you can still leverage standard MySQL replication, still use InnoDB/XtraDB as a robust storage engine choice, etc. \Jetpants largely doesn't interfere with any of that, and instead just provides tools to help you manage a large MySQL topology and support a range-based sharding scheme.
+
+
+== Is \Jetpants still useful if my architecture isn't sharded?
+
+Potentially, since \Jetpants fully supports "global" pools, also known as "functional partitions". You can even use \Jetpants to help manage a standard single-pool MySQL topology (1 master and some number of slaves) for handling common operations like slave cloning and master promotions. That said, there are other tools that may be easier to use if your MySQL footprint is smaller than, say, a dozen machines.
+
+However, \Jetpants is also very useful as a Ruby library for performing arbitrary data migrations. It comes with methods for quickly importing and exporting large amounts of data, so it can be used for this purpose regardless of what your database topology looks like.
+
+
+== What is a sharding key?
+
+A sharding key is a core foreign key column that is present in most of your large tables, which can be used to group your data into shards. For many sites this could be <tt>user_id</tt> or <tt>customer_id</tt>, but it depends entirely on your data model and access patterns.
+
+For example, on a blogging site the sharding key might be <tt>blog_id</tt>. Most tables that contain a <tt>blog_id</tt> column can be sharded, which will mean that all data related to a particular blog (posts, comments on those posts, authors, etc) is found on the same shard. By organizing data this way, you can continue to use relational operations such as JOIN when querying data that lives on the same shard.
+
+Regardless of sharding key, some tables will not be shardable. This includes any "global" table that doesn't contain your sharding key column, as well as any tables that have global lookup patterns. For this reason you might not be able to shard the core table which has your sharding_key as its primary key!
+
+In other words: if your sharding key is <tt>user_id</tt>, you might not actually be able to shard your <tt>users</tt> table because you need to do global lookups (ie, by email address) on this table. Denormalization is a common work-around; you could split your users table into a "global lookup" portion in a global pool and an "extended data" portion that lives on shards.
+
+
+== What is range-based sharding? Why use it, and what are the alternatives?
+
+Range-based sharding groups data based on ranges of your sharding key. For example, with a sharding key of <tt>user_id</tt>, all sharded data for users 1-1000 may be on the first shard, users 1001-3000 on the second shard, and users 3001-infinity on the third and final shard.
+
+The main benefit of range-based sharding is simplicity. You can express the shard ranges in a language-neutral format like YAML or JSON, and the code to route queries to the correct DB can be implemented in a trivially small amount of code. There's no need for a lookup service, so we avoid a single point of failure. It's also easy for a human to look at the ranges and figure out which DB to query when debugging a problem by hand.
+
+Rebalancing range-based shards can be accomplished quickly as long as the primary key of each table begins with the sharding key. InnoDB stores data in order of its primary key, which means it is extremely fast and efficient to dump out a portion of your data set based on a range of your sharding key.
+
+The main downside to the range-based approach is lack of even distribution of "hot" data. If a small handful of users on a given shard are using a disproportionate amount of resources, there's no way to move _only_ those users to a different shard. For this reason, range-based sharding can work best for "long-tail" sites where the majority of activity is created by the majority of common users.
+
+Some alternatives to the range-based approach include:
+
+* <b>Modulus or hash</b>: Apply a function to your sharding key to determine which shard the data lives on.
+
+  This approach helps to distribute data very evenly. Many sites find that their latest users behave differently than their oldest users, so grouping users together by ranges of ID (essentially ranges of account creation date) can be problematic. Using a modulus or hash avoids this problem.
+
+  The main issue with this approach is how to rebalance shards that are too large. A simple modulus can't do this unless you want to simultaneously split all of your shards in half, which leads to painful exponential growth. A hash function can be more versatile but can still lead to great complexity. Worse yet, there's no way to rebalance _quickly_ because data is not stored on disk in sorted order based on the hash function.
+
+* <b>Lookup table</b>: Use a separate service or data store which takes a sharding key value as an input and returns the appropriate shard as an output.
+
+  This scheme allows you to very specifically allocate particular data to shards, and works well for sites that have a lot of "hot" data from celebrity users. However, the lookup service is essentially a single point of failure, which counteracts many of the attractive features of sharded architectures. Rebalancing can also be slow and tricky, since you need a notion of "locking" a sharding key value while its rows are being migrated.
+
+
+== How does \Jetpants perform slave-cloning?
+
+\Jetpants clones slaves by stopping replication, shutting down the MySQL daemon, and then copying the raw files to the destination(s). This is the fastest way to get a consistent clone of a data set in MySQL. After the copy operation is complete, we start MySQL back up on the source and destinations, and then make the destination instances start slaving at the appropriate binlog coordinates.
+
+We perform the copy operation using a combination of tar (for archiving), pigz (for fast compression), and nc (for transferring the data over the network). If there are multiple destinations, we create a serial "copy chain" using tee and a fifo.
+
+Please note that we don't encrypt the data in this process, so we assume you are using it on a private LAN or over a VPN tunnel.
+
+Because this process shuts down MySQL, you can only use it on a standby slave. Never use it on a machine that is actively taking queries from your application. If you need to do that, use a hot-copy solution instead.
+
+
+== What are standby slaves? Why run two of them per pool?
+
+Standby slaves are standard MySQL replicas that your application doesn't send queries to. We recommend maintaining exactly 2 standby slaves in every single pool/shard for high availability reasons:
+
+* If a pool's master fails, you promote one standby slave to be the new master, and use the second standby slave to clone a replacement for the first standby slave.
+
+* If an active slave fails, promote one standby slave to be a new active slave in its place, and use the second standby slave to clone a replacement for the first.
+
+* If a standby slave fails, use the other standby slave to clone a replacement.
+
+In other words: as long as you have two standbys, you can recover from a single failure quickly, without needing to do a hot-copy (which is much slower). Faster recovery time = less time in a degraded state = lower chance that a second failure will occur while the pool is already degraded.
+
+Resist the temptation to send any queries from your application to your standby slaves. If your application's read requirements are high enough to require additional nodes, create more active slaves as needed, but don't repurpose the standbys without replacing them. Otherwise, if a machine fails, you'd no longer have enough capacity to serve normal traffic load or no longer have a way to quickly spin up replacement nodes.
+
+You can, however, use your standby slaves for creating backups, running ad-hoc batch/analytic queries, etc. You can also make _one_ of your standby slaves be a weaker class of hardware if desired, and just take care to only use that node for cloning slaves, never for directly promoting. \Jetpants supports this, and considers this type of slave to be a "backup slave".
+
+
+== When should I split a shard?
+
+Typically when some individual component on the shard's master is getting close to being full/saturated:
+
+* Disk is getting full, in terms of capacity -- 80%+ impacts performance for SSDs and eventually for most filesystems as well
+* Disk utilization (ie, what <tt>iostat</tt> shows you) is reaching 90%+
+* Network utilization is approaching your link's saturation point
+
+Depending on your type of disk and amount of RAM, you may find that the first two happen at roughly the same time. An increasingly large data set usually means your working set will exceed your amount of memory, so InnoDB's cache hit rate starts to drop, and your disk utilization starts creeping upwards.
+
+
+== Why does so much of the command suite functionality require an asset tracker plugin?
+
+For any given operation that requires an asset tracker, the reason is one of two:
+
+* The operation involves juggling a lot of servers. For example, a shard split needs to be able to obtain a minimum of 6 spare MySQL instances, and eventually turns the original shard's 3 MySQL instances into spares. Doing this kind of operation without an automated asset tracker can easily lead to major human error.
+
+* The operation inherently involves generating a new configuration for your application -- for example, setting a shard to read-only or promoting a standby slave to an active slave. These operations are meaningless outside of your application, since MySQL has no notion of "standby slave" or "degraded shard". \Jetpants has a notion of these things, but needs to persist the information somewhere, and it makes more sense to have \Jetpants relay this information to an external hardware management tool rather than maintain a separate (and potentially conflicting) source of truth.
+
+If you have enough servers to be using a sharded architecture, you hopefully already have some sort of hardware management / asset tracker system in place. \Jetpants is designed to be integrated with this system, but since every site runs something different, this requires writing some custom plugin code.
+
+
+== Can I use \Jetpants with PostgreSQL?
+
+The core functionality is currently very MySQL-specific. In theory a plugin could override a bunch of methods to target Postgres, and maybe even Redis or other persistent data stores with replication and import/export functionality. This would be a substantial effort though.
+
+At present, several methods have "mysql" in the name. These may change to more generic names in an upcoming release; in this case the old names will still be available as aliases to the new ones.
+
+
+== In the shard split process, why create the standby slaves AFTER doing the export / re-import?
+
+We do this to avoid replicating the LOAD DATA INFILE statements. Because MySQL replication is single-threaded, these statements won't execute in parallel on slaves, so the import process would be substantially slower. Instead, we create the new shard masters, do the export/import dance on those instances, and THEN clone their final data set to 2 new standby slaves each.
+
+This also allows us to disable binary logging during the import process, which is a very noticeable speed enhancement.
+
+
+== In the cleanup stage of a shard split, why not just remove unwanted data with a single DELETE statement?
+
+Because MySQL replication is single-threaded, it's a bad idea to execute single write queries that impact thousands of rows, since these will cause slaves to lag. Giant transactions are also not ideal in general due to how MVCC and rollbacks work in InnoDB.
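
One claim in the FAQ above is worth making concrete: range-based routing "can be implemented in a trivially small amount of code." The following is an illustrative Ruby sketch, not part of the Jetpants API; hosts and ranges are placeholders:

  # A range-based routing table like the FAQ describes.
  SHARDS = [
    { min_id: 1,    max_id: 1000,            host: '10.42.1.1' },
    { min_id: 1001, max_id: 3000,            host: '10.42.1.2' },
    { min_id: 3001, max_id: Float::INFINITY, host: '10.42.1.3' },
  ]

  # Pick the shard whose [min_id, max_id] range covers the sharding key.
  def shard_for(user_id)
    SHARDS.find {|s| user_id >= s[:min_id] && user_id <= s[:max_id]}
  end

  shard_for(2500)[:host]   # => "10.42.1.2"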
data/doc/requirements.rdoc
CHANGED
@@ -6,10 +6,11 @@ Plugins may freely override these assumptions, and upstream patches are very wel
 
 == Environment
 
-*
-*
+* Ruby 1.9.2 or higher
+* MySQL (or Percona Server), specifically version 5.1 or higher.
+* a RHEL/CentOS distribution of Linux.
   * It should be easy to write a plugin supporting another distribution. The main change might be overriding Jetpants::Host#service, if your distribution doesn't have <tt>/sbin/service</tt>.
-*
+* InnoDB / Percona XtraDB for storage engine. \Jetpants has not been tested with MyISAM, since \Jetpants is geared towards huge tables, and MyISAM is generally a bad fit.
 * All MySQL instances run on port 3306, with only one instance per logical machine.
   * A plugin could override this easily, but would require you to use the --report-host option on all slaves, so that crawling the replication topology is possible. It would also have to override various methods that specify the MySQL init script location, config file location, data directory, etc.
   * Since there's no "standard" layout for multi-instance MySQL, this won't ever be part of the \Jetpants core, but we may include one implementation as a bundled plugin in a future release.
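
On the distribution point above: a plugin for a non-RHEL system would mainly reopen Jetpants::Host and override #service. The sketch below assumes an (operation, name) style signature and the existing Host#ssh_cmd helper; treat both as assumptions for illustration rather than a documented contract:

  # Hypothetical plugin snippet for a systemd-based distribution
  # that lacks /sbin/service.
  module Jetpants
    class Host
      def service(operation, name)
        # run e.g. "systemctl start mysql" over SSH instead of /sbin/service
        ssh_cmd "systemctl #{operation} #{name}"
      end
    end
  end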
data/lib/jetpants.rb
CHANGED
@@ -17,21 +17,21 @@ module Jetpants
   # Establish default configuration values, and then merge in whatever we find globally
   # in /etc/jetpants.yaml and per-user in ~/.jetpants.yaml
   @config = {
-    'max_concurrency'         => 40,
-    'standby_slaves_per_pool' => 2,
-    'mysql_schema'            => 'test',
-    'mysql_app_user'          =>
-    'mysql_app_password'      =>
-    'mysql_repl_user'         =>
-    'mysql_repl_password'     =>
-    'mysql_root_password'     => false,
+    'max_concurrency'         => 40,            # max threads/conns per database
+    'standby_slaves_per_pool' => 2,             # number of standby slaves in every pool
+    'mysql_schema'            => 'test',        # database name
+    'mysql_app_user'          => 'appuser',     # mysql user for application
+    'mysql_app_password'      => '',            # mysql password for application
+    'mysql_repl_user'         => 'repluser',    # mysql user for replication
+    'mysql_repl_password'     => '',            # mysql password for replication
+    'mysql_root_password'     => false,         # mysql root password. omit if specified in /root/.my.cnf instead.
     'mysql_grant_ips'         => ['192.168.%'], # mysql user manipulations are applied to these IPs
-    'mysql_grant_privs'       => ['ALL'],
-    'export_location'         => '/tmp',
-    'verify_replication'      => true,
-    'plugins'                 => {},
-    'ssh_keys'                => nil,
-    'sharded_tables'          => [],
+    'mysql_grant_privs'       => ['ALL'],       # mysql user manipulations grant this set of privileges by default
+    'export_location'         => '/tmp',        # directory to use for data dumping
+    'verify_replication'      => true,          # raise exception if the 2 repl threads are in different states, or if actual repl topology differs from Jetpants' understanding of it
+    'plugins'                 => {},            # hash of plugin name => arbitrary plugin data (usually a nested hash of settings)
+    'ssh_keys'                => nil,           # array of SSH key file locations
+    'sharded_tables'          => [],            # array of name => {sharding_key=>X, chunks=>Y} hashes
   }
   %w(/etc/jetpants.yaml ~/.jetpants.yml ~/.jetpants.yaml).each do |path|
     overrides = YAML.load_file(File.expand_path path) rescue {}
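
Since the defaults above are merged with /etc/jetpants.yaml and ~/.jetpants.yaml, a minimal override file might look like the following; all values are placeholders, not recommendations:

  # /etc/jetpants.yaml -- placeholder values
  max_concurrency: 20
  mysql_schema: myapp_production
  mysql_app_user: myapp
  mysql_app_password: example_secret
  mysql_grant_ips: ['10.42.%']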
data/lib/jetpants/db/privileges.rb
CHANGED
@@ -68,19 +68,21 @@ module Jetpants
   # been split.
   def revoke_all_access!
     user_name = Jetpants.app_credentials[:user]
-
-
+    enable_read_only!
+    output "Revoking access for user #{user_name}."
     output(drop_user(user_name, true)) # drop the user without replicating the drop statement to slaves
   end
 
   # Enables global read-only mode on the database.
-  def
+  def enable_read_only!
+    output "Enabling global read_only mode"
     mysql_root_cmd 'SET GLOBAL read_only = 1' unless read_only?
     read_only?
   end
 
   # Disables global read-only mode on the database.
   def disable_read_only!
+    output "Disabling global read_only mode"
     mysql_root_cmd 'SET GLOBAL read_only = 0' if read_only?
     not read_only?
   end
data/lib/jetpants/db/replication.rb
CHANGED
@@ -37,7 +37,7 @@ module Jetpants
              "MASTER_USER='#{repl_user}', " +
              "MASTER_PASSWORD='#{repl_pass}'"
 
-    output "Changing master to #{new_master} with coordinates (#{logfile}, #{pos})
+    output "Changing master to #{new_master} with coordinates (#{logfile}, #{pos}). #{result}"
     @master.slaves.delete(self) if @master rescue nil
     @master = new_master
     @repl_paused = true
@@ -67,7 +67,7 @@ module Jetpants
   def disable_replication!
     raise "This DB object has no master" unless master
     output "Disabling replication; this db is no longer a slave."
-    output mysql_root_cmd "STOP SLAVE; RESET SLAVE"
+    output mysql_root_cmd "STOP SLAVE; CHANGE MASTER TO master_host=''; RESET SLAVE"
     @master.slaves.delete(self) rescue nil
     @master = nil
     @repl_paused = nil
@@ -84,6 +84,7 @@ module Jetpants
   def enslave!(targets, repl_user=false, repl_pass=false)
     repl_user ||= (Jetpants.replication_credentials[:user] || replication_credentials[:user])
     repl_pass ||= (Jetpants.replication_credentials[:pass] || replication_credentials[:pass])
+    disable_monitoring
     pause_replication if master && ! @repl_paused
     file, pos = binlog_coordinates
     clone_to!(targets)
@@ -95,6 +96,7 @@ module Jetpants
                      password: repl_pass )
     end
     resume_replication if @master # should already have happened from the clone_to! restart anyway, but just to be explicit
+    enable_monitoring
   end
 
   # Wipes out the target instances and turns them into slaves of self's master.
@@ -138,10 +140,10 @@ module Jetpants
   # database. Only useful when called on a master. This is the current
   # instance's own binlog coordinates, NOT the coordinates of replication
   # progress on a slave!
-  def binlog_coordinates
+  def binlog_coordinates(display_info=true)
     hash = mysql_root_cmd('SHOW MASTER STATUS', :parse=>true)
     raise "Cannot obtain binlog coordinates of this master because binary logging is not enabled" unless hash[:file]
-    output "Own binlog coordinates are (#{hash[:file]}, #{hash[:position].to_i})."
+    output "Own binlog coordinates are (#{hash[:file]}, #{hash[:position].to_i})." if display_info
     [hash[:file], hash[:position].to_i]
   end
 
@@ -149,7 +151,8 @@ module Jetpants
   # as reported by SHOW SLAVE STATUS.
   def seconds_behind_master
     raise "This instance is not a slave" unless master
-    slave_status[:seconds_behind_master]
+    lag = slave_status[:seconds_behind_master]
+    lag == 'NULL' ? nil : lag.to_i
   end
 
   # Waits for this instance's SECONDS_BEHIND_MASTER to reach 0 and stay at
@@ -175,6 +178,10 @@ module Jetpants
         return true
       end
       sleep poll_frequency
+    elsif lag.nil?
+      resume_replication
+      sleep 1
+      raise "Unable to restart replication" if seconds_behind_master.nil?
     else
       output "Currently #{lag} seconds behind master."
       times_at_zero = 0
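
The seconds_behind_master change above normalizes MySQL's string 'NULL' (a stopped or broken replication thread) to nil, so callers can branch on it. A usage sketch with a placeholder IP:

  slave = '10.42.0.12'.to_db
  lag = slave.seconds_behind_master
  if lag.nil?
    # SHOW SLAVE STATUS reported NULL: the replication thread isn't running
    slave.resume_replication
  else
    puts "#{slave} is #{lag} seconds behind its master"
  end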
data/lib/jetpants/db/state.rb
CHANGED
@@ -61,6 +61,7 @@ module Jetpants
       probe_running
       probe_master
       probe_slaves
+      self
     end
 
     # Alias for probe(true)
@@ -144,10 +145,34 @@ module Jetpants
     end
 
     # Returns the Jetpants::Pool that this instance belongs to, if any.
-
-
+    # Can optionally create an anonymous pool if no pool was found. This anonymous
+    # pool intentionally has a blank sync_configuration implementation.
+    def pool(create_if_missing=false)
+      result = Jetpants.topology.pool(self) || Jetpants.topology.pool(master)
+      if !result && create_if_missing
+        pool_master = master || self
+        result = Pool.new('anon_pool_' + pool_master.ip.tr('.', ''), pool_master)
+        def result.sync_configuration; end
+      end
+      return result
     end
 
+    # Determines the DB's role in its pool. Returns either :master,
+    # :active_slave, :standby_slave, or :backup_slave.
+    #
+    # Note that we consider a node with no master and no slaves to be
+    # a :master, since we can't determine if it had slaves but they're
+    # just offline/dead, vs it being an orphaned machine.
+    def role
+      p = pool
+      case
+      when !@master then :master
+      when for_backups? then :backup_slave
+      when p && p.active_slave_weights[self] then :active_slave  # if pool in topology, determine based on expected/ideal state
+      when !p && !is_standby? then :active_slave                 # if pool missing from topology, determine based on actual state
+      else :standby_slave
+      end
+    end
 
     ###### Private methods #####################################################
 
@@ -178,8 +203,9 @@ module Jetpants
         raise "#{self}: #{message}" if Jetpants.verify_replication
         output message
         pause_replication
+      else
+        @repl_paused = (status[:slave_io_running].downcase == 'no')
       end
-      @repl_paused = (status[:slave_io_running].downcase == 'no')
     end
   end
 
@@ -202,7 +228,7 @@ module Jetpants
     processes.grep(/Binlog Dump/).concurrent_each do |p|
       tokens = p.split
       ip, dummy = tokens[2].split ':'
-      db =
+      db = ip.to_db
      db.probe
      slaves_mutex.synchronize {@slaves << db if db.master == self}
    end
data/lib/jetpants/host.rb
CHANGED
@@ -25,6 +25,8 @@ module Jetpants
     end
 
     def initialize(ip)
+      # Only supporting ipv4 for now
+      raise "Invalid IP address: #{ip}" unless ip =~ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
      @ip = ip
      @connection_pool = [] # array of idle Net::SSH::Connection::Session objects
      @lock = Mutex.new
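
Note that the new validation accepts any string containing an IPv4-shaped substring, because the pattern is unanchored. A stricter variant -- an editorial sketch, not what 0.7.2 ships -- would anchor the whole string:

  # "foo1.2.3.4bar" passes the unanchored check above but fails this one.
  def looks_like_ipv4?(ip)
    !!(ip =~ /\A\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/)
  end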
data/lib/jetpants/pool.rb
CHANGED
@@ -94,7 +94,7 @@ module Jetpants
   # returns a flat array of all Jetpants::DB objects in the pool: the master and
   # all slaves of all types.
   def nodes
-    [master, slaves].flatten
+    [master, slaves].flatten.compact
   end
 
   # Informs Jetpants that slave_db is an active slave. Potentially used by
@@ -152,52 +152,96 @@ module Jetpants
   # of returning a string, so that you can invoke something like:
   #   Jetpants.topology.pools.each &:summary
   # to easily display a summary.
-  def summary
+  def summary(extended_info=false)
     probe
-
-
+
+    alias_text = @aliases.count > 0 ? ' (aliases: ' + @aliases.join(', ') + ')' : ''
+    data_size = @master.running? ? "[#{master.data_set_size(true)}GB]" : ''
+    print "#{name}#{alias_text}  #{data_size}\n"
+
+    if extended_info
+      details = {}
+      nodes.concurrent_each do |s|
+        if !s.running?
+          details[s] = {coordinates: ['unknown'], lag: 'N/A'}
+        elsif s == @master
+          details[s] = {coordinates: s.binlog_coordinates(false), lag: 'N/A'}
+        else
+          details[s] = {coordinates: s.repl_binlog_coordinates(false), lag: s.seconds_behind_master.to_s + 's'}
+        end
+      end
     end
-
-
+
+    binlog_pos = extended_info ? details[@master][:coordinates].join(':') : ''
+    print "\tmaster          = %-13s %-30s %s\n" % [@master.ip, @master.hostname, binlog_pos]
+
     [:active, :standby, :backup].each do |type|
       slave_list = slaves(type)
-      slave_list.each_with_index do |s, i|
-
+      slave_list.sort.each_with_index do |s, i|
+        binlog_pos = extended_info ? details[s][:coordinates].join(':') : ''
+        slave_lag = extended_info ? "lag=#{details[s][:lag]}" : ''
+        print "\t%-7s slave #{i + 1} = %-13s %-30s %-26s %s\n" % [type, s.ip, s.hostname, binlog_pos, slave_lag]
       end
     end
     true
   end
 
-  #
-  # as a stand-alone method; there's other necessary logic, such as setting
-  # the old master to read-only mode, and doing a STOP SLAVE on all slaves.
-  # Use the "jetpants promotion" task instead to do an interactive promotion.
-  # (In a future release, this will be refactored to be fully scriptable.)
+  # Demotes the pool's existing master, promoting a slave in its place.
   def master_promotion!(promoted)
     demoted = @master
-    raise "
-
-
-
-
-
+    raise "Demoted node is already the master of this pool!" if demoted == promoted
+    raise "Promoted host is not in the right pool!" unless demoted.slaves.include?(promoted)
+
+    output "Preparing to demote master #{demoted} and promote #{promoted} in its place."
+
+    # If demoted machine is available, confirm it is read-only and binlog isn't moving,
+    # and then wait for slaves to catch up to this position
+    if demoted.running?
+      demoted.enable_read_only! unless demoted.read_only?
+      raise "Unable to enable global read-only mode on demoted machine" unless demoted.read_only?
+      coordinates = demoted.binlog_coordinates
+      raise "Demoted machine still taking writes (from superuser or replication?) despite being read-only" unless coordinates == demoted.binlog_coordinates
+      demoted.slaves.concurrent_each do |s|
+        while true do
+          sleep 1
+          break if s.repl_binlog_coordinates == coordinates
+          output "Still catching up to coordinates of demoted master"
+        end
+      end
+
+    # Demoted machine not available -- wait for slaves' binlogs to stop moving
     else
-
+      demoted.slaves.concurrent_each do |s|
+        progress = s.repl_binlog_coordinates
+        while true do
+          sleep 1
+          break if s.repl_binlog_coordinates == progress
+          s.output "Still catching up on replication"
+        end
+      end
+    end
+
+    # Stop replication on all slaves
+    replicas = demoted.slaves.dup
+    replicas.each do |s|
+      s.pause_replication if s.replicating?
     end
+    raise "Unable to stop replication on all slaves" if replicas.any? {|s| s.replicating?}
+
+    user, password = promoted.replication_credentials.values
+    log, position  = promoted.binlog_coordinates
+
+    # reset slave on promoted, and make sure read_only is disabled
+    promoted.disable_replication!
+    promoted.disable_read_only!
 
     # gather our new replicas
-    replicas
-    replicas << demoted if demoted.
-
+    replicas.delete promoted
+    replicas << demoted if demoted.running?
+
     # perform promotion
-    replicas.each do |
-
-      :user => user,
-      :password => password,
-      :log_file => log,
-      :log_pos => position
+    replicas.each do |r|
+      r.change_master_to promoted, user: user, password: password, log_file: log, log_pos: position
     end
 
     # ensure our replicas are configured correctly by comparing our staged values to current values of replicas
@@ -215,13 +259,15 @@ module Jetpants
     end
 
     # Update the pool
-    # Note: if the demoted machine is
+    # Note: if the demoted machine is not available, plugin may need to implement an
     # after_master_promotion! method which handles this case in configuration tracker
     @active_slave_weights.delete promoted # if promoting an active slave, remove it from read pool
     @master = promoted
     sync_configuration
     Jetpants.topology.write_config
 
+    output "Promotion complete. Pool master is now #{promoted}."
+
     replicas.all? {|r| r.replicating?}
   end
 
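
Usage sketch for the extended summary added above; passing true enables the per-node binlog-coordinate and lag columns (the IP is a placeholder):

  Jetpants.topology.pools.each {|p| p.summary(true)}

  # or for a single pool discovered from a node, as the promotion task does:
  '10.42.0.10'.to_db.pool(true).summary(true)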
data/lib/jetpants/shard.rb
CHANGED
@@ -169,7 +169,7 @@ module Jetpants
 
     init_children(pieces) unless @children.count > 0
 
-    @children.concurrent_each {|c| c.
+    @children.concurrent_each {|c| c.disable_binary_logging}
     clone_to_children!
     @children.concurrent_each {|c| c.rebuild!}
     @children.each {|c| c.sync_configuration}
@@ -225,25 +225,22 @@ module Jetpants
 
   # Exports data that should stay on this shard, drops and re-creates tables,
   # re-imports the data, and then adds slaves to the shard pool as needed.
-
-  # useful if you're running this manually and it failed part-way.
-  def rebuild!(stage=0)
+  def rebuild!
     # Sanity check
     raise "Cannot rebuild a shard that isn't still slaving from another shard" unless @master.is_slave?
     raise "Cannot rebuild an active shard" if in_config?
 
+    stop_query_killer
     tables = Table.from_config 'sharded_tables'
 
-    if
-    raise "Shard is not in the expected initializing or exporting states" unless [:initializing, :exporting].include? @state
+    if [:initializing, :exporting].include? @state
      @state = :exporting
      sync_configuration
      export_schemata tables
      export_data tables, @min_id, @max_id
    end
 
-    if
-    raise "Shard is not in the expected exporting or importing states" unless [:exporting, :importing].include? @state
+    if [:exporting, :importing].include? @state
      @state = :importing
      sync_configuration
      import_schemata!
@@ -252,8 +249,7 @@ module Jetpants
      start_query_killer
    end
 
-    if
-    raise "Shard is not in the expected importing or replicating states" unless [:importing, :replicating].include? @state
+    if [:importing, :replicating].include? @state
      enable_binary_logging
      restart_mysql
      @state = :replicating
@@ -262,6 +258,8 @@ module Jetpants
      enslave!(my_slaves)
      my_slaves.each {|slv| slv.resume_replication}
      [self, my_slaves].flatten.each {|db| db.catch_up_to_master}
+    else
+      raise "Shard not in a state compatible with calling rebuild! (current state=#{@state})"
    end
 
    @state = :child
@@ -298,8 +296,8 @@ module Jetpants
   end
 
   # Displays information about the shard
-  def summary(with_children=true)
-    super()
+  def summary(extended_info=false, with_children=true)
+    super(extended_info)
     if with_children
       children.each {|c| c.summary}
     end
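
The rebuild! refactor above replaces the old stage argument with dispatch on the shard's persisted state, so a failed run can simply be re-invoked. A sketch, assuming the topology exposes a shard lookup by ID range (as the command suite's min_id/max_id options suggest); the IDs are placeholders:

  shard = Jetpants.topology.shard(1001, 3000)
  # state was left at :importing by the failed run, so the export
  # phase is skipped and the import phase is re-entered
  shard.rebuild!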
metadata
CHANGED
@@ -2,7 +2,7 @@
 name: jetpants
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 0.7.0
+  version: 0.7.2
 platform: ruby
 authors:
 - Evan Elias
@@ -11,7 +11,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2012-06-
+date: 2012-06-18 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mysql2
@@ -47,7 +47,7 @@ dependencies:
   type: :runtime
   version_requirements: *id003
 - !ruby/object:Gem::Dependency
-  name:
+  name: pry
   prerelease: false
   requirement: &id004 !ruby/object:Gem::Requirement
     none: false
@@ -58,7 +58,7 @@ dependencies:
   type: :runtime
   version_requirements: *id004
 - !ruby/object:Gem::Dependency
-  name:
+  name: thor
   prerelease: false
   requirement: &id005 !ruby/object:Gem::Requirement
     none: false
@@ -69,7 +69,7 @@ dependencies:
   type: :runtime
   version_requirements: *id005
 - !ruby/object:Gem::Dependency
-  name:
+  name: highline
   prerelease: false
   requirement: &id006 !ruby/object:Gem::Requirement
     none: false
@@ -80,7 +80,7 @@ dependencies:
   type: :runtime
   version_requirements: *id006
 - !ruby/object:Gem::Dependency
-  name:
+  name: terminal-table
   prerelease: false
   requirement: &id007 !ruby/object:Gem::Requirement
     none: false
@@ -91,7 +91,7 @@ dependencies:
   type: :runtime
   version_requirements: *id007
 - !ruby/object:Gem::Dependency
-  name:
+  name: colored
   prerelease: false
   requirement: &id008 !ruby/object:Gem::Requirement
     none: false
@@ -101,17 +101,6 @@ dependencies:
     version: "0"
   type: :runtime
   version_requirements: *id008
-- !ruby/object:Gem::Dependency
-  name: colored
-  prerelease: false
-  requirement: &id009 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  type: :runtime
-  version_requirements: *id009
 description: Jetpants is an automation toolkit for handling monstrously large MySQL database topologies. It is geared towards common operational tasks like cloning slaves, rebalancing shards, and performing master promotions. It features a command suite for easy use by operations staff, though it's also a full Ruby library for use in developing custom migration scripts and database automation.
 email:
 - me@evanelias.com
@@ -122,39 +111,40 @@ extensions: []
 
 extra_rdoc_files:
 - README.rdoc
-- doc/plugins.rdoc
 - doc/configuration.rdoc
-- doc/
+- doc/faq.rdoc
 - doc/requirements.rdoc
+- doc/commands.rdoc
+- doc/plugins.rdoc
 files:
 - Gemfile
 - README.rdoc
-- doc/plugins.rdoc
 - doc/configuration.rdoc
-- doc/
+- doc/faq.rdoc
 - doc/requirements.rdoc
--
--
-- lib/jetpants/
-- lib/jetpants/db/state.rb
+- doc/commands.rdoc
+- doc/plugins.rdoc
+- lib/jetpants/monkeypatch.rb
 - lib/jetpants/db/import_export.rb
 - lib/jetpants/db/privileges.rb
 - lib/jetpants/db/client.rb
 - lib/jetpants/db/replication.rb
-- lib/jetpants/
+- lib/jetpants/db/server.rb
+- lib/jetpants/db/state.rb
 - lib/jetpants/db.rb
-- lib/jetpants/
+- lib/jetpants/shard.rb
 - lib/jetpants/pool.rb
-- lib/jetpants/monkeypatch.rb
 - lib/jetpants/table.rb
+- lib/jetpants/topology.rb
+- lib/jetpants/callback.rb
 - lib/jetpants/host.rb
 - lib/jetpants.rb
 - bin/jetpants
-- plugins/simple_tracker/topology.rb
-- plugins/simple_tracker/shard.rb
-- plugins/simple_tracker/simple_tracker.rb
 - plugins/simple_tracker/db.rb
+- plugins/simple_tracker/shard.rb
 - plugins/simple_tracker/pool.rb
--
+- plugins/simple_tracker/simple_tracker.rb
+- plugins/simple_tracker/topology.rb
 - etc/jetpants.yaml.sample
 homepage: https://github.com/tumblr/jetpants/
 licenses: []
data/tasks/promotion.rb
DELETED
@@ -1,260 +0,0 @@
-module Jetpants
-  module Tasks
-    class Promotion
-
-      def initialize nodes = {}
-        @demoted = nodes['demote']
-        @promoted = nodes['promote']
-        super
-        Jetpants.verify_replication = false # since master may be offline
-        advise
-        establish_roles
-        prepare
-      end
-
-      def error message
-        abort ['ERROR:'.red, message].join ' '
-      end
-
-      def inform message
-        puts message.blue
-      end
-
-      def is_ip? address
-        address =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/
-      end
-
-      def establish_roles
-        establish_demoted
-        establish_replicas
-        establish_promoted
-      end
-
-      def establish_demoted
-        # derive demoted from promoted if possible
-        if @promoted and not @demoted
-          error "invalid ip address #{@promoted}" unless is_ip? @promoted
-          @promoted = Jetpants::DB.new @promoted
-
-          # bail if the promoted node isn't a slave or we can't connect
-          unless @promoted.is_slave?
-            error "node (#{@promoted}) does not appear to be a replica of another node"
-          end rescue error("unable to connect to node #{@promoted} to promote")
-
-          # recommend a node to demote
-          agreed = agree [
-            "Would you like to demote the following node?",
-            "address: #{@promoted.master}",
-            "slaves : #{@promoted.master.slaves.join(', ')}",
-            "- yes/no -"
-          ].join "\n"
-          error "unable to promote #{@promoted} unless you demote #{@promoted.master}" unless agreed
-
-          @demoted = @promoted.master.ip
-        end
-
-        # unable to derive demoted, so ask and convert to a DB object
-        unless @demoted.kind_of? Jetpants::DB
-          @demoted = ask 'Please enter the node to demote:' unless @demoted
-          error "Invalid IP address #{@demoted}" unless is_ip? @demoted
-          @demoted = @demoted.to_db
-        end
-
-        # connect and ensure node is a master; handle offline nodes appropriately
-        if @demoted.available?
-          error 'Cannot demote a node that has no slaves!' unless @demoted.has_slaves?
-        else
-          inform "unable to connect to node #{@demoted} to demote"
-          error "unable to perform promotion" unless agree "please confirm that #{@demoted} is offline: yes/no "
-          @replicas = @demoted.slaves # An asset-tracker plugin may have populated the slave list anyway
-          if !@replicas || @replicas.count < 1
-            replicas = ask "please provide a comma separated list of current replicas of #{@demoted}: ", lambda {|replicas| replicas.split /,\s*/}
-            error "user supplied list of replicas appears to be invalid - #{replicas}" unless replicas.all? {|replica| is_ip? replica}
-            @replicas = replicas.collect {|replica| replica.to_db}
-
-            # ensure they were replicas of @demoted
-            @replicas.each do |replica|
-              error "#{replica} does not appear to be a valid replica of #{@demoted}" unless replica.master == @demoted
-            end
-          end
-        end
-
-        error 'unable to establish demoteable node' unless @demoted.kind_of? Jetpants::DB
-      end
-
-      def establish_replicas
-        @replicas ||= @demoted.slaves
-        error 'no replicas to promote' if @replicas.empty?
-        error 'replicas appear to be invalid' unless @replicas.all? {|replica| replica.kind_of? Jetpants::DB}
-        inform "#{@demoted} has the following replicas: #{@replicas.join(', ')}"
-      end
-
-      def establish_promoted
-        # user supplied node to promote
-        if @promoted and not @promoted.kind_of? Jetpants::DB
-          error "invalid ip address #{@promoted}" unless is_ip? @promoted
-          @promoted = Jetpants::DB.new @promoted
-        end
-
-        # user hasn't supplied a valid node to promote
-        unless @replicas.include? @promoted
-          inform "unable to promote node (#{@promoted}) that is not a replica of #{@demoted}" if @promoted
-
-          # recommend a node
-          puts "\nREPLICA LIST:"
-          @replicas.sort_by {|replica| replica.seconds_behind_master}.each do |node|
-            file, pos = node.repl_binlog_coordinates(false)
-            puts "  * %-13s %-30s lag: %2ds  coordinates: (%-13s, %d)" % [node.ip, node.hostname, node.seconds_behind_master, file, pos]
-          end
-          puts
-          recommended = @replicas.sort_by {|replica| replica.seconds_behind_master}.reject {|r| r.for_backups?}.first
-          agreed = agree [
-            "Would you like to promote the following replica?",
-            "#{recommended.ip} (#{recommended.hostname})",
-            "- yes/no -"
-          ].join "\n"
-          @promoted = recommended if agreed
-
-          # choose a new node if they disagreed with our recommendation
-          unless agreed
-            choose do |promote|
-              promote.prompt = 'Please choose a replica to promote:'
-              @replicas.each do |replica|
-                promote.choice "#{replica} - replication lag: #{replica.seconds_behind_master} seconds" do
-                  @promoted = replica
-                end
-              end
-            end
-            raise "You chose a backup slave. These are not suitable for promotion. Please try again." if @promoted.for_backups?
-          end
-        end
-
-        error "unable to establish node to promote" unless @promoted.kind_of? Jetpants::DB
-      end
-
-      def advise
-        @states = {
-          preparing:  "processing promotion requirements",
-          prepared:   "preparing to disable writes on #{@demoted}",
-          read_only:  "writes have been disabled on #{@demoted}, preparing to demote #{@demoted} and promote #{@promoted}",
-          promoted:   "#{@promoted} has been promoted, please prepare database config for deploy.",
-          deployable: "promotion is complete, please commit and deploy.",
-        }
-        inform @states[@state.to_sym]
-      end
-
-      state_machine :initial => :preparing do
-        after_transition any => any, :do => :advise
-
-        event :prepare do
-          transition :preparing => :prepared, :if => :roles_populated?
-        end
-        after_transition :preparing => :prepared, :do => :disable_writes
-
-        event :disable_writes do
-          transition :prepared => :read_only, :if => :read_only!
-        end
-        after_transition :prepared => :read_only, :do => :promote
-
-        event :promote do
-          transition :read_only => :promoted, :if => :execute_promotion
-        end
-        after_transition :read_only => :promoted, :do => :prepare_config
-
-        event :prepare_config do
-          transition :promoted => :deployable, :if => :nodes_consistent?
-        end
-        after_transition :promoted => :deployable, :do => :summarize_promotion
-
-        state :preparing, :prepared do
-          def is_db? node
-            node.kind_of? Jetpants::DB
-          end
-
-          def roles_populated?
-            # ensure our roles are populated with dbs
-            [@demoted, @promoted, @replicas].all? do |role|
-              is_db? role or role.all? do |node|
-                is_db? node
-              end
-            end
-          end
-
-          def read_only!
-            unless @demoted.available?
-              status = @promoted.slave_status
-              @log, @position = status[:master_log_file], status[:exec_master_log_pos].to_i
-              return true
-            end
-
-            # set read_only if needed
-            @demoted.read_only! unless @demoted.read_only?
-            # bail if we're unable to set read_only
-            error "unable to set 'read_only' on #{@demoted}" unless @demoted.read_only?
-            # record the current log position to ensure writes are not taking place later.
-            @log, @position = @demoted.binlog_coordinates
-            error "#{@demoted} is still taking writes, unable to promote #{@promoted}" unless writes_disabled?
-            @demoted.read_only?
-          end
-
-          def writes_disabled?
-            return true unless @demoted.available?
-
-            # ensure no writes have been logged since read_only!
-            [@log, @position] == @demoted.binlog_coordinates
-          end
-
-        end
-
-        state :read_only, :promoted, :promoted, :deployable do
-          def nodes_consistent?
-            return true unless @demoted.available?
-            @replicas.all? {|replica| replica.slave_status[:exec_master_log_pos].to_i == @position}
-          end
-
-          def ensure_nodes_consistent?
-            inform "ensuring replicas are in a consistent state"
-            until nodes_consistent? do
-              print '.'
-              sleep 0.5
-            end
-            nodes_consistent?
-          end
-
-          def promotable?
-            disable_replication if ensure_nodes_consistent? and @promoted.disable_read_only!
-          end
-
-          def execute_promotion
-            error 'nodes are not in a promotable state.' unless promotable?
-            error 'replicas are not in a consistent state' unless nodes_consistent?
-
-            @demoted.pool.master_promotion! @promoted
-          end
-
-          def replicas_replicating? replicas = @replicas
-            replicas.all? {|replica| replica.replicating?}
-          end
-
-          def disable_replication replicas = @replicas
-            replicas.each do |replica|
-              replica.pause_replication if replica.replicating?
-            end
-            not replicas_replicating? replicas
-          end
-
-          def summarize_promotion transition
-            summary = Terminal::Table.new :title => 'Promotion Summary:' do |rows|
-              rows << ['demoted', @demoted]
-              rows << ['promoted', @promoted]
-              rows << ["replicas of #{@promoted}", @promoted.slaves.join(', ')]
-            end
-            puts summary
-            exit
-          end
-        end
-      end
-
-    end
-  end
-end