jetpants 0.7.0

@@ -0,0 +1,272 @@
+require 'json'
+require 'db'
+
+module Jetpants
+
+  # A Pool represents a group of database instances (Jetpants::DB objects).
+  #
+  # The default implementation assumes that a Pool contains:
+  #   * 1 master
+  #   * 0 or more slaves, falling into one of these categories:
+  #     * active slaves (actively taking production read queries)
+  #     * standby slaves (for HA, promotable if a master or active slave fails, and used to clone new replacements)
+  #     * backup slaves (dedicated to backups and background jobs, never put into prod, potentially a different hardware spec)
+  #
+  # Plugins may of course override this extensively, to support different
+  # topologies, such as master-master trees.
+  #
+  # Many of these methods are only useful in conjunction with an asset-tracker /
+  # configuration-generator plugin.
+  class Pool
+    include CallbackHandler
+
+    # human-readable String name of pool
+    attr_reader :name
+
+    # Jetpants::DB object that is the pool's master
+    attr_reader :master
+
+    # Array of strings containing other equivalent names for this pool
+    attr_reader :aliases
+
+    # Can be used to store a name that refers to just the active_slaves, for
+    # instance if your framework isn't smart enough to know about master/slave
+    # relationships. Safe to leave as nil otherwise. Has no effect in Jetpants,
+    # but an asset tracker / config generator plugin may include this in the
+    # generated config file.
+    attr_accessor :slave_name
+
+    # Hash mapping DB object => weight, for active (read) slaves. Default weight
+    # is 100. Safe to leave at default if your app framework doesn't support
+    # different weights for individual read slaves. Weights have no effect inside
+    # Jetpants, but any asset tracker / config generator plugin can carry them
+    # through to the config file.
+    attr_reader :active_slave_weights
+
+    # If the master also receives read queries, this stores its weight. Set to 0
+    # if the master does not receive read queries (which is the default). This
+    # has no effect inside of Jetpants, but can be used by an asset tracker /
+    # config generator plugin to carry the value through to the config file.
+    attr_accessor :master_read_weight
+
+    def initialize(name, master)
+      @name = name
+      @slave_name = false
+      @aliases = []
+      @master = master.to_db
+      @master_read_weight = 0
+      @active_slave_weights = {}
+    end
+
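+    # A minimal usage sketch (hypothetical hostnames; assumes 10.42.0.2 is
+    # already replicating from the master, since the slave lists below are
+    # derived from @master.slaves):
+    #
+    #   pool = Pool.new('user-db', '10.42.0.1')  # master as IP string or DB object
+    #   pool.has_active_slave '10.42.0.2', 50    # register a read slave at weight 50
+    #   pool.slaves(:active)                     # => [<DB 10.42.0.2>]
+    #   pool.nodes                               # => master plus all slaves
+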
+    # Returns all slaves, or pass in :active, :standby, or :backup to receive
+    # only slaves of that particular type.
+    def slaves(type=false)
+      case type
+      when :active  then active_slaves
+      when :standby then standby_slaves
+      when :backup  then backup_slaves
+      when false    then @master.slaves
+      else []
+      end
+    end
+
+    # Returns an array of Jetpants::DB objects.
+    # Active slaves are ones that receive read queries from your application.
+    def active_slaves
+      @master.slaves.select {|sl| @active_slave_weights[sl]}
+    end
+
+    # Returns an array of Jetpants::DB objects.
+    # Standby slaves do not receive queries from your application. These are for
+    # high availability. They can be turned into active slaves or even the master,
+    # and can also be used for cloning additional slaves.
+    def standby_slaves
+      @master.slaves.reject {|sl| @active_slave_weights[sl] || sl.for_backups?}
+    end
+
+    # Returns an array of Jetpants::DB objects.
+    # Backup slaves are never promoted to active or master. They are for dedicated
+    # backup purposes. They may be a different/cheaper hardware spec than other slaves.
+    def backup_slaves
+      @master.slaves.reject {|sl| @active_slave_weights[sl] || !sl.for_backups?}
+    end
+
+    # Returns a flat array of all Jetpants::DB objects in the pool: the master and
+    # all slaves of all types.
+    def nodes
+      [master, slaves].flatten
+    end
+
+    # Informs Jetpants that slave_db is an active slave. Potentially used by
+    # plugins, such as in Topology at start-up time.
+    def has_active_slave(slave_db, weight=100)
+      slave_db = slave_db.to_db
+      raise "Attempt to mark a DB as its own active slave" if slave_db == @master
+      @active_slave_weights[slave_db] = weight
+    end
+
+    # Turns a standby slave into an active slave, giving it the specified read
+    # weight. Syncs the pool's configuration afterwards. It's up to your asset
+    # tracker plugin to actually do something with this information.
+    def mark_slave_active(slave_db, weight=100)
+      raise "Attempt to make a backup slave be an active slave" if slave_db.for_backups?
+      has_active_slave slave_db, weight
+      sync_configuration
+    end
+
+    # Turns an active slave into a standby slave. Syncs the pool's configuration
+    # afterwards. It's up to your asset tracker plugin to actually do something
+    # with this information.
+    def mark_slave_standby(slave_db)
+      slave_db = slave_db.to_db
+      raise "Cannot call mark_slave_standby on a master" if slave_db == @master
+      @active_slave_weights.delete(slave_db)
+      sync_configuration
+    end
+
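+    # Sketch of toggling a slave's role. Both methods end by calling
+    # sync_configuration, so an asset tracker plugin can pick up the change:
+    #
+    #   slave = pool.standby_slaves.first
+    #   pool.mark_slave_active slave, 100  # standby -> active at weight 100
+    #   pool.mark_slave_standby slave      # active -> standby again
+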
+    # Removes a slave from the pool entirely. This is destructive, i.e. it does a
+    # RESET SLAVE on the db.
+    # Note that a plugin may want to override this (or implement after_remove_slave!)
+    # to actually sync the change to an asset tracker, depending on how the plugin
+    # implements Pool#sync_configuration. (If the implementation makes sync_configuration
+    # work by iterating over the pool's current slaves, it won't see any slaves that
+    # have been removed.)
+    def remove_slave!(slave_db)
+      raise "Slave is not in this pool" unless slave_db.pool == self
+      slave_db.disable_monitoring
+      slave_db.stop_replication
+      slave_db.repl_binlog_coordinates # displays how far we replicated, in case you need to roll back this change manually
+      slave_db.disable_replication!
+    end
+
+    # Informs this pool that it has an alias. A pool may have any number of aliases.
+    def add_alias(name)
+      if @aliases.include? name
+        false
+      else
+        @aliases << name
+        true
+      end
+    end
+
+    # Displays a summary of the pool's members. This outputs immediately instead
+    # of returning a string, so that you can invoke something like:
+    #   Jetpants.topology.pools.each &:summary
+    # to easily display a summary.
+    def summary
+      probe
+      if @aliases.count > 0
+        alias_text = ' (aliases: ' + @aliases.join(', ') + ')'
+      end
+      print "#{name}#{alias_text} [#{master.data_set_size(true)}GB]\n"
+      print "\tmaster = %-13s #{master.hostname}\n" % @master.ip
+      [:active, :standby, :backup].each do |type|
+        slave_list = slaves(type)
+        slave_list.each_with_index do |s, i|
+          print "\t%-7s slave #{i + 1} = %-13s #{s.hostname}\n" % [type, s.ip]
+        end
+      end
+      true
+    end
+
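+    # The printed output looks roughly like this (values illustrative only):
+    #
+    #   user-db (aliases: users) [120GB]
+    #       master = 10.42.0.1     db1.example.com
+    #       active  slave 1 = 10.42.0.2     db2.example.com
+    #       standby slave 1 = 10.42.0.3     db3.example.com
+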
+    # Performs the last steps of the master promotion process. Do not use this
+    # as a stand-alone method; there's other necessary logic, such as setting
+    # the old master to read-only mode, and doing a STOP SLAVE on all slaves.
+    # Use the "jetpants promotion" task instead to do an interactive promotion.
+    # (In a future release, this will be refactored to be fully scriptable.)
+    def master_promotion!(promoted)
+      demoted = @master
+      raise "Promoted host is not in the right pool!" unless @master.slaves.include? promoted
+      user, password = promoted.replication_credentials.values
+      log, position = promoted.binlog_coordinates
+
+      # reset slave on promoted
+      if demoted.available?
+        promoted.disable_replication!
+      else
+        promoted.mysql_root_cmd "STOP SLAVE; RESET SLAVE"
+      end
+
+      # gather our new replicas
+      replicas = demoted.slaves.select {|replica| replica != promoted}
+      replicas << demoted if demoted.available?
+      replicas.flatten!
+
+      # perform promotion
+      replicas.each do |replica|
+        replica.change_master_to promoted,
+          :user     => user,
+          :password => password,
+          :log_file => log,
+          :log_pos  => position
+      end
+
+      # ensure our replicas are configured correctly by comparing our staged
+      # values to the current values on the replicas
+      promoted_replication_config = {
+        master_host:         promoted.ip,
+        master_user:         user,
+        master_log_file:     log,
+        exec_master_log_pos: position.to_s
+      }
+      replicas.each do |r|
+        promoted_replication_config.each do |option, value|
+          raise "Unexpected slave status value for #{option} in replica #{r} after promotion" unless r.slave_status[option] == value
+        end
+        r.resume_replication unless r.replicating?
+      end
+
+      # Update the pool
+      # Note: if the demoted machine is offline, a plugin may need to implement an
+      # after_master_promotion! method which handles this case in the configuration tracker.
+      @active_slave_weights.delete promoted # if promoting an active slave, remove it from the read pool
+      @master = promoted
+      sync_configuration
+      Jetpants.topology.write_config
+
+      replicas.all? {|r| r.replicating?}
+    end
+
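+    # Direct-call sketch (normally reached via the interactive "jetpants promotion"
+    # task, as noted above; shown here only to illustrate the method's contract):
+    #
+    #   new_master = pool.standby_slaves.first
+    #   pool.master_promotion!(new_master)  # => true if every replica ends up replicating
+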
+    # Informs your asset tracker about any changes in the pool's state or members.
+    # Plugins should override this, or use before_sync_configuration /
+    # after_sync_configuration callbacks, to provide an implementation of this method.
+    def sync_configuration
+    end
+
+    # Callback to ensure that a sync'ed pool is already in Topology.pools
+    def before_sync_configuration
+      unless Jetpants.topology.pools.include? self
+        Jetpants.topology.pools << self
+      end
+    end
+
+    # Returns the name of the pool.
+    def to_s
+      @name
+    end
+
+    # Displays the provided output, prefixed with the current time and self
+    # (the name of this Pool).
+    def output(str)
+      str = str.to_s.strip
+      str = nil if str && str.length == 0
+      str ||= "Completed (no output)"
+      output = Time.now.strftime("%H:%M:%S") + " [#{self}] "
+      output << str
+      print output + "\n"
+      output
+    end
+
+    # Jetpants::Pool proxies missing methods to the pool's @master Jetpants::DB instance.
+    def method_missing(name, *args, &block)
+      if @master.respond_to? name
+        @master.send name, *args, &block
+      else
+        super
+      end
+    end
+
+    def respond_to?(name, include_private=false)
+      super || @master.respond_to?(name)
+    end
+
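+    # Because of the proxying above, a Pool can be treated like its master for
+    # most purposes, for example:
+    #
+    #   pool.ip                  # forwarded to the master DB object
+    #   pool.data_set_size(true) # likewise (the same method #summary calls on the master)
+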
+  end
+end
@@ -0,0 +1,311 @@
+require 'json'
+require 'db'
+require 'table'
+require 'pool'
+
+
+module Jetpants
+
+  # A Shard in Jetpants is a range-based Pool. All Shards have the exact same
+  # set of tables; they just contain only the data that falls within their
+  # ID range.
+  class Shard < Pool
+    include CallbackHandler
+
+    # min ID for this shard
+    attr_reader :min_id
+
+    # max ID for this shard, or the string "INFINITY"
+    attr_reader :max_id
+
+    # if this shard is being split, this is an array of "child" Shard objects.
+    attr_reader :children
+
+    # if this shard is a child of one being split, this links back to the parent Shard.
+    attr_accessor :parent
+
+    # A symbol representing the shard's state. Possible state values include:
+    #   :ready         -- Normal shard, online / in production, optimal condition, no current operation/maintenance.
+    #   :read_only     -- Online / in production but not currently writable due to maintenance or emergency.
+    #   :offline       -- In production but not currently readable or writable due to maintenance or emergency.
+    #   :initializing  -- New child shard, being created, not in production.
+    #   :exporting     -- Child shard that is exporting its portion of the data set. Shard not in production yet.
+    #   :importing     -- Child shard that is importing its portion of the data set. Shard not in production yet.
+    #   :replicating   -- Child shard that is being cloned to new replicas. Shard not in production yet.
+    #   :child         -- Child shard that is in production for reads, but still slaving from its parent for writes.
+    #   :needs_cleanup -- Child shard that is fully in production, but parent replication is not torn down yet, and redundant data (from the wrong range) is not removed yet.
+    #   :deprecated    -- Parent shard that has been split but whose children are still in the :child or :needs_cleanup state. Shard may still be in production for writes.
+    #   :recycle       -- Parent shard that has been split and whose children are now in the :ready state. Shard no longer in production.
+    attr_accessor :state
+
+    # Constructor for Shard --
+    # * min_id: int
+    # * max_id: int or the string "INFINITY"
+    # * master: string (IP address) or a Jetpants::DB object
+    # * state: one of the state symbols listed above
+    def initialize(min_id, max_id, master, state=:ready)
+      @min_id = min_id
+      @max_id = max_id
+      @state = state
+
+      @children = [] # array of shards being initialized by splitting this one
+      @parent = nil
+
+      super(generate_name, master)
+    end
+
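+    # Construction sketch (hypothetical values); the name comes from
+    # generate_name, defined below:
+    #
+    #   shard = Shard.new(1, 1_000_000, '10.42.0.5')
+    #   shard.name  # => "shard-1-1000000"
+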
+    # Generates a string containing the shard's min and max IDs. Plugins may want to override.
+    def generate_name
+      "shard-#{min_id}-#{max_id.to_s.downcase}"
+    end
+
+    # Returns true if the shard state is one of the values that indicates it's
+    # a live / in-production shard. These states include :ready, :child,
+    # :needs_cleanup, :read_only, and :offline.
+    def in_config?
+      [:ready, :child, :needs_cleanup, :read_only, :offline].include? @state
+    end
+
+    # In default Jetpants, we assume each Shard has 1 master and N standby slaves;
+    # we never have active (read) slaves for shards. So calling mark_slave_active
+    # on a Shard raises an exception. Plugins may override this behavior, which
+    # may be necessary for sites spanning two or more active data centers.
+    def mark_slave_active(slave_db, weight=100)
+      raise "Shards do not support active slaves"
+    end
+
+    # Returns an empty array, because we assume that shard pools have no active
+    # slaves. (If your read volume would require active slaves, think about
+    # splitting your shard instead...)
+    #
+    # Plugins may of course override this behavior.
+    def active_slaves
+      []
+    end
+
+    # Returns the master's standby slaves, ignoring any child shards, since they
+    # are a special case of slaves.
+    def standby_slaves
+      result = super
+      if @children.count > 0
+        is_child_master = {}
+        @children.each {|c| is_child_master[c.master] = true}
+        result.reject {|sl| is_child_master[sl]}
+      else
+        result
+      end
+    end
+
+    # Returns the Jetpants::DB object corresponding to the requested access
+    # mode (either :read or :write). Ordinarily this will be the shard's
+    # @master, unless this shard is still a child, in which case we send
+    # writes to the shard's parent's master instead.
+    def db(mode=:read)
+      (mode.to_sym == :write && @parent ? @parent.master : master)
+    end
+
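+    # Routing sketch: for a shard in the :child state that is still slaving from
+    # its parent, reads go to its own master while writes go upstream:
+    #
+    #   child.db(:read)   # => child's own master
+    #   child.db(:write)  # => parent's master, as long as child.parent is set
+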
+    # Adds a Jetpants::Shard to this shard's array of children, and sets
+    # the child's parent to be self.
+    def add_child(shard)
+      raise "Shard #{shard} already has a parent!" if shard.parent
+      @children << shard
+      shard.parent = self
+    end
+
+    # Removes a Jetpants::Shard from this shard's array of children, and sets
+    # the child's parent to nil.
+    def remove_child(shard)
+      raise "Shard #{shard} isn't a child of this shard!" unless shard.parent == self
+      @children.delete shard
+      shard.parent = nil
+    end
+
+    # Creates and returns <count> child shards, pulling boxes for masters from the spare list.
+    # You can optionally supply the ID ranges to use: pass in an array of arrays,
+    # where the outer array is of size <count> and each inner array is [min_id, max_id].
+    # If you omit id_ranges, the parent's ID range will be divided evenly amongst the
+    # children automatically.
+    def init_children(count, id_ranges=false)
+      # Make sure we have enough machines in the spare pool
+      raise "Not enough master role machines in spare pool!" if count > Jetpants.topology.count_spares(role: 'master')
+      raise "Not enough standby_slave role machines in spare pool!" if count * Jetpants.standby_slaves_per_pool > Jetpants.topology.count_spares(role: 'standby_slave')
+
+      # Make sure we have enough slaves of the shard being split
+      raise "Must have at least #{Jetpants.standby_slaves_per_pool} slaves of shard being split" if master.slaves.count < Jetpants.standby_slaves_per_pool
+
+      # Make sure the right number of id_ranges were supplied, if any were
+      raise "Wrong number of id_ranges supplied" if id_ranges && id_ranges.count != count
+
+      unless id_ranges
+        id_ranges = []
+        ids_total = 1 + @max_id - @min_id
+        current_min_id = @min_id
+        count.times do |i|
+          ids_this_pool = (ids_total / count).floor
+          ids_this_pool += 1 if i < (ids_total % count)
+          id_ranges << [current_min_id, current_min_id + ids_this_pool - 1]
+          current_min_id += ids_this_pool
+        end
+      end
+
+      count.times do |i|
+        spare = Jetpants.topology.claim_spare(role: 'master')
+        spare.output "Using ID range of #{id_ranges[i][0]} to #{id_ranges[i][1]} (inclusive)"
+        s = Shard.new(id_ranges[i][0], id_ranges[i][1], spare, :initializing)
+        add_child(s)
+        Jetpants.topology.pools << s
+        s.sync_configuration
+      end
+
+      @children
+    end
+
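+    # Worked example of the automatic range division above: splitting a parent
+    # covering IDs 1..1000 into 3 children gives ids_total = 1000, and
+    # 1000 / 3 = 333 remainder 1, so the first child receives one extra ID:
+    #
+    #   [[1, 334], [335, 667], [668, 1000]]
+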
+    # Splits a shard into <pieces> child shards. The children will still be slaving
+    # from the parent after this point; you need to do additional things to fully
+    # complete the shard split. See the command suite tasks shard_split_move_reads,
+    # shard_split_move_writes, and shard_split_cleanup.
+    def split!(pieces=2)
+      raise "Cannot split a shard that is still a child!" if @parent
+
+      init_children(pieces) unless @children.count > 0
+
+      @children.concurrent_each {|c| c.stop_query_killer; c.disable_binary_logging}
+      clone_to_children!
+      @children.concurrent_each {|c| c.rebuild!}
+      @children.each {|c| c.sync_configuration}
+
+      @state = :deprecated
+      sync_configuration
+      output "Initial split complete."
+    end
+
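+    # End-to-end sketch of a split; the later steps live in the command suite
+    # tasks named above:
+    #
+    #   shard.split!(2)  # init children, clone data, rebuild, mark self :deprecated
+    #   # then run shard_split_move_reads, shard_split_move_writes (see
+    #   # move_writes_to_children below), and shard_split_cleanup (see cleanup!)
+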
+    # Transitions the shard's children into the :needs_cleanup state. It is the
+    # responsibility of an asset tracker plugin / config generator to implement
+    # config generation in a way that actually makes writes go to shards
+    # in the :needs_cleanup state.
+    def move_writes_to_children
+      @children.each do |c|
+        c.state = :needs_cleanup
+        c.sync_configuration
+      end
+    end
+
+    # Clones the current shard to its children. Uses a standby slave of self as
+    # the source for copying.
+    def clone_to_children!
+      # Figure out which slave(s) we can use for populating the new masters
+      sources = standby_slaves.dup
+      sources.shift
+      raise "Need to have at least 1 slave in order to create additional slaves" if sources.length < 1
+
+      # Figure out which machines we need to turn into slaves
+      targets = []
+      @children.each do |child_shard|
+        if child_shard.master.is_slave? && child_shard.master.master != @master
+          raise "Child shard master #{child_shard.master} is already a slave of another pool"
+        elsif child_shard.master.is_slave?
+          child_shard.output "Already slaving from parent shard master"
+          child_shard.restart_mysql # to make the previous disabling of binary logging take effect
+        else
+          targets << child_shard.master
+        end
+      end
+
+      while targets.count > 0 do
+        chain_length = (targets.count.to_f / sources.count.to_f).ceil
+        chain_length = 3 if chain_length > 3 # For sanity's sake, we only allow a copy pipeline that populates 3 instances at once.
+        sources.concurrent_each_with_index do |src, idx|
+          my_targets = targets[idx * chain_length, chain_length]
+          src.enslave_siblings! my_targets
+          chain_length.times {|n| targets[(idx * chain_length) + n] = nil}
+        end
+        targets.compact!
+      end
+    end
+
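+    # Illustration of the copy-pipeline math above: with 2 sources and 5 targets,
+    # chain_length = (5 / 2.0).ceil = 3, so on the first pass source 0 populates
+    # targets 0..2 and source 1 populates targets 3..4; every slot is then nil'ed,
+    # targets.compact! empties the array, and the while loop exits.
+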
+    # Exports data that should stay on this shard, drops and re-creates tables,
+    # re-imports the data, and then adds slaves to the shard pool as needed.
+    # The optional stage param lets you skip some steps, but this is only really
+    # useful if you're running this manually and it failed part-way.
+    def rebuild!(stage=0)
+      # Sanity check
+      raise "Cannot rebuild a shard that isn't still slaving from another shard" unless @master.is_slave?
+      raise "Cannot rebuild an active shard" if in_config?
+
+      tables = Table.from_config 'sharded_tables'
+
+      if stage <= 1
+        raise "Shard is not in the expected initializing or exporting states" unless [:initializing, :exporting].include? @state
+        @state = :exporting
+        sync_configuration
+        export_schemata tables
+        export_data tables, @min_id, @max_id
+      end
+
+      if stage <= 2
+        raise "Shard is not in the expected exporting or importing states" unless [:exporting, :importing].include? @state
+        @state = :importing
+        sync_configuration
+        import_schemata!
+        alter_schemata if respond_to? :alter_schemata
+        import_data tables, @min_id, @max_id
+        start_query_killer
+      end
+
+      if stage <= 3
+        raise "Shard is not in the expected importing or replicating states" unless [:importing, :replicating].include? @state
+        enable_binary_logging
+        restart_mysql
+        @state = :replicating
+        sync_configuration
+        my_slaves = Jetpants.topology.claim_spares(Jetpants.standby_slaves_per_pool, role: 'standby_slave')
+        enslave!(my_slaves)
+        my_slaves.each {|slv| slv.resume_replication}
+        [self, my_slaves].flatten.each {|db| db.catch_up_to_master}
+      end
+
+      @state = :child
+    end
+
+    # Run this on a parent shard after the rest of a shard split is complete.
+    # Sets this shard's master to read-only; removes the application user from
+    # self (without replicating this change to children); disables replication
+    # between the parent and the children; and then removes rows from the
+    # children that replicated to the wrong shard.
+    def cleanup!
+      raise "Can only run cleanup! on a parent shard in the deprecated state" unless @state == :deprecated
+      raise "Cannot call cleanup! on a child shard" if @parent
+
+      tables = Table.from_config 'sharded_tables'
+      @master.revoke_all_access!
+      @children.concurrent_each do |child_shard|
+        raise "Child state does not indicate cleanup is needed" unless child_shard.state == :needs_cleanup
+        raise "Child shard master should be a slave in order to clean up" unless child_shard.is_slave?
+        child_shard.master.disable_replication! # stop slaving from parent
+        child_shard.prune_data_to_range tables, child_shard.min_id, child_shard.max_id
+      end
+
+      # We have to iterate over a copy of the @children array, rather than the array
+      # directly, since Array#each skips elements when you remove elements in-place,
+      # which Shard#remove_child does...
+      @children.dup.each do |child_shard|
+        child_shard.state = :ready
+        remove_child child_shard
+        child_shard.sync_configuration
+      end
+      @state = :recycle
+      sync_configuration
+    end
+
+    # Displays information about the shard and, optionally, its children.
+    def summary(with_children=true)
+      super()
+      if with_children
+        children.each {|c| c.summary}
+      end
+      true
+    end
+
+  end
+end