jetpants 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +4 -9
  3. data/bin/jetpants +7 -6
  4. data/doc/capacity_plan.rdoc +77 -0
  5. data/doc/commands.rdoc +1 -1
  6. data/doc/jetpants_collins.rdoc +2 -1
  7. data/doc/online_schema_change.rdoc +45 -0
  8. data/doc/plugins.rdoc +7 -1
  9. data/doc/requirements.rdoc +1 -1
  10. data/doc/upgrade_helper.rdoc +68 -0
  11. data/lib/jetpants/db/client.rb +2 -1
  12. data/lib/jetpants/db/import_export.rb +12 -3
  13. data/lib/jetpants/db/replication.rb +6 -2
  14. data/lib/jetpants/db/schema.rb +40 -0
  15. data/lib/jetpants/db/server.rb +2 -2
  16. data/lib/jetpants/host.rb +12 -1
  17. data/lib/jetpants/pool.rb +41 -0
  18. data/lib/jetpants/shard.rb +201 -124
  19. data/lib/jetpants/table.rb +80 -10
  20. data/plugins/capacity_plan/capacity_plan.rb +353 -0
  21. data/plugins/capacity_plan/commandsuite.rb +19 -0
  22. data/plugins/capacity_plan/monkeypatch.rb +20 -0
  23. data/plugins/jetpants_collins/db.rb +45 -6
  24. data/plugins/jetpants_collins/jetpants_collins.rb +32 -21
  25. data/plugins/jetpants_collins/pool.rb +22 -1
  26. data/plugins/jetpants_collins/shard.rb +9 -2
  27. data/plugins/jetpants_collins/topology.rb +8 -9
  28. data/plugins/online_schema_change/commandsuite.rb +56 -0
  29. data/plugins/online_schema_change/db.rb +33 -0
  30. data/plugins/online_schema_change/online_schema_change.rb +5 -0
  31. data/plugins/online_schema_change/pool.rb +105 -0
  32. data/plugins/online_schema_change/topology.rb +56 -0
  33. data/plugins/simple_tracker/shard.rb +1 -1
  34. data/plugins/upgrade_helper/commandsuite.rb +212 -0
  35. data/plugins/upgrade_helper/db.rb +78 -0
  36. data/plugins/upgrade_helper/host.rb +22 -0
  37. data/plugins/upgrade_helper/pool.rb +259 -0
  38. data/plugins/upgrade_helper/shard.rb +61 -0
  39. data/plugins/upgrade_helper/upgrade_helper.rb +21 -0
  40. data/scripts/global_rowcount.rb +75 -0
  41. metadata +28 -15
@@ -41,24 +41,86 @@ module Jetpants
41
41
  # the default of 1 (meaning no chunking). For tables with hundreds of millions
42
42
  # of rows, you may want to do exports/imports in a few hundred chunks to speed
43
43
  # things up and keep the transactions smaller.
44
- attr_reader :chunks
44
+ attr_accessor :chunks
45
45
 
46
- # Create a Table. Params should have string keys, not symbols. Possible keys include
47
- # 'sharding_key' (or equivalently 'primary_key'), 'chunks', and 'order_by'.
46
+ # The SQL statement read from the DB via SHOW CREATE TABLE
47
+ attr_reader :create_table_sql
48
+
49
+ # The primary key of the table, returns an array on a multi-
50
+ # column PK
51
+ attr_reader :primary_key
52
+
53
+ # A list of indexes mapped to the columns in them
54
+ attr_reader :indexes
55
+
56
+ # A list of the table column names
57
+ attr_reader :columns
58
+
59
+ # Pool object this Table is related to
60
+ attr_reader :pool
61
+
62
+ # Create a Table. Possible keys include 'sharding_key', 'chunks', 'order_by',
63
+ # 'create_table', 'pool', 'indexes', and anything else handled by plugins
48
64
  def initialize(name, params={})
49
65
  @name = name
50
66
  parse_params(params)
51
67
  end
52
68
 
53
69
  def parse_params(params = {})
54
- params['sharding_key'] ||= params['primary_keys'] || params['primary_key'] || 'user_id'
55
- @sharding_keys = (params['sharding_key'].is_a?(Array) ? params['sharding_key'] : [params['sharding_key']])
70
+ # Convert symbols to strings
71
+ params.keys.select {|k| k.is_a? Symbol}.each do |symbol_key|
72
+ params[symbol_key.to_s] = params[symbol_key]
73
+ params.delete symbol_key
74
+ end
75
+
76
+ # accept singular or plural for some params
77
+ params['sharding_key'] ||= params['sharding_keys']
78
+ params['primary_key'] ||= params['primary_keys']
79
+
80
+ @sharding_keys = (params['sharding_key'].is_a?(Array) ? params['sharding_key'] : [params['sharding_key']]) if params['sharding_key']
81
+ @sharding_keys ||= []
82
+
83
+ @primary_key = params['primary_key']
56
84
  @chunks = params['chunks'] || 1
57
85
  @order_by = params['order_by']
86
+ @create_table_sql = params['create_table'] || params['create_table_sql']
87
+ @pool = params['pool']
88
+ @indexes = params['indexes']
89
+ @columns = params['columns']
58
90
  end
59
91
 
92
+ # Returns the current maximum primary key value, returns
93
+ # the values of the record when ordered by the key fields
94
+ # in order, descending on a multi-value PK
95
+ def max_pk_val_query
96
+ if @primary_key.is_a?(Array)
97
+ pk_str = @primary_key.join(",")
98
+ pk_ordering = @primary_key.map{|key| "#{key} DESC"}.join(',')
99
+ sql = "SELECT #{pk_str} FROM #{@name} ORDER BY #{pk_ordering} LIMIT 1"
100
+ else
101
+ sql = "SELECT MAX(#{@primary_key}) FROM #{@name}"
102
+ end
103
+ return sql
104
+ end
105
+
106
+ # Returns the first column of the primary key, or nil if there isn't one
107
+ def first_pk_col
108
+ if @primary_key.is_a? Array
109
+ @primary_key.first
110
+ else
111
+ @primary_key
112
+ end
113
+ end
114
+
115
+ # Returns true if the table is associated with the supplied pool
116
+ def belongs_to?(pool)
117
+ return @pool == pool
118
+ end
119
+
60
120
  # Return an array of Table objects based on the contents of Jetpants' config file entry
61
121
  # of the given label.
122
+ # TODO: integrate better with table schema detection code. Consider auto-detecting chunk
123
+ # count based on file size and row count estimate.
62
124
  def Table.from_config(label)
63
125
  result = []
64
126
  Jetpants.send(label).map {|name, attributes| Table.new name, attributes}
@@ -127,13 +189,21 @@ module Jetpants
127
189
  return sql
128
190
  end
129
191
 
130
- # Counts number of rows between the given ID ranges. Warning: will give
131
- # potentially misleading counts on multi-sharding-key tables.
192
  # Returns SQL to count the number of rows between the given ID ranges.
193
+ # Warning: will give potentially misleading counts on multi-sharding-key tables.
132
194
  def sql_count_rows(min_id, max_id)
133
- sql = "SELECT COUNT(*) FROM #{@name} WHERE "
195
+ sql = "SELECT COUNT(*) FROM #{@name}"
196
+ return sql unless min_id && max_id
197
+
134
198
  wheres = []
135
- @sharding_keys.each {|col| wheres << "(#{col} >= #{min_id} AND #{col} <= #{max_id})"}
136
- sql << wheres.join(" OR ")
199
+
200
+ if @sharding_keys.size > 0
201
+ @sharding_keys.each {|col| wheres << "(#{col} >= #{min_id} AND #{col} <= #{max_id})"}
202
+ sql << ' WHERE ' + wheres.join(" OR ")
203
+ elsif first_pk_col
204
+ sql << " WHERE #{first_pk_col} >= #{min_id} AND #{first_pk_col} <= #{max_id}"
205
+ end
206
+ sql
137
207
  end
138
208
 
139
209
  # Returns a file path (as a String) for the export dumpfile of the given ID range.
@@ -0,0 +1,353 @@
1
+ require 'capacity_plan/commandsuite'
2
+ require 'json'
3
+ require 'pony'
4
+ require 'capacity_plan/monkeypatch'
5
+
6
+ module Jetpants
7
+ module Plugin
8
+ class Capacity
9
+ @@db
10
+
11
+ # set the db and connect
12
+ def initialize
13
+ @@db = Jetpants.topology.pool(Jetpants.plugins['capacity_plan']['pool_name']).master
14
+ @@db.connect(user: Jetpants.plugins['capacity_plan']['user'], schema: Jetpants.plugins['capacity_plan']['schema'], pass: Jetpants.plugins['capacity_plan']['pass'])
15
+ end
16
+
17
+ ## grab snapshot of data and store it in mysql
18
+ def snapshot
19
+ storage_sizes = {}
20
+ timestamp = Time.now.to_i
21
+
22
+ current_sizes_storage = current_sizes
23
+
24
+ all_mounts.each do |key, value|
25
+ storage_sizes[key] = value
26
+ storage_sizes[key]['db_sizes'] = current_sizes_storage[key]
27
+ end
28
+
29
+ store_data(storage_sizes, timestamp)
30
+ end
31
+
32
+ ## generate the capacity plan and if email is true also send it to the email address listed
33
+ def plan(email=false)
34
+ history = get_history
35
+ mount_stats_storage = all_mounts
36
+ now = Time.now.to_i
37
+ output = ''
38
+
39
+ if Jetpants.topology.respond_to? :capacity_plan_notices
40
+ output += "\n\n________________________________________________________________________________________________________\n"
41
+ output += "Notices\n\n"
42
+ output += Jetpants.topology.capacity_plan_notices
43
+ end
44
+
45
+ criticals = []
46
+ warnings = []
47
+ ## check to see if any mounts are currently over the usage points
48
+ mount_stats_storage.each do |key, value|
49
+ if value['used'].to_f/value['total'].to_f > Jetpants.plugins['capacity_plan']['critical_mount']
50
+ criticals << key
51
+ elsif value['used'].to_f/value['total'].to_f > Jetpants.plugins['capacity_plan']['warning_mount']
52
+ warnings << key
53
+ end
54
+ end
55
+
56
+ if criticals.count > 0
57
+ output += "\n\n________________________________________________________________________________________________________\n"
58
+ output += "Critical Mounts\n\n"
59
+ criticals.each do |mount|
60
+ output += mount + "\n"
61
+ end
62
+ end
63
+
64
+ if warnings.count > 0
65
+ output += "\n\n________________________________________________________________________________________________________\n"
66
+ output += "Warning Mounts\n\n"
67
+ warnings.each do |mount|
68
+ output += mount + "\n"
69
+ end
70
+ end
71
+
72
+ output += "\n\n________________________________________________________________________________________________________\n"
73
+ output += "Usage and Time Left\n"
74
+ output += " --------- The 'GB per day' and 'Days left' fields are using a growth rate that is calulated by taking \n --------- a exponically decaying avg\n\n"
75
+
76
+ ##get segments for 24 hour blocks
77
+ segments = segmentify(history, 60 * 60 * 24)
78
+
79
+ output += "%30s %20s %10s %10s %16s\n" % ["pool name","Current Data Size","GB per day","Days left","(until critical)"]
80
+ output += "%30s %20s %10s %10s\n" % ["---------","-----------------","----------","---------"]
81
+
82
+ mount_stats_storage.each do |name, temp|
83
+ growth_rate = false
84
+ segments[name].each do |range, value|
85
+ growth_rate = calc_avg(growth_rate || value, value)
86
+ end
87
+ critical = mount_stats_storage[name]['total'].to_f * Jetpants.plugins['capacity_plan']['critical_mount']
88
+ if (per_day(bytes_to_gb(growth_rate))) <= 0 || ((critical - mount_stats_storage[name]['used'].to_f)/ per_day(growth_rate)) > 999
89
+ output += "%30s %20.2f %10.2f %10s\n" % [name, bytes_to_gb(mount_stats_storage[name]['used'].to_f), (per_day(bytes_to_gb(growth_rate+0))), 'N/A']
90
+ else
91
+ output += "%30s %20.2f %10.2f %10.2f\n" % [name, bytes_to_gb(mount_stats_storage[name]['used'].to_f), (per_day(bytes_to_gb(growth_rate+0))),((critical - mount_stats_storage[name]['used'].to_f)/ per_day(growth_rate))]
92
+ end
93
+ end
94
+
95
+ output += "\n\n________________________________________________________________________________________________________\nDay Over Day\n\n"
96
+
97
+ output += "%30s %10s %10s %10s %10s %11s\n" % ["pool name", "today", "1 day ago", "2 days ago", "7 days ago", "14 days ago"]
98
+ output += "%30s %10s %10s %10s %10s %11s\n" % ["---------", "-----", "---------", "----------", "----------", "-----------"]
99
+
100
+ mount_stats_storage.each do |name, temp|
101
+ out_array = []
102
+ segments[name].each do |range, value|
103
+ out_array << per_day(bytes_to_gb(value))+0
104
+ end
105
+ output += "%30s %10s %10s %10s %10s %11s\n" % [name, (out_array.reverse[0] ? "%.2f" % out_array.reverse[0] : 'N/A'), (out_array.reverse[1] ? "%.2f" % out_array.reverse[1] : 'N/A'), (out_array.reverse[2] ? "%.2f" % out_array.reverse[2] : 'N/A'), (out_array.reverse[7] ? "%.2f" % out_array.reverse[7] : 'N/A'), (out_array.reverse[14] ? "%.2f" % out_array.reverse[14] : 'N/A')]
106
+ end
107
+
108
+ output += outliers
109
+
110
+ collins_results = get_hardware_stats
111
+
112
+ output += collins_results
113
+
114
+ puts output
115
+
116
+ html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><pre style="font-size=20px;">' + output + '</pre></body></html>'
117
+
118
+ if email
119
+ Pony.mail(:to => email, :from => 'jetpants', :subject => 'Jetpants Capacity Plan - '+Time.now.strftime("%m/%d/%Y %H:%M:%S"), :html_body => html)
120
+ end
121
+ end
122
+
123
+ def bytes_to_gb(size)
124
+ size.to_f / 1024.0 / 1049000.0
125
+ end
126
+
127
+ def bytes_to_mb(size)
128
+ size.to_f / 1024.0 / 1024.0
129
+ end
130
+
131
+ def per_day(size)
132
+ size * 60 * 60 * 24
133
+ end
134
+
135
+ def per_week(size)
136
+ size * 60 * 60 * 24 * 7
137
+ end
138
+
139
+ def per_month(size)
140
+ size * 60 * 60 * 24 * 30
141
+ end
142
+
143
  + # use an exponentially decaying avg unless there is a count, then use a cumulative moving avg
144
+ def calc_avg(avg, new_value, count=false)
145
+ unless count
146
+ (new_value * 0.5) + (avg * (1.0 - 0.5))
147
+ else
148
+ avg + ((new_value - avg) / count)
149
+ end
150
+ end
151
+
152
  + ## grab the current sizes from actual data set size including logs (in bytes)
153
+ def current_sizes
154
+ pool_sizes = {}
155
+ Jetpants.pools.each do |p|
156
+ pool_sizes[p.name] = p.data_set_size
157
+ end
158
+ pool_sizes
159
+
160
+ end
161
+
162
+ ## get all mount's data in kilobytes
163
+ def all_mounts
164
+ mount_stats = {}
165
+ Jetpants.pools.each do |p|
166
+ mount_stats[p.name] ||= p.mount_stats
167
+ end
168
+ mount_stats
169
+ end
170
+
171
+ ## loop through data and enter it in mysql
172
+ def store_data(mount_data,timestamp)
173
+ mount_data.each do |key, value|
174
+ @@db.query('INSERT INTO storage (`timestamp`, `pool`, `total`, `used`, `available`, `db_sizes`) VALUES ( ? , ? , ? , ? , ? , ? )', timestamp.to_s, key, value['total'].to_s, value['used'].to_s, value['available'].to_s, value['db_sizes'].to_s)
175
+ end
176
+ end
177
+
178
+ ## get history from mysql of all data right now
179
+ def get_history
180
+ history = {}
181
+ @@db.query_return_array('select timestamp, pool, total, used, available, db_sizes from storage order by id').each do |row|
182
+ history[row[:pool]] ||= {}
183
+ history[row[:pool]][row[:timestamp]] ||= {}
184
+ history[row[:pool]][row[:timestamp]]['total'] = row[:total]
185
+ history[row[:pool]][row[:timestamp]]['used'] = row[:used]
186
+ history[row[:pool]][row[:timestamp]]['available'] = row[:available]
187
+ history[row[:pool]][row[:timestamp]]['db_sizes'] = row[:db_sizes]
188
+ end
189
+ history
190
+ end
191
+
192
+ ## segment out groups to a given time period
193
+ def segmentify(hash, timeperiod)
194
+ new_hash = {}
195
+ hash.each do |name, temp|
196
+ before_timestamp = false
197
+ keeper = []
198
+ last_timestamp = nil
199
+ last_value = nil
200
+ hash[name].sort.each do |timestamp, value|
201
+ new_hash[name] ||= {}
202
+ last_timestamp = timestamp
203
+ last_value = value
204
+ unless before_timestamp && timestamp > (timeperiod - 60 ) + before_timestamp
205
+ unless before_timestamp
206
+ before_timestamp = timestamp
207
+ end
208
+ keeper << value
209
+ else
210
+ new_hash[name][before_timestamp.to_s+"-"+timestamp.to_s] = (keeper[0]['used'].to_f - value['used'].to_f )/(before_timestamp.to_f - timestamp.to_f)
211
+ before_timestamp = timestamp
212
+ keeper = []
213
+ keeper << value
214
+ end
215
+ end
216
+ if keeper.length > 1
217
+ new_hash[name][before_timestamp.to_s+"-"+last_timestamp.to_s] = (keeper[0]['used'].to_f - last_value['used'].to_f )/(before_timestamp.to_f - last_timestamp.to_f)
218
+ end
219
+ end
220
+
221
+ new_hash
222
+ end
223
+
224
  + # get a hash of machines to display at the end of the email
225
+ # you need to have a method in Jetpants.topology.machine_status_counts to get
226
+ # your machine types and states
227
+ def get_hardware_stats
228
+
229
+ #see if function exists
230
+ return '' unless Jetpants.topology.respond_to? :machine_status_counts
231
+
232
+ data = Jetpants.topology.machine_status_counts
233
+
234
+ output = ''
235
+ output += "\n________________________________________________________________________________________________________\n"
236
+ output += "Hardware status\n\n"
237
+
238
+ headers = ['status'].concat(data.first[1].keys).concat(['total'])
239
+ output += (headers.map { |i| "%20s"}.join(" ")+"\n") % headers
240
+ output += (headers.map { |i| "%20s"}.join(" ")+"\n") % headers.map { |i| '------------------'}
241
+
242
+ data.each do |key, status|
243
+ unless key == 'unallocated'
244
+ total = 0
245
+ status.each do |nodeclass, value|
246
+ total += value.to_i
247
+ end
248
+ output += (headers.map { |i| "%20s"}.join(" ")+"\n") % [key].concat(status.values).concat([total])
249
+ end
250
+ end
251
+
252
+ output += "\nTotal Unallocated nodes - " + data['unallocated'] + "\n\n"
253
+
254
+ output
255
+ end
256
+
257
+ # figure out the outliers for the last 3 days
258
+ def outliers
259
+ output = ''
260
+
261
+ output += "\n________________________________________________________________________________________________________\n"
262
+ output += "New Outliers\n"
263
+ output += "--Compare the last 3 days in 2 hour blocks to the same 2 hour block 7, 14, 21, 28 days ago\n\n"
264
+
265
+ output += "%30s %25s %25s %10s %11s\n" % ['Pool Name', 'Start Time', 'End Time', 'Usage', 'Prev Weeks']
266
+ output += "%30s %25s %25s %10s %11s\n" % ['---------', '----------', '--------', '-----', '----------']
267
+
268
+ block_sizes = 60 * 60 * 2 + 120
269
+ days_from = [7,14,21,28]
270
+ Jetpants.pools.each do |p|
271
+ start_time = Time.now.to_i - 3 * 24 * 60 * 60
272
+ counter = 0
273
+ counter_time = 0
274
+ output_buffer = ''
275
+ last_per = nil
276
+
277
+ name = p.name
278
+ while start_time + (60 * 62) < Time.now.to_i
279
+ temp_array = []
280
+ from_blocks = {}
281
+ from_per = {}
282
+
283
+ now_block = get_history_block(name, start_time, start_time + block_sizes)
284
+ unless now_block.count == 0
285
+ now_per = (now_block.first[1]['used'].to_f - now_block.values.last['used'].to_f)/(now_block.first[0].to_f - now_block.keys.last.to_f)
286
+
287
+
288
+ days_from.each do |days|
289
+ temp = get_history_block(name, start_time - (days * 24 * 60 * 60), start_time - (days * 24 * 60 * 60) + block_sizes)
290
+ if temp.count >= 2
291
+ from_blocks[days] = temp
292
+ from_per[days] = (from_blocks[days].first[1]['used'].to_f - from_blocks[days].values.last['used'].to_f)/(from_blocks[days].first[0].to_f - from_blocks[days].keys.last.to_f)
293
+ end
294
+ end
295
+
296
+ # remove outliers from compare array because we only care about current outliers not old outliers
297
+ from_per.each do |day, value|
298
+ if(value > from_per.values.mean * 5.0 || value < from_per.values.mean * -5.0)
299
+ from_per.delete(day)
300
+ end
301
+ end
302
+
303
+ if from_per.count > 0
304
+ if((now_per > (from_per.values.mean * 2.2) && from_per.values.mean != 0) || (from_per.values.mean == 0 && now_per > 1048576))
305
+ if counter == 0
306
+ counter_time = start_time
307
+ end
308
+ counter += 1
309
+ if counter > 3
310
+ output_buffer = "%30s %25s %25s %10.2f %11.2f\n" % [name, Time.at(counter_time.to_i).strftime("%m/%d/%Y %H:%M:%S"), Time.at(start_time + block_sizes).strftime("%m/%d/%Y %H:%M:%S"), per_day(bytes_to_gb(now_per)), per_day(bytes_to_gb(from_per.values.mean))]
311
+ end
312
+ else
313
+ counter = 0
314
+ unless output_buffer == ''
315
+ output += output_buffer
316
+ output_buffer = ''
317
+ end
318
+ end
319
+
320
+ if((now_per > (from_per.values.mean * 5.0) && from_per.values.mean != 0) || (from_per.values.mean == 0 && now_per > 1048576))
321
+ output += "%30s %25s %25s %10.2f %11.2f\n" % [name, Time.at(start_time).strftime("%m/%d/%Y %H:%M:%S"), Time.at(start_time + block_sizes).strftime("%m/%d/%Y %H:%M:%S"), per_day(bytes_to_gb(now_per)), per_day(bytes_to_gb(from_per.values.mean))]
322
+ end
323
+ end # end if hash has values
324
+
325
+ end
326
+
327
+ start_time += block_sizes - 120
328
+ end # end while loop for last 3 days
329
+ output_buffer = ''
330
+ counter = 0
331
+ counter_time = 0
332
+ end
333
+
334
+ output
335
+
336
+ end
337
+
338
  + ## get history from mysql for one pool within the given time window
339
+ def get_history_block(pool,time_start,time_stop)
340
+ history = {}
341
+ @@db.query_return_array('select timestamp, pool, total, used, available, db_sizes from storage where pool = ? and timestamp >= ? and timestamp <= ? order by id', pool, time_start, time_stop).each do |row|
342
+ history[row[:timestamp]] ||= {}
343
+ history[row[:timestamp]]['total'] = row[:total]
344
+ history[row[:timestamp]]['used'] = row[:used]
345
+ history[row[:timestamp]]['available'] = row[:available]
346
+ history[row[:timestamp]]['db_sizes'] = row[:db_sizes]
347
+ end
348
+ history
349
+ end
350
+
351
+ end
352
+ end
353
+ end