RubyGems - nose-cli - Versions diffs - 0.1.0pre - Mend

nose-cli 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +7 -0
data/bin/nose +26 -0
data/bin/random_rubis +105 -0
data/bin/restart-cassandra.sh +20 -0
data/bin/run-experiments.sh +61 -0
data/data/nose-cli/nose.yml.example +32 -0
data/lib/nose_cli.rb +364 -0
data/lib/nose_cli/analyze.rb +94 -0
data/lib/nose_cli/benchmark.rb +145 -0
data/lib/nose_cli/collect_results.rb +55 -0
data/lib/nose_cli/console.rb +50 -0
data/lib/nose_cli/create.rb +35 -0
data/lib/nose_cli/diff_plans.rb +39 -0
data/lib/nose_cli/dump.rb +67 -0
data/lib/nose_cli/execute.rb +241 -0
data/lib/nose_cli/export.rb +39 -0
data/lib/nose_cli/genworkload.rb +24 -0
data/lib/nose_cli/graph.rb +24 -0
data/lib/nose_cli/load.rb +44 -0
data/lib/nose_cli/measurements.rb +36 -0
data/lib/nose_cli/plan_schema.rb +84 -0
data/lib/nose_cli/proxy.rb +32 -0
data/lib/nose_cli/random_plans.rb +82 -0
data/lib/nose_cli/recost.rb +45 -0
data/lib/nose_cli/reformat.rb +22 -0
data/lib/nose_cli/repl.rb +144 -0
data/lib/nose_cli/search.rb +77 -0
data/lib/nose_cli/search_all.rb +120 -0
data/lib/nose_cli/search_bench.rb +52 -0
data/lib/nose_cli/shared_options.rb +30 -0
data/lib/nose_cli/texify.rb +141 -0
data/lib/nose_cli/why.rb +70 -0
data/templates/completions.erb +56 -0
data/templates/man.erb +33 -0
data/templates/report.erb +138 -0
data/templates/subman.erb +19 -0
metadata +345 -0

data/lib/nose_cli/execute.rb ADDED Viewed

@@ -0,0 +1,241 @@
+# frozen_string_literal: true
+require 'table_print'
+module NoSE
+  module CLI
+    # Run performance tests on plans for a particular schema
+    class NoSECLI < Thor
+      desc 'execute PLANS', 'test performance of the named PLANS'
+      long_desc <<-LONGDESC
+        `nose execute` is similar to `nose benchmark`. It will take manually
+        defined plans with the given name stored in the `plans` subdirectory,
+        execute each statement, and output a summary of the execution times.
+        Before runnng execute, `nose create` and `nose load` must be used to
+        prepare the target database.
+      LONGDESC
+      shared_option :mix
+      option :num_iterations, type: :numeric, default: 100,
+                              banner: 'the number of times to execute each ' \
+                                      'statement'
+      option :repeat, type: :numeric, default: 1,
+                      banner: 'how many times to repeat the benchmark'
+      option :group, type: :string, default: nil, aliases: '-g',
+                     banner: 'restrict the benchmark to statements in the ' \
+                             'given group'
+      option :fail_on_empty, type: :boolean, default: true,
+                             banner: 'abort if a column family is empty'
+      option :totals, type: :boolean, default: false, aliases: '-t',
+                      banner: 'whether to include group totals in the output'
+      option :format, type: :string, default: 'txt',
+                      enum: %w(txt csv), aliases: '-f',
+                      banner: 'the format of the output data'
+      def execute(plans_name)
+        # Load the execution plans
+        plans = Plans::ExecutionPlans.load plans_name
+        # Construct an instance of the backend
+        result = OpenStruct.new
+        result.workload = Workload.new plans.schema.model
+        result.workload.mix = options[:mix].to_sym \
+          unless options[:mix] == 'default' && result.workload.mix != :default
+        result.model = result.workload.model
+        result.indexes = plans.schema.indexes.values
+        backend = get_backend(options, result)
+        # Get sample index values to use in queries
+        index_values = index_values plans.schema.indexes.values, backend,
+                                    options[:num_iterations],
+                                    options[:fail_on_empty]
+        table = []
+        total = 0
+        plans.groups.each do |group, group_plans|
+          next if options[:group] && group != options[:group]
+          group_table = []
+          group_total = 0
+          group_weight = plans.weights[group][result.workload.mix]
+          next unless group_weight
+          group_plans.each do |plan|
+            next if options[:plan] && plan.name != options[:plan]
+            update = !plan.steps.last.is_a?(Plans::IndexLookupPlanStep)
+            method = update ? :bench_update : :bench_query
+            measurement = send method, backend, plans.schema.indexes.values,
+                               plan, index_values,
+                               options[:num_iterations],
+                               options[:repeat], weight: group_weight
+            # Run the query and get the total time
+            group_total += measurement.mean
+            group_table << measurement
+          end
+          if options[:totals]
+            total_measurement = Measurements::Measurement.new nil, 'TOTAL'
+            total_measurement << group_table.map(&:weighted_mean) \
+                                 .inject(0, &:+)
+            group_table << total_measurement
+          end
+          table << OpenStruct.new(label: plans_name, group: group,
+                                  measurements: group_table)
+          group_total *= group_weight
+          total += group_total
+        end
+        if options[:totals]
+          total_measurement = Measurements::Measurement.new nil, 'TOTAL'
+          total_measurement << table.map do |group|
+            group.measurements.find { |m| m.name == 'TOTAL' }.mean
+          end.inject(0, &:+)
+          table << OpenStruct.new(label: plans_name, group: 'TOTAL',
+                                  measurements: [total_measurement])
+        end
+        case options[:format]
+        when 'txt'
+          output_table table
+        else
+          output_csv table
+        end
+      end
+      private
+      # Output the table of results
+      # @return [void]
+      def output_table(table)
+        columns = [
+          'label', 'group',
+          { 'measurements.name' => { display_name: 'name' } },
+          { 'measurements.weight' => { display_name: 'weight' } },
+          { 'measurements.mean' => { display_name: 'mean' } },
+          { 'measurements.estimate' => { display_name: 'cost' } }
+        ]
+        tp table, *columns
+      end
+      # Output a CSV file of results
+      # @return [void]
+      def output_csv(table)
+        csv_str = CSV.generate do |csv|
+          csv << %w(label group name weight mean cost)
+          table.each do |group|
+            group.measurements.each do |measurement|
+              csv << [
+                group.label,
+                group.group,
+                measurement.name,
+                measurement.weight,
+                measurement.mean,
+                measurement.estimate
+              ]
+            end
+          end
+        end
+        puts csv_str
+      end
+      # Get the average execution time for a single query plan
+      # @return [Measurements::Measurement]
+      def bench_query(backend, indexes, plan, index_values, iterations, repeat,
+                      weight: 1.0)
+        condition_list = execute_conditions plan.params, indexes, index_values,
+                                            iterations
+        prepared = backend.prepare_query nil, plan.select_fields, plan.params,
+                                         [plan.steps]
+        measurement = Measurements::Measurement.new plan, weight: weight
+        1.upto(repeat) do
+          # Execute each plan and measure the time
+          start_time = Time.now.utc
+          condition_list.each { |conditions| prepared.execute conditions }
+          elapsed = Time.now.utc - start_time
+          measurement << (elapsed / iterations)
+        end
+        measurement
+      end
+      # Get the average execution time for a single update plan
+      # @return [Measurements::Measurement]
+      def bench_update(backend, indexes, plan, index_values,
+                       iterations, repeat, weight: 1.0)
+        condition_list = execute_conditions plan.params, indexes, index_values,
+                                            iterations
+        # Get values for the fields which were provided as parameters
+        fields = plan.update_steps.last.fields.select do |field|
+          plan.params.key? field.id
+        end
+        setting_list = 1.upto(iterations).map do |i|
+          fields.map do |field|
+            # First check for IDs given as part of the query otherwise
+            # get the backend to generate a random ID or take a random value
+            condition = condition_list[i - 1][field.id]
+            value = if !condition.nil? && field.is_a?(Fields::IDField)
+                      condition.value
+                    elsif field.is_a?(Fields::IDField)
+                      backend.generate_id
+                    else
+                      field.random_value
+                    end
+            FieldSetting.new(field, value)
+          end
+        end
+        prepared = backend.prepare_update nil, [plan]
+        measurement = Measurements::Measurement.new plan, weight: weight
+        1.upto(repeat) do
+          # Execute each plan and measure the time
+          start_time = Time.now.utc
+          setting_list.zip(condition_list).each do |settings, conditions|
+            prepared.each { |p| p.execute settings, conditions }
+          end
+          elapsed = Time.now.utc - start_time
+          measurement << (elapsed / iterations)
+        end
+        measurement
+      end
+      # Construct a list of values to be substituted in the plan
+      # @return [Array<Hash>]
+      def execute_conditions(params, indexes, index_values, iterations)
+        1.upto(iterations).map do |i|
+          Hash[params.map do |field_id, condition|
+            value = nil
+            indexes.each do |index|
+              values = index_values[index]
+              next if values.empty?
+              value = values[i % values.length][condition.field.id]
+              break unless value.nil?
+            end
+            [
+              field_id,
+              Condition.new(condition.field, condition.operator, value)
+            ]
+          end]
+        end
+      end
+    end
+  end
+end

data/lib/nose_cli/export.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to export the configuration as environment variables
+    class NoSECLI < Thor
+      desc 'export', 'export the configuration as environment variables'
+      long_desc <<-LONGDESC
+        `nose export` reads the configuration file and outputs key-value pairs
+        suitable for use as environment variables.
+      LONGDESC
+      def export
+        export_value [], options
+      end
+      private
+      # Recursively export the values
+      # @return [void]
+      def export_value(path, value)
+        if value.is_a? Hash
+          value.each do |key, nested_value|
+            export_value path + [key], nested_value
+          end
+        elsif value.is_a? Array
+          # Append an integer index to each element of the array
+          export_value path + ['count'], value.length
+          value.each_with_index do |nested_value, i|
+            export_value path + [i], nested_value
+          end
+        else
+          puts "#{path.join('_').upcase}=\"#{value}\""
+        end
+      end
+    end
+  end
+end

data/lib/nose_cli/genworkload.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to generate a workload file from a given loader
+    class NoSECLI < Thor
+      desc 'genworkload NAME',
+           'generate a workload called NAME from the configured loader'
+      long_desc <<-LONGDESC
+        `nose genworkload` will produce a new file in the `workloads` directory
+        containing information on the workload from the configured loader.
+      LONGDESC
+      def genworkload(name)
+        loader_class = get_class 'loader', options
+        workload = loader_class.new.workload options[:loader]
+        File.open("./workloads/#{name}.rb", 'w') do |file|
+          file.write workload.source_code
+        end
+      end
+    end
+  end
+end

data/lib/nose_cli/graph.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to generate a graphic of the schema from a workload
+    class NoSECLI < Thor
+      desc 'graph WORKLOAD FILE', 'output a FILE of the given WORKLOAD'
+      long_desc <<-LONGDESC
+        `nose graph` will produce a visual representation of the schema for the
+        named workload in the `workloads` directory.
+      LONGDESC
+      option :include_fields, type: :boolean, default: false, aliases: '-i',
+                              desc: 'include each field in the output graph'
+      def graph(workload_name, filename)
+        workload = Workload.load workload_name
+        type = filename.split('.').last.to_sym
+        workload.model.output type, filename, options[:include_fields]
+      end
+    end
+  end
+end

data/lib/nose_cli/load.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to load index data into a backend from a configured loader
+    class NoSECLI < Thor
+      desc 'load PLAN_FILE_OR_SCHEMA',
+           'create indexes from the given PLAN_FILE_OR_SCHEMA'
+      long_desc <<-LONGDESC
+        `nose load` will load a schema either from generated plan file from
+        `nose search` or a named schema in the `schemas` directory. It will
+        then populate the backend indexes as defined by the schema using data
+        from the configured loader. It assumes that the indexes have already
+        been created by `nose create`.
+      LONGDESC
+      option :progress, type: :boolean, default: true, aliases: '-p',
+                        desc: 'whether to display an indication of progress'
+      option :limit, type: :numeric, default: nil, aliases: '-l',
+                     desc: 'limit the number of entries loaded ' \
+                           '(useful for testing)'
+      option :skip_nonempty, type: :boolean, default: true, aliases: '-s',
+                             desc: 'ignore indexes which are not empty'
+      def load(*plan_files)
+        plan_files.each { |plan_file| load_plan plan_file, options }
+      end
+      private
+      # Load data from a single plan file
+      # @return [void]
+      def load_plan(plan_file, options)
+        result, backend = load_plans plan_file, options
+        # Create a new instance of the loader class and execute
+        loader = get_class('loader', options).new result.workload, backend
+        loader.load result.indexes, options[:loader], options[:progress],
+                    options[:limit], options[:skip_nonempty]
+      end
+    end
+  end
+end

data/lib/nose_cli/measurements.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+require 'descriptive_statistics/safe'
+require 'forwardable'
+module NoSE
+  # Storage and presentation of values from performance measurements
+  module Measurements
+    # A measurement of a single statement execution time
+    # Recorded values are kept in an internal array; the collection methods
+    # listed below are delegated to that array, and the Enumerable and
+    # DescriptiveStatistics mixins are included on top of the delegated `each`
+    class Measurement
+      # Optional estimate for the statement (nil unless given or assigned)
+      attr_accessor :estimate
+      attr_reader :plan, :name, :weight
+      # Allow the values array to store numbers and compute stats
+      extend Forwardable
+      def_delegators :@values, :each, :<<, :size, :count, :length, :empty?
+      include Enumerable
+      include DescriptiveStatistics
+      # @param plan [Object, nil] the plan which was measured, if any
+      # @param name [String, nil] label for the measurement; falls back to
+      #   the name of the plan when a plan is given
+      # @param estimate [Object, nil] optional estimate stored as given
+      # @param weight [Numeric] multiplier used by {#weighted_mean}
+      def initialize(plan, name = nil, estimate = nil, weight: 1.0)
+        @plan = plan
+        @name = name || (plan && plan.name)
+        @estimate = estimate
+        @weight = weight
+        @values = []
+      end
+      # The mean weighted by this measurement weight
+      # NOTE(review): `mean` is not defined in this class; presumably it
+      # comes from the DescriptiveStatistics mixin - confirm
+      # @return [Numeric]
+      def weighted_mean
+        @weight * mean
+      end
+    end
+  end
+end

data/lib/nose_cli/plan_schema.rb ADDED Viewed

@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to output plans for a workload using a manually-defined schema
+    class NoSECLI < Thor
+      desc 'plan-schema WORKLOAD SCHEMA',
+           'output plans for the given WORKLOAD using SCHEMA'
+      long_desc <<-LONGDESC
+        `nose plan-schema` produces a set of plans for the given WORKLOAD
+        using the manually-defined SCHEMA.
+        This is useful to compare manually-defined execution plans with the
+        plans that NoSE would produce for the same schema.
+      LONGDESC
+      shared_option :format
+      shared_option :mix
+      # Plan every statement of a workload against the indexes of a fixed
+      # schema and hand the results to the output method for the format
+      # @param workload_name [String] name handed to `Workload.load`
+      # @param schema_name [String] name handed to `Schema.load`
+      def plan_schema(workload_name, schema_name)
+        workload = Workload.load workload_name
+        # Apply the requested mix unless the option was left at 'default' and
+        # the workload already carries a non-default mix
+        workload.mix = options[:mix].to_sym \
+          unless options[:mix] == 'default' && workload.mix != :default
+        schema = Schema.load schema_name
+        indexes = schema.indexes.values
+        # Build the statement plans
+        cost_model = get_class_from_config options, 'cost', :cost_model
+        planner = Plans::QueryPlanner.new workload, indexes, cost_model
+        trees = workload.queries.map { |q| planner.find_plans_for_query q }
+        # Keep the minimum plan of each tree (presumably the cheapest one;
+        # the ordering is defined outside this file - confirm)
+        plans = trees.map(&:min)
+        update_plans = build_update_plans workload.statements, indexes,
+                                          workload.model, trees, cost_model
+        # Construct a result set
+        results = plan_schema_results workload, indexes, plans, update_plans,
+                                      cost_model
+        # Output the results in the specified format
+        # The method called is `output_` followed by the format option, so
+        # one must exist for every permitted format
+        send(('output_' + options[:format]).to_sym, results)
+      end
+      private
+      # Construct a result set
+      # @return [OpenStruct]
+      def plan_schema_results(workload, indexes, plans, update_plans,
+                              cost_model)
+        results = OpenStruct.new
+        results.workload = workload
+        results.model = workload.model
+        results.indexes = indexes
+        results.enumerated_indexes = []
+        results.plans = plans
+        results.update_plans = update_plans
+        results.cost_model = cost_model
+        results.weights = workload.statement_weights
+        results.total_size = results.indexes.sum_by(&:size)
+        results.total_cost = plans.sum_by { |plan| plan.cost * plan.weight }
+        results
+      end
+      # Produce all update plans for the schema
+      # @return [Array<Plans::UpdatePlan>]
+      def build_update_plans(statements, indexes, model, trees, cost_model)
+        planner = Plans::UpdatePlanner.new model, trees, cost_model
+        update_plans = []
+        statements.each do |statement|
+          next if statement.is_a? Query
+          planner.find_plans_for_update(statement, indexes).each do |plan|
+            plan.select_query_plans(indexes)
+            update_plans << plan
+          end
+        end
+        update_plans
+      end
+    end
+  end
+end