RubyGems - nose-cli - Versions diffs - 0.1.0pre - Mend

nose-cli 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +7 -0
data/bin/nose +26 -0
data/bin/random_rubis +105 -0
data/bin/restart-cassandra.sh +20 -0
data/bin/run-experiments.sh +61 -0
data/data/nose-cli/nose.yml.example +32 -0
data/lib/nose_cli.rb +364 -0
data/lib/nose_cli/analyze.rb +94 -0
data/lib/nose_cli/benchmark.rb +145 -0
data/lib/nose_cli/collect_results.rb +55 -0
data/lib/nose_cli/console.rb +50 -0
data/lib/nose_cli/create.rb +35 -0
data/lib/nose_cli/diff_plans.rb +39 -0
data/lib/nose_cli/dump.rb +67 -0
data/lib/nose_cli/execute.rb +241 -0
data/lib/nose_cli/export.rb +39 -0
data/lib/nose_cli/genworkload.rb +24 -0
data/lib/nose_cli/graph.rb +24 -0
data/lib/nose_cli/load.rb +44 -0
data/lib/nose_cli/measurements.rb +36 -0
data/lib/nose_cli/plan_schema.rb +84 -0
data/lib/nose_cli/proxy.rb +32 -0
data/lib/nose_cli/random_plans.rb +82 -0
data/lib/nose_cli/recost.rb +45 -0
data/lib/nose_cli/reformat.rb +22 -0
data/lib/nose_cli/repl.rb +144 -0
data/lib/nose_cli/search.rb +77 -0
data/lib/nose_cli/search_all.rb +120 -0
data/lib/nose_cli/search_bench.rb +52 -0
data/lib/nose_cli/shared_options.rb +30 -0
data/lib/nose_cli/texify.rb +141 -0
data/lib/nose_cli/why.rb +70 -0
data/templates/completions.erb +56 -0
data/templates/man.erb +33 -0
data/templates/report.erb +138 -0
data/templates/subman.erb +19 -0
metadata +345 -0

data/lib/nose_cli/analyze.rb ADDED Viewed

@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+require 'csv'
+require 'gruff'
+module NoSE
+  module CLI
+    # Add a command to generate a graphic of the schema from a workload
+    class NoSECLI < Thor
+      desc 'analyze OUTPUT_FILE CSV_FILES',
+           'output a graph to OUTPUT_FILE comparing the CSV_FILES'
+      long_desc <<-LONGDESC
+        `nose analyze` will create a graph comparing the runtimes from multiple
+        runs of different schemas for a particular workload. The CSV files
+        should be of the format produced from either `nose benchmark` or
+        `nose execute`.
+      LONGDESC
+      option :total, type: :boolean, default: false, aliases: '-t',
+                     desc: 'whether to include a line for totals in the '\
+                           'graph'
+      def analyze(output_file, *csv_files)
+        # Load data from the files
+        data = load_data csv_files, options[:total]
+        # Set graph properties
+        g = Gruff::Bar.new '2000x800'
+        g.title = 'NoSE Schema Performance'
+        g.x_axis_label = '\nWorkload group'
+        g.y_axis_label = 'Weighted execution time (s)'
+        g.title_font_size = 20
+        g.legend_font_size = 10
+        g.marker_font_size = 10
+        g.label_stagger_height = 15
+        g.legend_box_size = 10
+        g.bar_spacing = 0.5
+        # Add each data element to the graph
+        data.each do |datum|
+          g.data datum.first['label'], datum.map { |row| row['mean'] }
+        end
+        g.labels = Hash[data.first.map.with_index do |row, n|
+          [n, row['group']]
+        end]
+        g.write output_file
+      end
+      private
+      # Load the data from the given list of CSV files
+      # @return [Array<Hash>]
+      def load_data(csv_files, total = false)
+        headers = nil
+        csv_files.map do |file|
+          lines = CSV.read(file)
+          headers = lines.first if headers.nil?
+          lines = lines[1..-1].map do |row|
+            Hash[headers.zip row]
+          end
+          grouped_lines = lines.group_by { |row| row['group'] }
+          rows = grouped_lines.map do |group, grouped_rows|
+            mean = grouped_rows.inject(0) { |sum, row| sum + row['mean'].to_f }
+            {
+              'label' => grouped_rows.first['label'],
+              'group' => group,
+              'weight' => grouped_rows.first['weight'].to_f,
+              'mean' => mean
+            }
+          end
+          # Add an additional row for the total
+          if total
+            total_weight = rows.map { |row| row['weight'] }.inject(0, &:+)
+            rows << {
+              'label' => rows.first['label'],
+              'group' => 'TOTAL',
+              'weight' => 1.0,
+              'mean' => rows.inject(0) do |sum, row|
+                sum + row['mean'] * row['weight'] / total_weight
+              end
+            }
+          end
+          rows
+        end
+      end
+    end
+  end
+end

data/lib/nose_cli/benchmark.rb ADDED Viewed

@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+require 'csv'
+require 'table_print'
+module NoSE
+  module CLI
+    # Run performance tests on plans for a particular schema
+    class NoSECLI < Thor
+      desc 'benchmark PLAN_FILE', 'test performance of plans in PLAN_FILE'
+      long_desc <<-LONGDESC
+        `nose benchmark` will take a JSON file output by `nose search`,
+        execute each statement, and output a summary of the execution times.
+        Before runnng benchmark, `nose create` and `nose load` must be used to
+        prepare the target database.
+      LONGDESC
+      shared_option :mix
+      option :num_iterations, type: :numeric, default: 100,
+                              banner: 'ITERATIONS',
+                              desc: 'the number of times to execute each ' \
+                                    'statement'
+      option :repeat, type: :numeric, default: 1,
+                      desc: 'how many times to repeat the benchmark'
+      option :group, type: :string, default: nil, aliases: '-g',
+                     desc: 'restrict the benchmark to statements in the ' \
+                           'given group'
+      option :fail_on_empty, type: :boolean, default: true,
+                             desc: 'abort if a column family is empty'
+      option :totals, type: :boolean, default: false, aliases: '-t',
+                      desc: 'whether to include group totals in the output'
+      option :format, type: :string, default: 'txt',
+                      enum: %w(txt csv), aliases: '-f',
+                      desc: 'the format of the output data'
+      def benchmark(plan_file)
+        label = File.basename plan_file, '.*'
+        result = load_results plan_file, options[:mix]
+        backend = get_backend(options, result)
+        index_values = index_values result.indexes, backend,
+                                    options[:num_iterations],
+                                    options[:fail_on_empty]
+        group_tables = Hash.new { |h, k| h[k] = [] }
+        group_totals = Hash.new { |h, k| h[k] = 0 }
+        result.plans.each do |plan|
+          query = plan.query
+          weight = result.workload.statement_weights[query]
+          next if query.is_a?(SupportQuery) || !weight
+          @logger.debug { "Executing #{query.text}" }
+          next unless options[:group].nil? || plan.group == options[:group]
+          indexes = plan.select do |step|
+            step.is_a? Plans::IndexLookupPlanStep
+          end.map(&:index)
+          measurement = bench_query backend, indexes, plan, index_values,
+                                    options[:num_iterations], options[:repeat],
+                                    weight: weight
+          next if measurement.empty?
+          measurement.estimate = plan.cost
+          group_totals[plan.group] += measurement.mean
+          group_tables[plan.group] << measurement
+        end
+        result.workload.updates.each do |update|
+          weight = result.workload.statement_weights[update]
+          next unless weight
+          plans = (result.update_plans || []).select do |possible_plan|
+            possible_plan.statement == update
+          end
+          next if plans.empty?
+          @logger.debug { "Executing #{update.text}" }
+          plans.each do |plan|
+            next unless options[:group].nil? || plan.group == options[:group]
+            # Get all indexes used by support queries
+            indexes = plan.query_plans.flat_map(&:indexes) << plan.index
+            measurement = bench_update backend, indexes, plan, index_values,
+                                       options[:num_iterations],
+                                       options[:repeat], weight: weight
+            next if measurement.empty?
+            measurement.estimate = plan.cost
+            group_totals[plan.group] += measurement.mean
+            group_tables[plan.group] << measurement
+          end
+        end
+        total = 0
+        table = []
+        group_totals.each do |group, group_total|
+          total += group_total
+          total_measurement = Measurements::Measurement.new nil, 'TOTAL'
+          group_table = group_tables[group]
+          total_measurement << group_table.map(&:weighted_mean) \
+                               .inject(0, &:+)
+          group_table << total_measurement if options[:totals]
+          table << OpenStruct.new(label: label, group: group,
+                                  measurements: group_table)
+        end
+        if options[:totals]
+          total_measurement = Measurements::Measurement.new nil, 'TOTAL'
+          total_measurement << table.map do |group|
+            group.measurements.find { |m| m.name == 'TOTAL' }.mean
+          end.inject(0, &:+)
+          table << OpenStruct.new(label: label, group: 'TOTAL',
+                                  measurements: [total_measurement])
+        end
+        case options[:format]
+        when 'txt'
+          output_table table
+        else
+          output_csv table
+        end
+      end
+      private
+      # Get a sample of values from each index used by the queries
+      # @return [Hash]
+      def index_values(indexes, backend, iterations, fail_on_empty = true)
+        Hash[indexes.map do |index|
+          values = backend.index_sample(index, iterations).to_a
+          fail "Index #{index.key} is empty and will produce no results" \
+            if values.empty? && fail_on_empty
+          [index, values]
+        end]
+      end
+    end
+  end
+end

data/lib/nose_cli/collect_results.rb ADDED Viewed

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to generate a graphic of the schema from a workload
+    class NoSECLI < Thor
+      desc 'collect-results CSV_FILES',
+           'collect results from CSV_FILES and produce a `gnuplot` data file'
+      long_desc <<-LONGDESC
+        `nose collect-results` combines results from multiple statement
+        execution runs and produces a data file which can be used to generate
+        clustered bar charts in `gnuplot`.
+      LONGDESC
+      option :total, type: :boolean, default: false, aliases: '-t',
+                     desc: 'whether to include a line for totals in the '\
+                           'graph'
+      def collect_results(*csv_files)
+        # Load the data and output the header
+        data = load_data csv_files, options[:total]
+        labels = data.map { |datum| datum.first['label'] }
+        puts((['Group'] + labels).join("\t"))
+        # Output the mean for each schema
+        group_data(data).each { |group| collect_group_data group, data }
+      end
+      private
+      # Combine the results into groups
+      # @return [Array]
+      def group_data(data)
+        # Make sure we collect all rows, keeping the total last
+        groups = data.map { |d| d.map { |r| r['group'] } }.flatten.uniq
+        groups.delete 'TOTAL'
+        groups << 'TOTAL' if options[:total]
+        groups
+      end
+      # Collect the results for a single group
+      # @return [void]
+      def collect_group_data(group, data)
+        print group + "\t"
+        data.each do |datum|
+          row = datum.find { |r| r['group'] == group }
+          print((row.nil? ? '' : row['mean'].to_s) + "\t")
+        end
+        puts
+      end
+    end
+  end
+end

data/lib/nose_cli/console.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Start a pry console while preloading configured objects
+    class NoSECLI < Thor
+      desc 'console PLAN_FILE', 'open a pry console preconfigured with ' \
+                                'variables from the given PLAN_FILE'
+      long_desc <<-LONGDESC
+        `nose console` gives a convenient way to perform manual exploration of
+        generated plan data. It will load plans from the given file and then
+        define a number of variables containing this data. This includes all
+        instance variables in the `Search::Results` object as well as the
+        `model` used to generate the results, the `options` loaded from the
+        configuration file, and an instance of the configured `backend`.
+      LONGDESC
+      def console(plan_file)
+        # Load the results from the plan file and define each as a variable
+        result = load_results plan_file
+        expose_result result
+        # Also extract the model as a variable
+        TOPLEVEL_BINDING.local_variable_set :model, result.workload.model
+        # Load the options and backend as variables
+        TOPLEVEL_BINDING.local_variable_set :options, options
+        TOPLEVEL_BINDING.local_variable_set :backend,
+                                            get_backend(options, result)
+        TOPLEVEL_BINDING.pry
+      end
+      private
+      # Expose the properties of the results object for use in the console
+      # @return [void]
+      def expose_result(result)
+        exposed = result.instance_variables.map do |var|
+          var[1..-1].to_sym
+        end & result.methods
+        exposed.each do |name|
+          TOPLEVEL_BINDING.local_variable_set name, result.method(name).call
+        end
+      end
+    end
+  end
+end

data/lib/nose_cli/create.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command for creating the index data structures in a backend
+    class NoSECLI < Thor
+      desc 'create PLAN_FILE_OR_SCHEMA',
+           'create indexes from the given PLAN_FILE_OR_SCHEMA'
+      long_desc <<-LONGDESC
+        `nose create` will load a schema either from generated plan file from
+        `nose search` or a named schema in the `schemas` directory. It will
+        then create all the indexes in the configured backend.
+      LONGDESC
+      option :dry_run, type: :boolean, default: false,
+                       desc: 'print the DDL, but do not execute'
+      option :skip_existing, type: :boolean, default: false, aliases: '-s',
+                             desc: 'ignore indexes which already exist'
+      option :drop_existing, type: :boolean, default: false,
+                             desc: 'drop existing indexes before recreation'
+      def create(*plan_files)
+        plan_files.each do |plan_file|
+          _, backend = load_plans plan_file, options
+          # Produce the DDL and execute unless the dry run option was given
+          backend.indexes_ddl(!options[:dry_run], options[:skip_existing],
+                              options[:drop_existing]) \
+                 .each { |ddl| puts ddl }
+        end
+      end
+    end
+  end
+end

data/lib/nose_cli/diff_plans.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+module NoSE
+  module CLI
+    # Add a command to generate a graphic of the schema from a workload
+    class NoSECLI < Thor
+      desc 'diff-plans PLAN1 PLAN2',
+           'output the differing plans between PLAN1 and PLAN2'
+      long_desc <<-LONGDESC
+        `nose diff-plans` loads two sets of statement plans generated
+        separately by `nose search` and outputs the plans which are different.
+      LONGDESC
+      def diff_plans(plan1, plan2)
+        result1 = load_results plan1
+        result2 = load_results plan2
+        output_diff plan1, result1, result2
+        output_diff plan2, result2, result1
+      end
+      private
+      # Output differing plans between two sets of results
+      # @return [void]
+      def output_diff(plan_name, result1, result2)
+        puts Formatador.parse("[blue]#{plan_name}\n" + '━' * 50 + '[/]')
+        plans1 = result1.plans.reject { |p| result2.plans.include?(p) }
+        output_plans_txt plans1, $stdout, 1, result1.workload.statement_weights
+        plans1 = result1.update_plans.reject do |plan|
+          result2.update_plans.include? plan
+        end
+        output_update_plans_txt plans1, $stdout,
+                                result1.workload.statement_weights
+      end
+    end
+  end
+end

data/lib/nose_cli/dump.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+require 'ostruct'
+module NoSE
+  module CLI
+    # Add a command to dump a workload and its corresponding schema
+    class NoSECLI < Thor
+      desc 'dump PLANS', 'output the plans in PLANS'
+      long_desc <<-LONGDESC
+        `nose dump` will output results in the same format as `nose search`,
+        but for manually defined execution plans in the `plans` subdirectory.
+      LONGDESC
+      shared_option :format
+      shared_option :mix
+      def dump(plan_name)
+        plans = Plans::ExecutionPlans.load plan_name
+        plans.mix = options[:mix].to_sym \
+          unless options[:mix] == 'default' && plans.mix != :default
+        # Set the cost of each plan
+        cost_model = get_class_from_config options, 'cost', :cost_model
+        plans.calculate_cost cost_model
+        results = OpenStruct.new
+        results.workload = Workload.new plans.schema.model
+        results.workload.mix = plans.mix
+        results.model = results.workload.model
+        results.indexes = plans.schema.indexes.values
+        results.enumerated_indexes = []
+        results.plans = []
+        results.update_plans = []
+        # Store all the query and update plans
+        plans.groups.values.flatten(1).each do |plan|
+          if plan.update_steps.empty?
+            results.plans << plan
+          else
+            # XXX: Hack to build a valid update plan
+            statement = OpenStruct.new group: plan.group
+            update_plan = Plans::UpdatePlan.new statement, plan.index, nil,
+                                                plan.update_steps, cost_model
+            update_plan.instance_variable_set :@group, plan.group
+            update_plan.instance_variable_set :@query_plans, plan.query_plans
+            results.update_plans << update_plan
+          end
+        end
+        results.cost_model = cost_model
+        results.weights = Hash[plans.weights.map { |g, w| [g, w[plans.mix]] }]
+        results.total_size = results.indexes.sum_by(&:size)
+        results.total_cost = plans.groups.values.flatten(1).sum_by do |plan|
+          next 0 if plan.weight.nil?
+          plan.cost * plan.weight
+        end
+        # Output the results in the specified format
+        send(('output_' + options[:format]).to_sym, results)
+      end
+    end
+  end
+end