RubyGems - nose-cli - Versions diffs - 0.1.0pre - Mend

nose-cli 0.1.0pre

Files changed (37) hide show

checksums.yaml +7 -0
data/bin/nose +26 -0
data/bin/random_rubis +105 -0
data/bin/restart-cassandra.sh +20 -0
data/bin/run-experiments.sh +61 -0
data/data/nose-cli/nose.yml.example +32 -0
data/lib/nose_cli.rb +364 -0
data/lib/nose_cli/analyze.rb +94 -0
data/lib/nose_cli/benchmark.rb +145 -0
data/lib/nose_cli/collect_results.rb +55 -0
data/lib/nose_cli/console.rb +50 -0
data/lib/nose_cli/create.rb +35 -0
data/lib/nose_cli/diff_plans.rb +39 -0
data/lib/nose_cli/dump.rb +67 -0
data/lib/nose_cli/execute.rb +241 -0
data/lib/nose_cli/export.rb +39 -0
data/lib/nose_cli/genworkload.rb +24 -0
data/lib/nose_cli/graph.rb +24 -0
data/lib/nose_cli/load.rb +44 -0
data/lib/nose_cli/measurements.rb +36 -0
data/lib/nose_cli/plan_schema.rb +84 -0
data/lib/nose_cli/proxy.rb +32 -0
data/lib/nose_cli/random_plans.rb +82 -0
data/lib/nose_cli/recost.rb +45 -0
data/lib/nose_cli/reformat.rb +22 -0
data/lib/nose_cli/repl.rb +144 -0
data/lib/nose_cli/search.rb +77 -0
data/lib/nose_cli/search_all.rb +120 -0
data/lib/nose_cli/search_bench.rb +52 -0
data/lib/nose_cli/shared_options.rb +30 -0
data/lib/nose_cli/texify.rb +141 -0
data/lib/nose_cli/why.rb +70 -0
data/templates/completions.erb +56 -0
data/templates/man.erb +33 -0
data/templates/report.erb +138 -0
data/templates/subman.erb +19 -0
metadata +345 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 69482346fff002e27d56b7826eec5ab14f4626db
+  data.tar.gz: 07e536d085b0a88fb38f8650a940e4a54c18fbf6
+SHA512:
+  metadata.gz: 1c0e8ab04a53a5107a6ff9559f218eea28db5437d21597720c508867d5d737ce065a742a331ab0742df81bb85efe5f1c6b6737ec00cab204cad1879444b1e73b
+  data.tar.gz: 3e7ab9f5d4b30a71697a15cface20dbf99998f738504f0d83cba5b165cb3cfbad588bc380378c3dd637e5e2f021acffc908d03fe3abaad17fb687f66ac9da93f

data/bin/nose ADDED Viewed

@@ -0,0 +1,26 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Optionally enable debug logging
+ENV['NOSE_LOG'] = 'debug' if ARGV.include?('--debug') || ARGV.include?('-d')
+$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
+require 'nose'
+require 'nose_cli'
+# Start profiling if asked
+unless ENV['NOSE_PROFILE'].nil?
+  require 'ruby-prof'
+  Parallel.instance_variable_set(:@processor_count, 0)
+  RubyProf.start
+end
+NoSE::CLI::NoSECLI.start ARGV
+# Stop profiling and output results
+unless ENV['NOSE_PROFILE'].to_i == 0
+  result = RubyProf.stop
+  printer = RubyProf::CallTreePrinter.new(result)
+  printer.print
+end

data/bin/random_rubis ADDED Viewed

@@ -0,0 +1,105 @@
+#!/usr/bin/env ruby
+# Get and print the seed which is used
+seed = ::Random.new_seed
+$stderr.puts "SEED #{seed}"
+::Random.srand seed
+require 'nose'
+# Record times for the longest running sets of methods
+times = {
+  indexes_for_workload: 0,
+  query_costs: 0,
+  update_costs: 0,
+  setup_model: 0,
+  solve: 0
+}
+NoSE::Timer.enable do |_cls, method, time|
+  times[method] = time if times.key? method
+end
+factor = ARGV[0].to_i
+# Create a random workload generator
+network = NoSE::Random::WattsStrogatzNetwork.new(nodes_nb: 7 * factor)
+workload = NoSE::Workload.new
+network.entities.each { |entity| workload << entity }
+sgen = NoSE::Random::StatementGenerator.new workload.model
+# Add random queries
+1.upto(30 * factor).each do |i|
+  path_length = rand > 0.9 ? 2 : 1
+  path_length = 3 if i <= factor
+  r = rand
+  conditions = if r > 0.95
+                 3
+               elsif r > 0.75
+                 2
+               else
+                 1
+               end
+  q = sgen.random_query path_length,
+                        3,
+                        conditions,
+                        rand > 0.9
+  $stderr.puts q.unparse
+  workload.add_statement q, 10
+end
+# Add random updates
+1.upto(3 * factor).each do
+  u = sgen.random_update 1, 2, 1
+  $stderr.puts u.text
+  workload.add_statement u
+end
+# Add random inserts
+1.upto(5 * factor).each do
+  i = sgen.random_insert
+  $stderr.puts i.text
+  workload.add_statement i
+end
+# Uncomment the lines below to enable profiling
+# (along with the lines above to save the output)
+# require 'ruby-prof'
+# Parallel.instance_variable_set(:@processor_count, 0)
+# RubyProf.start
+# Execute NoSE for the random workload and report the time
+start = Time.now.utc
+indexes = NoSE::IndexEnumerator.new(workload).indexes_for_workload.to_a
+search = NoSE::Search::Search.new(workload,
+                                  NoSE::Cost::RequestCountCost.new)
+search.search_overlap(indexes)
+elapsed = Time.now.utc - start
+# Output the timing values
+total = 0
+times[:costs] = times.delete(:query_costs) + times.delete(:update_costs)
+times.each do |key, time|
+  puts "#{key},#{time}"
+  total += time
+end
+puts "other,#{elapsed - total}"
+# Uncomment the lines below to save profile output
+# (along with the lines above to enable profiling)
+# result = RubyProf.stop
+# result.eliminate_methods!([
+#   /NoSE::Field#hash/,
+#   /Range#/,
+#   /Array#/,
+#   /Set#/,
+#   /Hash#/,
+#   /Integer#downto/,
+#   /Hashids#/,
+#   /String#/,
+#   /Enumerable#/,
+#   /Integer#times/,
+#   /Class#new/
+# ])
+# printer = RubyProf::CallTreePrinter.new(result)
+# printer.print(File.open('prof.out', 'w'))

data/bin/restart-cassandra.sh ADDED Viewed

@@ -0,0 +1,20 @@
+#!/bin/bash
+# Get the first data directory (we assume there's only one)
+# The backup directory is just this directory with -bk appended
+DATA_DIR=`grep data_file_directories -A 1 /etc/cassandra/cassandra.yaml | \
+          tail -n +2 | sed 's/^\s\+-\s\+//; s/\/[^/]*$//'`
+# Ideally sudo should be usable without a password so this can be automated
+# We simply stop Cassandra, restore old data from a backup and restart
+time sudo sh -c "service cassandra stop; \
+                 rm -rf $DATA_DIR; \
+                 cp -r $DATA_DIR-bk $DATA_DIR; \
+                 chown -R cassandra:cassandra $DATA_DIR; \
+                 service cassandra start"
+until cqlsh `hostname -i` -e "USE $1"
+do
+  echo 'Waiting for Cassandra...'
+  sleep 5
+done

data/bin/run-experiments.sh ADDED Viewed

@@ -0,0 +1,61 @@
+#!/bin/sh
+eval `bundle exec nose export`
+# The first argument should be the directory where results are stored
+RESULTS_DIR=$1
+REPEAT=1
+ITERATIONS=1000
+COMMON_OPTIONS="--num-iterations=$ITERATIONS --repeat=$REPEAT --format=csv"
+# Enable command output and fail on error
+set -e
+set -x
+mkdir -p $RESULTS_DIR
+# Passwordless SSH access must be set up to the backend host
+restart_cassandra() {
+  ssh $BACKEND_HOSTS_0 `pwd`/bin/restart_cassandra.sh $BACKEND_KEYSPACE
+}
+run_nose_search() {
+  bundle exec nose search rubis --format=json --mix=$1 > $RESULTS_DIR/$1.json
+}
+run_nose_search bidding
+restart_cassandra
+bundle exec nose benchmark $COMMON_OPTIONS --mix=bidding \
+  $RESULTS_DIR/bidding.json > $RESULTS_DIR/bidding.csv
+run_nose_search write_heavy
+restart_cassandra
+bundle exec nose benchmark $COMMON_OPTIONS --mix=write_heavy \
+  $RESULTS_DIR/write_heavy.json > $RESULTS_DIR/write_heavy.csv
+restart_cassandra
+bundle exec nose benchmark $COMMON_OPTIONS --mix=write_heavy \
+  $RESULTS_DIR/bidding.json > $RESULTS_DIR/bidding_write_heavy.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=bidding \
+  rubis_expert > $RESULTS_DIR/expert.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=write_heavy \
+  rubis_expert > $RESULTS_DIR/expert_write_heavy.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=bidding \
+  rubis_baseline > $RESULTS_DIR/baseline.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=write_heavy \
+  rubis_baseline > $RESULTS_DIR/baseline_write_heavy.csv

data/data/nose-cli/nose.yml.example ADDED Viewed

@@ -0,0 +1,32 @@
+# Example configuration for the nose command line tool
+# The tool looks for a file named nose.yml in the working directory and
+# offers to create it from this example when it is missing
+# Connection to the backend database being targeted, currently only Cassandra
+backend:
+  name: cassandra
+  # One or more hosts may be listed
+  hosts:
+    - localhost
+  port: 9042
+  keyspace: nose
+# Cost model name and parameters
+cost_model:
+  name: request_count
+# Loader-specific configuration
+# The mysql loader is recommended, but csv might work as well if
+# your generated indexes all have path length one
+loader:
+  name: mysql
+  host: 127.0.0.1
+  database: rubis
+  # Placeholder credentials; replace with those of your own database
+  username: root
+  password: root
+# Query proxy
+proxy:
+  name: mysql
+  port: 3307
+# vim: set syntax=yaml:
+# Local Variables:
+# mode:yaml
+# End:

data/lib/nose_cli.rb ADDED Viewed

@@ -0,0 +1,364 @@
+# frozen_string_literal: true
+require 'erb'
+require 'fileutils'
+require 'formatador'
+require 'json'
+require 'ostruct'
+require 'parallel'
+require 'tempfile'
+require 'thor'
+require 'yaml'
+require 'nose'
+require_relative 'nose_cli/measurements'
+module NoSE
+  # CLI tools for running the advisor
+  module CLI
+    # A command-line interface to running the advisor tool
+    class NoSECLI < Thor
+      # The path to the configuration file in the working directory
+      CONFIG_FILE_NAME = 'nose.yml'
+      check_unknown_options!
+      class_option :debug, type: :boolean, aliases: '-d',
+                           desc: 'enable detailed debugging information'
+      class_option :parallel, type: :boolean, default: false,
+                              desc: 'run various operations in parallel'
+      class_option :colour, type: :boolean, default: nil, aliases: '-c',
+                            desc: 'enabled coloured output'
+      class_option :interactive, type: :boolean, default: true,
+                                 desc: 'allow actions which require user input'
+      # Create a command instance: set up its logger, make sure a
+      # configuration file is available and apply the global options
+      # @param _options passed through unchanged to Thor via super
+      # @param local_options raw options, parsed early by interactive?
+      # @param config Thor configuration; :current_command names the command
+      def initialize(_options, local_options, config)
+        super
+        # Set up a logger for this command
+        cmd_name = config[:current_command].name
+        @logger = Logging.logger["nose::#{cmd_name}"]
+        # Peek ahead into the options and prompt the user to create a config
+        check_config_file interactive?(local_options)
+        # nil means no preference was given, so leave $stdout.tty? untouched
+        force_colour(options[:colour]) unless options[:colour].nil?
+        # Disable parallel processing if desired
+        Parallel.instance_variable_set(:@processor_count, 0) \
+          unless options[:parallel]
+      end
+      private
+      # Check if the user has disabled interaction
+      # @return [Boolean]
+      def interactive?(options = [])
+        parse_options = self.class.class_options
+        opts = Thor::Options.new(parse_options).parse(options)
+        opts[:interactive]
+      end
+      # Check if the user has created a configuration file
+      # @return [void]
+      def check_config_file(interactive)
+        return if File.file?(CONFIG_FILE_NAME)
+        if interactive
+          no_create = no? 'nose.yml is missing, ' \
+                          'create from nose.yml.example? [Yn]'
+          example_cfg = File.join Gem.loaded_specs['nose-cli'].full_gem_path,
+                                  'data', 'nose-cli', 'nose.yml.example'
+          FileUtils.cp example_cfg, CONFIG_FILE_NAME unless no_create
+        else
+          @logger.warn 'Configuration file missing'
+        end
+      end
+      # Add the possibility to set defaults via configuration
+      # @return [Thor::CoreExt::HashWithIndifferentAccess]
+      def options
+        original_options = super
+        return original_options unless File.exist? CONFIG_FILE_NAME
+        defaults = YAML.load_file(CONFIG_FILE_NAME).deep_symbolize_keys || {}
+        Thor::CoreExt::HashWithIndifferentAccess \
+          .new(defaults.merge(original_options))
+      end
+      # Get a backend instance for a given configuration and dataset
+      # @param config the configuration; it selects the backend class and
+      #   its :backend entry is passed to the backend constructor
+      # @param result results supplying the model, indexes and plans
+      # @return [Backend::BackendBase]
+      def get_backend(config, result)
+        be_class = get_class 'backend', config
+        be_class.new result.workload.model, result.indexes,
+                     result.plans, result.update_plans, config[:backend]
+      end
+      # Get a class of a particular name from the configuration
+      # @return [Object]
+      def get_class(class_name, config)
+        name = config
+        name = config[class_name.to_sym][:name] if config.is_a? Hash
+        require "nose/#{class_name}/#{name}"
+        name = name.split('_').map(&:capitalize).join
+        full_class_name = ['NoSE', class_name.capitalize,
+                           name + class_name.capitalize]
+        full_class_name.reduce(Object) do |mod, name_part|
+          mod.const_get name_part
+        end
+      end
+      # Instantiate the class named in one section of the options
+      # @param options the options; options[type] holds the section used
+      # @param name the kind of class to load, as passed to get_class
+      # @param type the key of the options section to read
+      # @return [Object] an instance built with options[type] as keywords
+      def get_class_from_config(options, name, type)
+        object_class = get_class name, options[type][:name]
+        object_class.new(**options[type])
+      end
+      # Collect all advisor results for schema design problem
+      # Enumerates the indexes for the workload, then runs the search
+      # over them
+      # @param workload the workload to enumerate indexes for and search
+      # @param cost_model the cost model given to the search
+      # @param max_space passed to search_overlap along with the indexes
+      # @param objective the objective given to the search
+      # @param by_id_graph forwarded unchanged to Search::Search.new
+      # @return [Search::Results]
+      def search_result(workload, cost_model, max_space = Float::INFINITY,
+                        objective = Search::Objective::COST,
+                        by_id_graph = false)
+        enumerated_indexes = IndexEnumerator.new(workload) \
+                                            .indexes_for_workload.to_a
+        Search::Search.new(workload, cost_model, objective, by_id_graph) \
+                      .search_overlap enumerated_indexes, max_space
+      end
+      # Load results of a previous search operation
+      # @param plan_file path of a JSON file holding serialized results
+      # @param mix name of the mix to apply to the loaded workload
+      # @return [Search::Results]
+      def load_results(plan_file, mix = 'default')
+        representer = Serialize::SearchResultRepresenter.represent \
+          Search::Results.new
+        json = File.read(plan_file)
+        result = representer.from_json(json)
+        # Apply the requested mix, except that a nil mix changes nothing and
+        # 'default' does not replace a non-default mix stored in the file
+        result.workload.mix = mix.to_sym unless \
+          mix.nil? || (mix == 'default' && result.workload.mix != :default)
+        result
+      end
+      # Load plans either from an explicit file or the name
+      # of something in the plans/ directory
+      # @param plan_file an existing file path, otherwise a schema name
+      # @param options options providing :mix and the backend configuration
+      # @return [Array] the loaded result followed by a backend built for it
+      def load_plans(plan_file, options)
+        if File.exist? plan_file
+          result = load_results(plan_file, options[:mix])
+        else
+          # No results file: build a minimal result from the named schema,
+          # carrying only a workload and the schema's indexes
+          schema = Schema.load plan_file
+          result = OpenStruct.new
+          result.workload = Workload.new schema.model
+          result.indexes = schema.indexes.values
+        end
+        backend = get_backend(options, result)
+        [result, backend]
+      end
+      # Output a list of indexes as text
+      # @return [void]
+      def output_indexes_txt(header, indexes, file)
+        file.puts Formatador.parse("[blue]#{header}[/]")
+        indexes.sort_by(&:key).each { |index| file.puts index.inspect }
+        file.puts
+      end
+      # Output a list of query plans as text
+      # @return [void]
+      def output_plans_txt(plans, file, indent, weights)
+        plans.each do |plan|
+          weight = (plan.weight || weights[plan.query || plan.name])
+          next if weight.nil?
+          cost = plan.cost * weight
+          file.puts "GROUP #{plan.group}" unless plan.group.nil?
+          weight = " * #{weight} = #{cost}"
+          file.puts '  ' * (indent - 1) + plan.query.label \
+            unless plan.query.nil? || plan.query.label.nil?
+          file.puts '  ' * (indent - 1) + plan.query.inspect + weight
+          plan.each { |step| file.puts '  ' * indent + step.inspect }
+          file.puts
+        end
+      end
+      # Output update plans as text
+      # @return [void]
+      def output_update_plans_txt(update_plans, file, weights, mix = nil)
+        unless update_plans.empty?
+          header = "Update plans\n" + '━' * 50
+          file.puts Formatador.parse("[blue]#{header}[/]")
+        end
+        update_plans.group_by(&:statement).each do |statement, plans|
+          weight = if weights.key?(statement)
+                     weights[statement]
+                   elsif weights.key?(statement.group)
+                     weights[statement.group]
+                   else
+                     weights[statement.group][mix]
+                   end
+          next if weight.nil?
+          total_cost = plans.sum_by(&:cost)
+          file.puts "GROUP #{statement.group}" unless statement.group.nil?
+          file.puts statement.label unless statement.label.nil?
+          file.puts "#{statement.inspect} * #{weight} = #{total_cost * weight}"
+          plans.each do |plan|
+            file.puts Formatador.parse(" for [magenta]#{plan.index.key}[/] " \
+                                       "[yellow]$#{plan.cost}[/]")
+            query_weights = Hash[plan.query_plans.map do |query_plan|
+              [query_plan.query, weight]
+            end]
+            output_plans_txt plan.query_plans, file, 2, query_weights
+            plan.update_steps.each do |step|
+              file.puts '  ' + step.inspect
+            end
+            file.puts
+          end
+          file.puts "\n"
+        end
+      end
+      # Output the results of advising as text
+      # Prints the selected indexes and their total size, then the query
+      # plans, the update plans and the total cost
+      # @param result the search results to print
+      # @param file the IO to write to
+      # @param enumerated whether to also list the enumerated indexes first
+      # @param _backend unused
+      # @return [void]
+      def output_txt(result, file = $stdout, enumerated = false,
+                     _backend = nil)
+        if enumerated
+          header = "Enumerated indexes\n" + '━' * 50
+          output_indexes_txt header, result.enumerated_indexes, file
+        end
+        # Output selected indexes
+        header = "Indexes\n" + '━' * 50
+        output_indexes_txt header, result.indexes, file
+        file.puts Formatador.parse('  Total size: ' \
+                                   "[blue]#{result.total_size}[/]\n\n")
+        # Output query plans for the discovered indices
+        header = "Query plans\n" + '━' * 50
+        file.puts Formatador.parse("[blue]#{header}[/]")
+        # Fall back to the weights stored on the result when the workload
+        # provides none
+        weights = result.workload.statement_weights
+        weights = result.weights if weights.nil? || weights.empty?
+        output_plans_txt result.plans, file, 1, weights
+        # Note that this replaces a nil update_plans on the result itself
+        result.update_plans = [] if result.update_plans.nil?
+        output_update_plans_txt result.update_plans, file, weights,
+                                result.workload.mix
+        file.puts Formatador.parse('  Total cost: ' \
+                                   "[blue]#{result.total_cost}[/]\n")
+      end
+      # Output an HTML file with a description of the search results
+      # @return [void]
+      def output_html(result, file = $stdout, enumerated = false,
+                      backend = nil)
+        # Get an SVG diagram of the model
+        tmpfile = Tempfile.new %w(model svg)
+        result.workload.model.output :svg, tmpfile.path, true
+        svg = File.open(tmpfile.path).read
+        enumerated &&= result.enumerated_indexes
+        tmpl = File.read File.join(File.dirname(__FILE__),
+                                   '../../templates/report.erb')
+        ns = OpenStruct.new svg: svg,
+                            backend: backend,
+                            indexes: result.indexes,
+                            enumerated_indexes: enumerated,
+                            workload: result.workload,
+                            update_plans: result.update_plans,
+                            plans: result.plans,
+                            total_size: result.total_size,
+                            total_cost: result.total_cost
+        force_colour
+        file.write ERB.new(tmpl, nil, '>').result(ns.instance_eval { binding })
+      end
+      # Output the results of advising as JSON
+      # @param result the search results to serialize
+      # @param file the IO to write to
+      # @param enumerated when truthy, the enumerated indexes are taken off
+      #   the result while it is serialized and put back afterwards
+      # @param _backend unused
+      # @return [void]
+      def output_json(result, file = $stdout, enumerated = false,
+                      _backend = nil)
+        # Temporarily remove the enumerated indexes
+        # NOTE(review): the field is removed when enumerated is truthy and
+        # left in place otherwise; confirm this is not inverted
+        if enumerated
+          enumerated = result.enumerated_indexes
+          result.delete_field :enumerated_indexes
+        end
+        file.puts JSON.pretty_generate \
+          Serialize::SearchResultRepresenter.represent(result).to_hash
+        # Put back the field removed above
+        result.enumerated_indexes = enumerated if enumerated
+      end
+      # Output the results of advising as YAML
+      # @param result the search results to serialize
+      # @param file the IO to write to
+      # @param enumerated when truthy, the enumerated indexes are taken off
+      #   the result while it is serialized and put back afterwards
+      # @param _backend unused
+      # @return [void]
+      def output_yml(result, file = $stdout, enumerated = false,
+                     _backend = nil)
+        # Temporarily remove the enumerated indexes
+        # NOTE(review): the field is removed when enumerated is truthy and
+        # left in place otherwise; confirm this is not inverted
+        if enumerated
+          enumerated = result.enumerated_indexes
+          result.delete_field :enumerated_indexes
+        end
+        file.puts Serialize::SearchResultRepresenter.represent(result).to_yaml
+        # Put back the field removed above
+        result.enumerated_indexes = enumerated if enumerated
+      end
+      # Filter an options hash for those only relevant to a given command
+      # @param opts the options to filter
+      # @param command the name of the command, as a key into the commands
+      #   registered on this class
+      # @return [Thor::CoreExt::HashWithIndifferentAccess]
+      def filter_command_options(opts, command)
+        # Keep an entry only when the command declares an option of the
+        # same name; both sides are compared as symbols
+        Thor::CoreExt::HashWithIndifferentAccess.new(opts.select do |key|
+          self.class.commands[command].options \
+            .each_key.map(&:to_sym).include? key.to_sym
+        end)
+      end
+      # Enable forcing the colour or no colour for output
+      # We just lie to Formatador about whether or not $stdout is a tty
+      # @return [void]
+      def force_colour(colour = true)
+        stdout_metaclass = class << $stdout; self; end
+        method = colour ? ->() { true } : ->() { false }
+        stdout_metaclass.send(:define_method, :tty?, &method)
+      end
+    end
+  end
+end
+require_relative 'nose_cli/shared_options'
+# Require the various subcommands
+require_relative 'nose_cli/analyze'
+require_relative 'nose_cli/benchmark'
+require_relative 'nose_cli/collect_results'
+require_relative 'nose_cli/create'
+require_relative 'nose_cli/diff_plans'
+require_relative 'nose_cli/dump'
+require_relative 'nose_cli/export'
+require_relative 'nose_cli/execute'
+require_relative 'nose_cli/load'
+require_relative 'nose_cli/genworkload'
+require_relative 'nose_cli/graph'
+require_relative 'nose_cli/plan_schema'
+require_relative 'nose_cli/proxy'
+require_relative 'nose_cli/random_plans'
+require_relative 'nose_cli/reformat'
+require_relative 'nose_cli/repl'
+require_relative 'nose_cli/recost'
+require_relative 'nose_cli/search'
+require_relative 'nose_cli/search_all'
+require_relative 'nose_cli/search_bench'
+require_relative 'nose_cli/texify'
+require_relative 'nose_cli/why'
+# Only include the console command if pry is available
+begin
+  require 'pry'
+  require_relative 'nose_cli/console'
+rescue LoadError
+  nil
+end