RubyGems - nose-cli - Versions diffs - 0.1.0pre - Mend

nose-cli 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +7 -0
data/bin/nose +26 -0
data/bin/random_rubis +105 -0
data/bin/restart-cassandra.sh +20 -0
data/bin/run-experiments.sh +61 -0
data/data/nose-cli/nose.yml.example +32 -0
data/lib/nose_cli.rb +364 -0
data/lib/nose_cli/analyze.rb +94 -0
data/lib/nose_cli/benchmark.rb +145 -0
data/lib/nose_cli/collect_results.rb +55 -0
data/lib/nose_cli/console.rb +50 -0
data/lib/nose_cli/create.rb +35 -0
data/lib/nose_cli/diff_plans.rb +39 -0
data/lib/nose_cli/dump.rb +67 -0
data/lib/nose_cli/execute.rb +241 -0
data/lib/nose_cli/export.rb +39 -0
data/lib/nose_cli/genworkload.rb +24 -0
data/lib/nose_cli/graph.rb +24 -0
data/lib/nose_cli/load.rb +44 -0
data/lib/nose_cli/measurements.rb +36 -0
data/lib/nose_cli/plan_schema.rb +84 -0
data/lib/nose_cli/proxy.rb +32 -0
data/lib/nose_cli/random_plans.rb +82 -0
data/lib/nose_cli/recost.rb +45 -0
data/lib/nose_cli/reformat.rb +22 -0
data/lib/nose_cli/repl.rb +144 -0
data/lib/nose_cli/search.rb +77 -0
data/lib/nose_cli/search_all.rb +120 -0
data/lib/nose_cli/search_bench.rb +52 -0
data/lib/nose_cli/shared_options.rb +30 -0
data/lib/nose_cli/texify.rb +141 -0
data/lib/nose_cli/why.rb +70 -0
data/templates/completions.erb +56 -0
data/templates/man.erb +33 -0
data/templates/report.erb +138 -0
data/templates/subman.erb +19 -0
metadata +345 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 69482346fff002e27d56b7826eec5ab14f4626db
+  data.tar.gz: 07e536d085b0a88fb38f8650a940e4a54c18fbf6
+SHA512:
+  metadata.gz: 1c0e8ab04a53a5107a6ff9559f218eea28db5437d21597720c508867d5d737ce065a742a331ab0742df81bb85efe5f1c6b6737ec00cab204cad1879444b1e73b
+  data.tar.gz: 3e7ab9f5d4b30a71697a15cface20dbf99998f738504f0d83cba5b165cb3cfbad588bc380378c3dd637e5e2f021acffc908d03fe3abaad17fb687f66ac9da93f

data/bin/nose ADDED Viewed

@@ -0,0 +1,26 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Optionally enable debug logging
+ENV['NOSE_LOG'] = 'debug' if ARGV.include?('--debug') || ARGV.include?('-d')
+$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
+require 'nose'
+require 'nose_cli'
+# Start profiling if asked
+unless ENV['NOSE_PROFILE'].nil?
+  require 'ruby-prof'
+  Parallel.instance_variable_set(:@processor_count, 0)
+  RubyProf.start
+end
+NoSE::CLI::NoSECLI.start ARGV
+# Stop profiling and output results
+unless ENV['NOSE_PROFILE'].to_i == 0
+  result = RubyProf.stop
+  printer = RubyProf::CallTreePrinter.new(result)
+  printer.print
+end

data/bin/random_rubis ADDED Viewed

@@ -0,0 +1,105 @@
+#!/usr/bin/env ruby
+# Get and print the seed which is used
+seed = ::Random.new_seed
+$stderr.puts "SEED #{seed}"
+::Random.srand seed
+require 'nose'
+# Record times for the longest running sets of methods
+times = {
+  indexes_for_workload: 0,
+  query_costs: 0,
+  update_costs: 0,
+  setup_model: 0,
+  solve: 0
+}
+NoSE::Timer.enable do |_cls, method, time|
+  times[method] = time if times.key? method
+end
+factor = ARGV[0].to_i
+# Create a random workload generator
+network = NoSE::Random::WattsStrogatzNetwork.new(nodes_nb: 7 * factor)
+workload = NoSE::Workload.new
+network.entities.each { |entity| workload << entity }
+sgen = NoSE::Random::StatementGenerator.new workload.model
+# Add random queries
+1.upto(30 * factor).each do |i|
+  path_length = rand > 0.9 ? 2 : 1
+  path_length = 3 if i <= factor
+  r = rand
+  conditions = if r > 0.95
+                 3
+               elsif r > 0.75
+                 2
+               else
+                 1
+               end
+  q = sgen.random_query path_length,
+                        3,
+                        conditions,
+                        rand > 0.9
+  $stderr.puts q.unparse
+  workload.add_statement q, 10
+end
+# Add random updates
+1.upto(3 * factor).each do
+  u = sgen.random_update 1, 2, 1
+  $stderr.puts u.text
+  workload.add_statement u
+end
+# Add random inserts
+1.upto(5 * factor).each do
+  i = sgen.random_insert
+  $stderr.puts i.text
+  workload.add_statement i
+end
+# Uncomment the lines below to enable profiling
+# (along with the lines above to save the output)
+# require 'ruby-prof'
+# Parallel.instance_variable_set(:@processor_count, 0)
+# RubyProf.start
+# Execute NoSE for the random workload and report the time
+start = Time.now.utc
+indexes = NoSE::IndexEnumerator.new(workload).indexes_for_workload.to_a
+search = NoSE::Search::Search.new(workload,
+                                  NoSE::Cost::RequestCountCost.new)
+search.search_overlap(indexes)
+elapsed = Time.now.utc - start
+# Output the timing values
+total = 0
+times[:costs] = times.delete(:query_costs) + times.delete(:update_costs)
+times.each do |key, time|
+  puts "#{key},#{time}"
+  total += time
+end
+puts "other,#{elapsed - total}"
+# Uncomment the lines below to save profile output
+# (along with the lines above to enable profiling)
+# result = RubyProf.stop
+# result.eliminate_methods!([
+#   /NoSE::Field#hash/,
+#   /Range#/,
+#   /Array#/,
+#   /Set#/,
+#   /Hash#/,
+#   /Integer#downto/,
+#   /Hashids#/,
+#   /String#/,
+#   /Enumerable#/,
+#   /Integer#times/,
+#   /Class#new/
+# ])
+# printer = RubyProf::CallTreePrinter.new(result)
+# printer.print(File.open('prof.out', 'w'))

data/bin/restart-cassandra.sh ADDED Viewed

@@ -0,0 +1,20 @@
+#!/bin/bash
+# Get the first data directory (we assume there's only one)
+# The backup directory is just this directory with -bk appended
+DATA_DIR=`grep data_file_directories -A 1 /etc/cassandra/cassandra.yaml | \
+          tail -n +2 | sed 's/^\s\+-\s\+//; s/\/[^/]*$//'`
+# Ideally sudo should be usable without a password so this can be automated
+# We simply stop Cassandra, restore old data from a backup and restart
+time sudo sh -c "service cassandra stop; \
+                 rm -rf $DATA_DIR; \
+                 cp -r $DATA_DIR-bk $DATA_DIR; \
+                 chown -R cassandra:cassandra $DATA_DIR; \
+                 service cassandra start"
+until cqlsh `hostname -i` -e "USE $1"
+do
+  echo 'Waiting for Cassandra...'
+  sleep 5
+done

data/bin/run-experiments.sh ADDED Viewed

@@ -0,0 +1,61 @@
+#!/bin/sh
+eval `bundle exec nose export`
+# The first argument should be the directory where results are stored
+RESULTS_DIR=$1
+REPEAT=1
+ITERATIONS=1000
+COMMON_OPTIONS="--num-iterations=$ITERATIONS --repeat=$REPEAT --format=csv"
+# Enable command output and fail on error
+set -e
+set -x
+mkdir -p $RESULTS_DIR
+# Passwordless SSH access must be set up to the backend host
+restart_cassandra() {
+  ssh $BACKEND_HOSTS_0 `pwd`/bin/restart_cassandra.sh $BACKEND_KEYSPACE
+}
+run_nose_search() {
+  bundle exec nose search rubis --format=json --mix=$1 > $RESULTS_DIR/$1.json
+}
+run_nose_search bidding
+restart_cassandra
+bundle exec nose benchmark $COMMON_OPTIONS --mix=bidding \
+  $RESULTS_DIR/bidding.json > $RESULTS_DIR/bidding.csv
+run_nose_search write_heavy
+restart_cassandra
+bundle exec nose benchmark $COMMON_OPTIONS --mix=write_heavy \
+  $RESULTS_DIR/write_heavy.json > $RESULTS_DIR/write_heavy.csv
+restart_cassandra
+bundle exec nose benchmark $COMMON_OPTIONS --mix=write_heavy \
+  $RESULTS_DIR/bidding.json > $RESULTS_DIR/bidding_write_heavy.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=bidding \
+  rubis_expert > $RESULTS_DIR/expert.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=write_heavy \
+  rubis_expert > $RESULTS_DIR/expert_write_heavy.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=bidding \
+  rubis_baseline > $RESULTS_DIR/baseline.csv
+restart_cassandra
+bundle exec nose execute $COMMON_OPTIONS --mix=write_heavy \
+  rubis_baseline > $RESULTS_DIR/baseline_write_heavy.csv

data/data/nose-cli/nose.yml.example ADDED Viewed

@@ -0,0 +1,32 @@
+# Connection to the backend database being targeted, currently only Cassandra
+backend:
+  name: cassandra
+  hosts:
+    - localhost
+  port: 9042
+  keyspace: nose
+# Cost model name and parameters
+cost_model:
+  name: request_count
+# Loader-specific configuration
+# The mysql loader is recommended, but csv might work as well if
+# your generated indexes all have path length one
+loader:
+  name: mysql
+  host: 127.0.0.1
+  database: rubis
+  username: root
+  password: root
+# Query proxy
+proxy:
+  name: mysql
+  port: 3307
+# vim: set syntax=yaml:
+# Local Variables:
+# mode:yaml
+# End:

data/lib/nose_cli.rb ADDED Viewed

@@ -0,0 +1,364 @@
+# frozen_string_literal: true
+require 'erb'
+require 'formatador'
+require 'parallel'
+require 'thor'
+require 'yaml'
+require 'nose'
+require_relative 'nose_cli/measurements'
+module NoSE
+  # CLI tools for running the advisor
+  module CLI
+    # A command-line interface to running the advisor tool
+    class NoSECLI < Thor
+      # The path to the configuration file in the working directory
+      # Values from this file are merged in as option defaults by #options
+      CONFIG_FILE_NAME = 'nose.yml'
+      # Have Thor reject options which have not been declared
+      check_unknown_options!
+      # The executable also inspects ARGV for this flag to set NOSE_LOG
+      class_option :debug, type: :boolean, aliases: '-d',
+                           desc: 'enable detailed debugging information'
+      # When not set, #initialize forces the Parallel processor count to 0
+      class_option :parallel, type: :boolean, default: false,
+                              desc: 'run various operations in parallel'
+      # Defaults to nil so #initialize only overrides tty detection when the
+      # user explicitly asked for colour or no colour
+      class_option :colour, type: :boolean, default: nil, aliases: '-c',
+                            desc: 'enabled coloured output'
+      # Controls whether a missing configuration file triggers a prompt
+      class_option :interactive, type: :boolean, default: true,
+                                 desc: 'allow actions which require user input'
+      def initialize(_options, local_options, config)
+        super
+        # Set up a logger for this command
+        cmd_name = config[:current_command].name
+        @logger = Logging.logger["nose::#{cmd_name}"]
+        # Peek ahead into the options and prompt the user to create a config
+        check_config_file interactive?(local_options)
+        force_colour(options[:colour]) unless options[:colour].nil?
+        # Disable parallel processing if desired
+        Parallel.instance_variable_set(:@processor_count, 0) \
+          unless options[:parallel]
+      end
+      private
+      # Check if the user has disabled interaction
+      # @return [Boolean]
+      def interactive?(options = [])
+        parse_options = self.class.class_options
+        opts = Thor::Options.new(parse_options).parse(options)
+        opts[:interactive]
+      end
+      # Check if the user has created a configuration file
+      # @return [void]
+      def check_config_file(interactive)
+        return if File.file?(CONFIG_FILE_NAME)
+        if interactive
+          no_create = no? 'nose.yml is missing, ' \
+                          'create from nose.yml.example? [Yn]'
+          example_cfg = File.join Gem.loaded_specs['nose-cli'].full_gem_path,
+                                  'data', 'nose-cli', 'nose.yml.example'
+          FileUtils.cp example_cfg, CONFIG_FILE_NAME unless no_create
+        else
+          @logger.warn 'Configuration file missing'
+        end
+      end
+      # Add the possibility to set defaults via configuration
+      # @return [Thor::CoreExt::HashWithIndifferentAccess]
+      def options
+        original_options = super
+        return original_options unless File.exist? CONFIG_FILE_NAME
+        defaults = YAML.load_file(CONFIG_FILE_NAME).deep_symbolize_keys || {}
+        Thor::CoreExt::HashWithIndifferentAccess \
+          .new(defaults.merge(original_options))
+      end
+      # Get a backend instance for a given configuration and dataset
+      # @return [Backend::BackendBase]
+      def get_backend(config, result)
+        be_class = get_class 'backend', config
+        be_class.new result.workload.model, result.indexes,
+                     result.plans, result.update_plans, config[:backend]
+      end
+      # Get a class of a particular name from the configuration
+      # @return [Object]
+      def get_class(class_name, config)
+        name = config
+        name = config[class_name.to_sym][:name] if config.is_a? Hash
+        require "nose/#{class_name}/#{name}"
+        name = name.split('_').map(&:capitalize).join
+        full_class_name = ['NoSE', class_name.capitalize,
+                           name + class_name.capitalize]
+        full_class_name.reduce(Object) do |mod, name_part|
+          mod.const_get name_part
+        end
+      end
+      # Get a class given a set of options
+      # @return [Object]
+      def get_class_from_config(options, name, type)
+        object_class = get_class name, options[type][:name]
+        object_class.new(**options[type])
+      end
+      # Collect all advisor results for schema design problem
+      # @return [Search::Results]
+      def search_result(workload, cost_model, max_space = Float::INFINITY,
+                        objective = Search::Objective::COST,
+                        by_id_graph = false)
+        enumerated_indexes = IndexEnumerator.new(workload) \
+                                            .indexes_for_workload.to_a
+        Search::Search.new(workload, cost_model, objective, by_id_graph) \
+                      .search_overlap enumerated_indexes, max_space
+      end
+      # Load results of a previous search operation
+      # @return [Search::Results]
+      def load_results(plan_file, mix = 'default')
+        representer = Serialize::SearchResultRepresenter.represent \
+          Search::Results.new
+        json = File.read(plan_file)
+        result = representer.from_json(json)
+        result.workload.mix = mix.to_sym unless \
+          mix.nil? || (mix == 'default' && result.workload.mix != :default)
+        result
+      end
+      # Load plans either from an explicit file or the name
+      # of something in the plans/ directory
+      def load_plans(plan_file, options)
+        if File.exist? plan_file
+          result = load_results(plan_file, options[:mix])
+        else
+          schema = Schema.load plan_file
+          result = OpenStruct.new
+          result.workload = Workload.new schema.model
+          result.indexes = schema.indexes.values
+        end
+        backend = get_backend(options, result)
+        [result, backend]
+      end
+      # Output a list of indexes as text
+      # @return [void]
+      def output_indexes_txt(header, indexes, file)
+        file.puts Formatador.parse("[blue]#{header}[/]")
+        indexes.sort_by(&:key).each { |index| file.puts index.inspect }
+        file.puts
+      end
+      # Output a list of query plans as text
+      # @return [void]
+      def output_plans_txt(plans, file, indent, weights)
+        plans.each do |plan|
+          weight = (plan.weight || weights[plan.query || plan.name])
+          next if weight.nil?
+          cost = plan.cost * weight
+          file.puts "GROUP #{plan.group}" unless plan.group.nil?
+          weight = " * #{weight} = #{cost}"
+          file.puts '  ' * (indent - 1) + plan.query.label \
+            unless plan.query.nil? || plan.query.label.nil?
+          file.puts '  ' * (indent - 1) + plan.query.inspect + weight
+          plan.each { |step| file.puts '  ' * indent + step.inspect }
+          file.puts
+        end
+      end
+      # Output update plans as text
+      # @return [void]
+      def output_update_plans_txt(update_plans, file, weights, mix = nil)
+        unless update_plans.empty?
+          header = "Update plans\n" + '━' * 50
+          file.puts Formatador.parse("[blue]#{header}[/]")
+        end
+        update_plans.group_by(&:statement).each do |statement, plans|
+          weight = if weights.key?(statement)
+                     weights[statement]
+                   elsif weights.key?(statement.group)
+                     weights[statement.group]
+                   else
+                     weights[statement.group][mix]
+                   end
+          next if weight.nil?
+          total_cost = plans.sum_by(&:cost)
+          file.puts "GROUP #{statement.group}" unless statement.group.nil?
+          file.puts statement.label unless statement.label.nil?
+          file.puts "#{statement.inspect} * #{weight} = #{total_cost * weight}"
+          plans.each do |plan|
+            file.puts Formatador.parse(" for [magenta]#{plan.index.key}[/] " \
+                                       "[yellow]$#{plan.cost}[/]")
+            query_weights = Hash[plan.query_plans.map do |query_plan|
+              [query_plan.query, weight]
+            end]
+            output_plans_txt plan.query_plans, file, 2, query_weights
+            plan.update_steps.each do |step|
+              file.puts '  ' + step.inspect
+            end
+            file.puts
+          end
+          file.puts "\n"
+        end
+      end
+      # Output the results of advising as text
+      # @return [void]
+      def output_txt(result, file = $stdout, enumerated = false,
+                     _backend = nil)
+        if enumerated
+          header = "Enumerated indexes\n" + '━' * 50
+          output_indexes_txt header, result.enumerated_indexes, file
+        end
+        # Output selected indexes
+        header = "Indexes\n" + '━' * 50
+        output_indexes_txt header, result.indexes, file
+        file.puts Formatador.parse('  Total size: ' \
+                                   "[blue]#{result.total_size}[/]\n\n")
+        # Output query plans for the discovered indices
+        header = "Query plans\n" + '━' * 50
+        file.puts Formatador.parse("[blue]#{header}[/]")
+        weights = result.workload.statement_weights
+        weights = result.weights if weights.nil? || weights.empty?
+        output_plans_txt result.plans, file, 1, weights
+        result.update_plans = [] if result.update_plans.nil?
+        output_update_plans_txt result.update_plans, file, weights,
+                                result.workload.mix
+        file.puts Formatador.parse('  Total cost: ' \
+                                   "[blue]#{result.total_cost}[/]\n")
+      end
+      # Output an HTML file with a description of the search results
+      # @return [void]
+      def output_html(result, file = $stdout, enumerated = false,
+                      backend = nil)
+        # Get an SVG diagram of the model
+        tmpfile = Tempfile.new %w(model svg)
+        result.workload.model.output :svg, tmpfile.path, true
+        svg = File.open(tmpfile.path).read
+        enumerated &&= result.enumerated_indexes
+        tmpl = File.read File.join(File.dirname(__FILE__),
+                                   '../../templates/report.erb')
+        ns = OpenStruct.new svg: svg,
+                            backend: backend,
+                            indexes: result.indexes,
+                            enumerated_indexes: enumerated,
+                            workload: result.workload,
+                            update_plans: result.update_plans,
+                            plans: result.plans,
+                            total_size: result.total_size,
+                            total_cost: result.total_cost
+        force_colour
+        file.write ERB.new(tmpl, nil, '>').result(ns.instance_eval { binding })
+      end
+      # Output the results of advising as JSON
+      # @return [void]
+      def output_json(result, file = $stdout, enumerated = false,
+                      _backend = nil)
+        # Temporarily remove the enumerated indexes
+        if enumerated
+          enumerated = result.enumerated_indexes
+          result.delete_field :enumerated_indexes
+        end
+        file.puts JSON.pretty_generate \
+          Serialize::SearchResultRepresenter.represent(result).to_hash
+        result.enumerated_indexes = enumerated if enumerated
+      end
+      # Output the results of advising as YAML
+      # @return [void]
+      def output_yml(result, file = $stdout, enumerated = false,
+                     _backend = nil)
+        # Temporarily remove the enumerated indexes
+        if enumerated
+          enumerated = result.enumerated_indexes
+          result.delete_field :enumerated_indexes
+        end
+        file.puts Serialize::SearchResultRepresenter.represent(result).to_yaml
+        result.enumerated_indexes = enumerated if enumerated
+      end
+      # Filter an options hash for those only relevant to a given command
+      # @return [Thor::CoreExt::HashWithIndifferentAccess]
+      def filter_command_options(opts, command)
+        Thor::CoreExt::HashWithIndifferentAccess.new(opts.select do |key|
+          self.class.commands[command].options \
+            .each_key.map(&:to_sym).include? key.to_sym
+        end)
+      end
+      # Enable forcing the colour or no colour for output
+      # We just lie to Formatador about whether or not $stdout is a tty
+      # @return [void]
+      def force_colour(colour = true)
+        stdout_metaclass = class << $stdout; self; end
+        method = colour ? ->() { true } : ->() { false }
+        stdout_metaclass.send(:define_method, :tty?, &method)
+      end
+    end
+  end
+end
+require_relative 'nose_cli/shared_options'
+# Require the various subcommands
+require_relative 'nose_cli/analyze'
+require_relative 'nose_cli/benchmark'
+require_relative 'nose_cli/collect_results'
+require_relative 'nose_cli/create'
+require_relative 'nose_cli/diff_plans'
+require_relative 'nose_cli/dump'
+require_relative 'nose_cli/export'
+require_relative 'nose_cli/execute'
+require_relative 'nose_cli/load'
+require_relative 'nose_cli/genworkload'
+require_relative 'nose_cli/graph'
+require_relative 'nose_cli/plan_schema'
+require_relative 'nose_cli/proxy'
+require_relative 'nose_cli/random_plans'
+require_relative 'nose_cli/reformat'
+require_relative 'nose_cli/repl'
+require_relative 'nose_cli/recost'
+require_relative 'nose_cli/search'
+require_relative 'nose_cli/search_all'
+require_relative 'nose_cli/search_bench'
+require_relative 'nose_cli/texify'
+require_relative 'nose_cli/why'
+# Only include the console command if pry is available
+begin
+  require 'pry'
+  require_relative 'nose_cli/console'
+rescue LoadError
+  nil
+end