nose-cli 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'gruff'
5
+
6
+ module NoSE
7
+ module CLI
8
+ # Add a command to graph the runtimes recorded in benchmark CSV files
9
+ class NoSECLI < Thor
10
+ desc 'analyze OUTPUT_FILE CSV_FILES',
11
+ 'output a graph to OUTPUT_FILE comparing the CSV_FILES'
12
+
13
+ long_desc <<-LONGDESC
14
+ `nose analyze` will create a graph comparing the runtimes from multiple
15
+ runs of different schemas for a particular workload. The CSV files
16
+ should be of the format produced from either `nose benchmark` or
17
+ `nose execute`.
18
+ LONGDESC
19
+
20
+ option :total, type: :boolean, default: false, aliases: '-t',
21
+ desc: 'whether to include a line for totals in the '\
22
+ 'graph'
23
+
24
# Render a bar chart comparing the runtimes stored in several CSV files
# @param output_file [String] path the finished chart is written to
# @param csv_files [Array<String>] CSV files read via `load_data`
# @return [void]
def analyze(output_file, *csv_files)
  # One series of rows per CSV file
  series = load_data csv_files, options[:total]

  # Configure the appearance of the chart before adding any data
  chart = Gruff::Bar.new '2000x800'
  chart.title = 'NoSE Schema Performance'
  chart.x_axis_label = '\nWorkload group'
  chart.y_axis_label = 'Weighted execution time (s)'
  chart.title_font_size = 20
  chart.legend_font_size = 10
  chart.marker_font_size = 10
  chart.label_stagger_height = 15
  chart.legend_box_size = 10
  chart.bar_spacing = 0.5

  # Every file contributes one data set named after its first row's label
  series.each do |rows|
    means = rows.map { |row| row['mean'] }
    chart.data rows.first['label'], means
  end

  # Positions on the x axis are named after the groups of the first file
  positions = series.first.each_with_index.map do |row, position|
    [position, row['group']]
  end
  chart.labels = positions.to_h

  chart.write output_file
end
50
+
51
+ private
52
+
53
# Load the data from the given list of CSV files
#
# Each file is summarized into one row per value of its `group` column. The
# `mean` of a summary row is the sum of the `mean` column over all rows of
# that group, while `label` and `weight` are taken from the group's first
# row. The first line of every file is treated as that file's header.
#
# @param csv_files [Array<String>] paths of the CSV files to read
# @param total [Boolean] whether to append a `TOTAL` row to each file's
#   rows, holding the weight-averaged mean of the file's groups
# @return [Array<Array<Hash>>] one array of summary rows per input file
def load_data(csv_files, total = false)
  csv_files.map do |file|
    # Use each file's own header so a file whose columns are ordered
    # differently from the first one is still labelled correctly; an empty
    # file simply yields no records
    headers, *records = CSV.read(file)
    records = records.map { |record| headers.zip(record).to_h }

    rows = records.group_by { |row| row['group'] }.map do |group, members|
      {
        'label' => members.first['label'],
        'group' => group,
        'weight' => members.first['weight'].to_f,
        'mean' => members.inject(0) { |sum, row| sum + row['mean'].to_f }
      }
    end

    # Add an additional row for the total; a file without data rows has
    # neither a label to copy nor anything to average
    if total && !rows.empty?
      total_weight = rows.inject(0) { |sum, row| sum + row['weight'] }
      weighted_mean = rows.inject(0) do |sum, row|
        sum + row['mean'] * row['weight'] / total_weight
      end

      rows << {
        'label' => rows.first['label'],
        'group' => 'TOTAL',
        'weight' => 1.0,
        'mean' => weighted_mean
      }
    end

    rows
  end
end
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'table_print'
5
+
6
+ module NoSE
7
+ module CLI
8
+ # Run performance tests on plans for a particular schema
9
+ class NoSECLI < Thor
10
+ desc 'benchmark PLAN_FILE', 'test performance of plans in PLAN_FILE'
11
+
12
+ long_desc <<-LONGDESC
13
+ `nose benchmark` will take a JSON file output by `nose search`,
14
+ execute each statement, and output a summary of the execution times.
15
+ Before running benchmark, `nose create` and `nose load` must be used to
16
+ prepare the target database.
17
+ LONGDESC
18
+
19
+ shared_option :mix
20
+
21
+ option :num_iterations, type: :numeric, default: 100,
22
+ banner: 'ITERATIONS',
23
+ desc: 'the number of times to execute each ' \
24
+ 'statement'
25
+ option :repeat, type: :numeric, default: 1,
26
+ desc: 'how many times to repeat the benchmark'
27
+ option :group, type: :string, default: nil, aliases: '-g',
28
+ desc: 'restrict the benchmark to statements in the ' \
29
+ 'given group'
30
+ option :fail_on_empty, type: :boolean, default: true,
31
+ desc: 'abort if a column family is empty'
32
+ option :totals, type: :boolean, default: false, aliases: '-t',
33
+ desc: 'whether to include group totals in the output'
34
+ option :format, type: :string, default: 'txt',
35
+ enum: %w(txt csv), aliases: '-f',
36
+ desc: 'the format of the output data'
37
+
38
# Execute the weighted statements of a plan file and report their timings
#
# Query plans are measured with `bench_query` and update plans with
# `bench_update`; the measurements are collected per statement group and
# written with `output_table` or `output_csv` depending on the `format`
# option.
#
# @param plan_file [String] path of the plan file read via `load_results`;
#   its base name without extension labels every output row
# @return [void]
def benchmark(plan_file)
  label = File.basename plan_file, '.*'
  result = load_results plan_file, options[:mix]

  backend = get_backend(options, result)

  # Parentheses keep this an unambiguous method call even though the local
  # variable being assigned has the same name as the method
  index_values = index_values(result.indexes, backend,
                              options[:num_iterations],
                              options[:fail_on_empty])

  # Measurements of each statement group, in order of first appearance
  group_tables = Hash.new { |h, k| h[k] = [] }

  result.plans.each do |plan|
    query = plan.query
    weight = result.workload.statement_weights[query]
    next if query.is_a?(SupportQuery) || !weight
    @logger.debug { "Executing #{query.text}" }

    next unless options[:group].nil? || plan.group == options[:group]

    indexes = plan.select do |step|
      step.is_a? Plans::IndexLookupPlanStep
    end.map(&:index)

    measurement = bench_query backend, indexes, plan, index_values,
                              options[:num_iterations], options[:repeat],
                              weight: weight
    next if measurement.empty?

    measurement.estimate = plan.cost
    group_tables[plan.group] << measurement
  end

  result.workload.updates.each do |update|
    weight = result.workload.statement_weights[update]
    next unless weight

    # The results may not carry any update plans at all
    plans = (result.update_plans || []).select do |possible_plan|
      possible_plan.statement == update
    end
    next if plans.empty?

    @logger.debug { "Executing #{update.text}" }

    plans.each do |plan|
      next unless options[:group].nil? || plan.group == options[:group]

      # Get all indexes used by support queries
      indexes = plan.query_plans.flat_map(&:indexes) << plan.index

      measurement = bench_update backend, indexes, plan, index_values,
                                 options[:num_iterations],
                                 options[:repeat], weight: weight
      next if measurement.empty?

      measurement.estimate = plan.cost
      group_tables[plan.group] << measurement
    end
  end

  # Build one output entry per group, optionally closing each group with a
  # TOTAL measurement holding the sum of its weighted means
  table = group_tables.map do |group, group_table|
    total_measurement = Measurements::Measurement.new nil, 'TOTAL'
    total_measurement << group_table.map(&:weighted_mean).inject(0, &:+)
    group_table << total_measurement if options[:totals]

    OpenStruct.new(label: label, group: group, measurements: group_table)
  end

  # The grand total adds up the TOTAL measurement of every group
  if options[:totals]
    total_measurement = Measurements::Measurement.new nil, 'TOTAL'
    group_sums = table.map do |group|
      group.measurements.find { |m| m.name == 'TOTAL' }.mean
    end
    total_measurement << group_sums.inject(0, &:+)
    table << OpenStruct.new(label: label, group: 'TOTAL',
                            measurements: [total_measurement])
  end

  case options[:format]
  when 'txt'
    output_table table
  else
    output_csv table
  end
end
129
+
130
+ private
131
+
132
# Sample values from each of the given indexes
# @param indexes [Enumerable] indexes to sample; each must respond to `key`
# @param backend [Object] must respond to `index_sample(index, iterations)`
# @param iterations [Integer] passed through to `index_sample`
# @param fail_on_empty [Boolean] whether an index without values is fatal
# @return [Hash] every index mapped to the array of values sampled from it
# @raise [RuntimeError] if an index has no values and `fail_on_empty` is set
def index_values(indexes, backend, iterations, fail_on_empty = true)
  indexes.each_with_object({}) do |index, samples|
    sampled = backend.index_sample(index, iterations).to_a
    if fail_on_empty && sampled.empty?
      fail "Index #{index.key} is empty and will produce no results"
    end

    samples[index] = sampled
  end
end
143
+ end
144
+ end
145
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to combine CSV results into a `gnuplot` data file
6
+ class NoSECLI < Thor
7
+ desc 'collect-results CSV_FILES',
8
+ 'collect results from CSV_FILES and produce a `gnuplot` data file'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose collect-results` combines results from multiple statement
12
+ execution runs and produces a data file which can be used to generate
13
+ clustered bar charts in `gnuplot`.
14
+ LONGDESC
15
+
16
+ option :total, type: :boolean, default: false, aliases: '-t',
17
+ desc: 'whether to include a line for totals in the '\
18
+ 'graph'
19
+
20
# Print a tab-separated table with one column of means per CSV file
# @param csv_files [Array<String>] CSV files read via `load_data`
# @return [Array] the groups which were printed
def collect_results(*csv_files)
  # Load the data and output the header
  series = load_data csv_files, options[:total]
  header = ['Group']
  series.each { |rows| header << rows.first['label'] }
  puts header.join("\t")

  # Output the mean for each schema
  group_data(series).each do |group|
    collect_group_data group, series
  end
end
29
+
30
+ private
31
+
32
# Determine the distinct groups appearing in any of the result sets
# @param data [Array<Array<Hash>>] rows of each result set, keyed by 'group'
# @return [Array] the groups in order of first appearance; 'TOTAL' is moved
#   to the end when the `total` option is set and dropped otherwise
def group_data(data)
  names = data.flat_map { |rows| rows.map { |row| row['group'] } }.uniq

  # Make sure we collect all rows, keeping the total last
  names = names.reject { |name| name == 'TOTAL' }
  names += ['TOTAL'] if options[:total]

  names
end
42
+
43
# Print one line holding the mean of a group in every result set
#
# The line starts with the group name and each value is followed by a tab;
# a result set without the group contributes an empty value.
# @param group [String] the group to print
# @param data [Array<Array<Hash>>] rows of each result set
# @return [void]
def collect_group_data(group, data)
  cells = data.map do |datum|
    match = datum.find { |r| r['group'] == group }
    match ? match['mean'].to_s : ''
  end

  print group + "\t"
  cells.each { |cell| print cell + "\t" }
  puts
end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Start a pry console while preloading configured objects
6
+ class NoSECLI < Thor
7
+ desc 'console PLAN_FILE', 'open a pry console preconfigured with ' \
8
+ 'variables from the given PLAN_FILE'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose console` gives a convenient way to perform manual exploration of
12
+ generated plan data. It will load plans from the given file and then
13
+ define a number of variables containing this data. This includes all
14
+ instance variables in the `Search::Results` object as well as the
15
+ `model` used to generate the results, the `options` loaded from the
16
+ configuration file, and an instance of the configured `backend`.
17
+ LONGDESC
18
+
19
# Open a pry session on the top-level binding with plan data preloaded
# @param plan_file [String] plan file read via `load_results`
# @return [void]
def console(plan_file)
  # Load the results from the plan file and define each as a variable
  result = load_results plan_file
  expose_result result

  top_level = TOPLEVEL_BINDING

  # Also extract the model as a variable
  top_level.local_variable_set :model, result.workload.model

  # Load the options and backend as variables
  top_level.local_variable_set :options, options
  top_level.local_variable_set :backend, get_backend(options, result)

  top_level.pry
end
34
+
35
+ private
36
+
37
# Expose the properties of the results object for use in the console
#
# Every instance variable of the object which has a method of the same name
# becomes a local variable on the top-level binding, holding the value
# returned by that method.
# @param result [Object] the object whose properties are exposed
# @return [void]
def expose_result(result)
  available = result.methods
  property_names = result.instance_variables.map { |ivar| ivar[1..-1].to_sym }
  exposed = property_names.select { |name| available.include? name }

  exposed.each do |name|
    TOPLEVEL_BINDING.local_variable_set name, result.method(name).call
  end
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command for creating the index data structures in a backend
6
+ class NoSECLI < Thor
7
+ desc 'create PLAN_FILE_OR_SCHEMA',
8
+ 'create indexes from the given PLAN_FILE_OR_SCHEMA'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose create` will load a schema either from generated plan file from
12
+ `nose search` or a named schema in the `schemas` directory. It will
13
+ then create all the indexes in the configured backend.
14
+ LONGDESC
15
+
16
+ option :dry_run, type: :boolean, default: false,
17
+ desc: 'print the DDL, but do not execute'
18
+ option :skip_existing, type: :boolean, default: false, aliases: '-s',
19
+ desc: 'ignore indexes which already exist'
20
+ option :drop_existing, type: :boolean, default: false,
21
+ desc: 'drop existing indexes before recreation'
22
+
23
# Print the index DDL for each given plan file or schema, executing it
# unless the `dry_run` option is set
# @param plan_files [Array<String>] names passed one by one to `load_plans`
# @return [void]
def create(*plan_files)
  plan_files.each do |plan_file|
    _loaded, backend = load_plans(plan_file, options)

    # Produce the DDL and execute unless the dry run option was given
    statements = backend.indexes_ddl(!options[:dry_run],
                                     options[:skip_existing],
                                     options[:drop_existing])
    statements.each do |statement|
      puts statement
    end
  end
end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to show the plans which differ between two plan files
6
+ class NoSECLI < Thor
7
+ desc 'diff-plans PLAN1 PLAN2',
8
+ 'output the differing plans between PLAN1 and PLAN2'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose diff-plans` loads two sets of statement plans generated
12
+ separately by `nose search` and outputs the plans which are different.
13
+ LONGDESC
14
+
15
# Print the plans which differ between two plan files, in both directions
# @param plan1 [String] first plan file, read via `load_results`
# @param plan2 [String] second plan file, read via `load_results`
# @return [void]
def diff_plans(plan1, plan2)
  first, second = [plan1, plan2].map { |plan_file| load_results plan_file }

  # Report what only the first file has, then what only the second has
  output_diff plan1, first, second
  output_diff plan2, second, first
end
22
+
23
+ private
24
+
25
# Output the plans of one set of results which are absent from another
#
# Prints a heading with the plan name, then passes the differing query
# plans to `output_plans_txt` and the differing update plans to
# `output_update_plans_txt`, both writing to standard output.
# @param plan_name [String] name shown in the heading
# @param result1 [Object] results whose plans are reported
# @param result2 [Object] results the plans are compared against
# @return [void]
def output_diff(plan_name, result1, result2)
  puts Formatador.parse("[blue]#{plan_name}\n" + '━' * 50 + '[/]')
  weights = result1.workload.statement_weights

  other_plans = result2.plans
  plans = result1.plans.reject { |plan| other_plans.include? plan }
  output_plans_txt plans, $stdout, 1, weights

  # Results are not guaranteed to carry update plans, so treat a missing
  # list on either side as empty instead of failing on nil
  other_updates = result2.update_plans || []
  update_plans = (result1.update_plans || []).reject do |plan|
    other_updates.include? plan
  end
  output_update_plans_txt update_plans, $stdout, weights
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ostruct'
4
+
5
+ module NoSE
6
+ module CLI
7
+ # Add a command to dump a workload and its corresponding schema
8
+ class NoSECLI < Thor
9
+ desc 'dump PLANS', 'output the plans in PLANS'
10
+
11
+ long_desc <<-LONGDESC
12
+ `nose dump` will output results in the same format as `nose search`,
13
+ but for manually defined execution plans in the `plans` subdirectory.
14
+ LONGDESC
15
+
16
+ shared_option :format
17
+ shared_option :mix
18
+
19
# Output manually defined execution plans like the results of a search
#
# Loads the named plans, calculates their cost, assembles an object with the
# same fields the output methods read from search results and hands it to
# the `output_*` method selected by the `format` option.
# @param plan_name [String] name passed to `Plans::ExecutionPlans.load`
# @return [void]
def dump(plan_name)
  plans = Plans::ExecutionPlans.load plan_name

  # Only keep the mix stored with the plans if none was requested explicitly
  keep_plan_mix = options[:mix] == 'default' && plans.mix != :default
  plans.mix = options[:mix].to_sym unless keep_plan_mix

  # Set the cost of each plan
  cost_model = get_class_from_config options, 'cost', :cost_model
  plans.calculate_cost cost_model

  workload = Workload.new plans.schema.model
  workload.mix = plans.mix

  results = OpenStruct.new
  results.workload = workload
  results.model = workload.model
  results.indexes = plans.schema.indexes.values
  results.enumerated_indexes = []

  results.plans = []
  results.update_plans = []

  # Store all the query and update plans
  all_plans = plans.groups.values.flatten(1)
  all_plans.each do |plan|
    if plan.update_steps.empty?
      results.plans << plan
      next
    end

    # XXX: Hack to build a valid update plan
    statement = OpenStruct.new group: plan.group
    update_plan = Plans::UpdatePlan.new statement, plan.index, nil,
                                        plan.update_steps, cost_model
    update_plan.instance_variable_set :@group, plan.group
    update_plan.instance_variable_set :@query_plans, plan.query_plans
    results.update_plans << update_plan
  end

  results.cost_model = cost_model
  results.weights = plans.weights.map { |g, w| [g, w[plans.mix]] }.to_h
  results.total_size = results.indexes.sum_by(&:size)

  # Plans without a weight do not contribute to the total cost
  results.total_cost = all_plans.sum_by do |plan|
    plan.weight.nil? ? 0 : plan.cost * plan.weight
  end

  # Output the results in the specified format
  formatter = ('output_' + options[:format]).to_sym
  send formatter, results
end
65
+ end
66
+ end
67
+ end