nose-cli 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,241 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'table_print'
4
+
5
+ module NoSE
6
+ module CLI
7
+ # Run performance tests on plans for a particular schema
8
+ class NoSECLI < Thor
9
+ desc 'execute PLANS', 'test performance of the named PLANS'
10
+
11
+ long_desc <<-LONGDESC
12
+ `nose execute` is similar to `nose benchmark`. It will take manually
13
+ defined plans with the given name stored in the `plans` subdirectory,
14
+ execute each statement, and output a summary of the execution times.
15
+ Before runnng execute, `nose create` and `nose load` must be used to
16
+ prepare the target database.
17
+ LONGDESC
18
+
19
+ shared_option :mix
20
+
21
+ option :num_iterations, type: :numeric, default: 100,
22
+ banner: 'the number of times to execute each ' \
23
+ 'statement'
24
+ option :repeat, type: :numeric, default: 1,
25
+ banner: 'how many times to repeat the benchmark'
26
+ option :group, type: :string, default: nil, aliases: '-g',
27
+ banner: 'restrict the benchmark to statements in the ' \
28
+ 'given group'
29
+ option :fail_on_empty, type: :boolean, default: true,
30
+ banner: 'abort if a column family is empty'
31
+ option :totals, type: :boolean, default: false, aliases: '-t',
32
+ banner: 'whether to include group totals in the output'
33
+ option :format, type: :string, default: 'txt',
34
+ enum: %w(txt csv), aliases: '-f',
35
+ banner: 'the format of the output data'
36
+
37
# Benchmark every statement of the named execution plans against the
# configured backend and print a summary of the measured times.
#
# Reads +:mix+, +:num_iterations+, +:repeat+, +:group+, +:plan+,
# +:fail_on_empty+, +:totals+ and +:format+ from +options+.
# @param plans_name [String] name passed to Plans::ExecutionPlans.load and
#   used as the label of every output row
# @return [void]
def execute(plans_name)
  # Load the execution plans
  plans = Plans::ExecutionPlans.load plans_name

  # Describe the workload and indexes, then build the backend from them
  result = OpenStruct.new
  result.workload = Workload.new plans.schema.model
  result.workload.mix = options[:mix].to_sym \
    unless options[:mix] == 'default' && result.workload.mix != :default
  result.model = result.workload.model
  result.indexes = plans.schema.indexes.values
  backend = get_backend(options, result)

  # Get sample index values to use in queries (the local is deliberately
  # not called index_values so it cannot shadow the helper method)
  sampled_values = index_values(plans.schema.indexes.values, backend,
                                options[:num_iterations],
                                options[:fail_on_empty])

  table = []
  plans.groups.each do |group, group_plans|
    next if options[:group] && group != options[:group]

    # Groups without a weight in the selected mix are not benchmarked
    group_weight = plans.weights[group][result.workload.mix]
    next unless group_weight

    group_table = []
    group_plans.each do |plan|
      next if options[:plan] && plan.name != options[:plan]

      # A plan whose last step is not an index lookup is run as an update
      update = !plan.steps.last.is_a?(Plans::IndexLookupPlanStep)
      method = update ? :bench_update : :bench_query
      group_table << send(method, backend, plans.schema.indexes.values,
                          plan, sampled_values,
                          options[:num_iterations],
                          options[:repeat], weight: group_weight)
    end

    if options[:totals]
      # Add a row holding the sum of the weighted means of the group
      total_measurement = Measurements::Measurement.new nil, 'TOTAL'
      total_measurement << group_table.map(&:weighted_mean).inject(0, &:+)
      group_table << total_measurement
    end

    table << OpenStruct.new(label: plans_name, group: group,
                            measurements: group_table)
  end

  if options[:totals]
    # Add a final group holding the sum of all the per-group totals
    group_totals = table.map do |group|
      group.measurements.find { |m| m.name == 'TOTAL' }.mean
    end
    total_measurement = Measurements::Measurement.new nil, 'TOTAL'
    total_measurement << group_totals.inject(0, &:+)
    table << OpenStruct.new(label: plans_name, group: 'TOTAL',
                            measurements: [total_measurement])
  end

  case options[:format]
  when 'txt'
    output_table table
  else
    output_csv table
  end
end
109
+
110
+ private
111
+
112
# Print the results as a text table by handing them to `tp`
# @return [void]
def output_table(table)
  # Measurement attribute => heading shown for that column
  headings = {
    'name' => 'name',
    'weight' => 'weight',
    'mean' => 'mean',
    'estimate' => 'cost'
  }
  measurement_columns = headings.map do |attribute, heading|
    { "measurements.#{attribute}" => { display_name: heading } }
  end

  tp table, 'label', 'group', *measurement_columns
end
125
+
126
# Print the results to standard output as CSV, one row per measurement
# @return [void]
def output_csv(table)
  header = %w(label group name weight mean cost)

  # Flatten the groups into one row for each of their measurements
  rows = table.flat_map do |group|
    group.measurements.map do |measurement|
      [group.label, group.group, measurement.name, measurement.weight,
       measurement.mean, measurement.estimate]
    end
  end

  puts CSV.generate { |csv| [header, *rows].each { |row| csv << row } }
end
148
+
149
# Get the average execution time for a single query plan
#
# The plan is prepared once on the backend. Each of the +repeat+ rounds then
# executes it once per generated set of conditions and records the elapsed
# time of the round divided by +iterations+.
# @param backend [Object] must respond to +prepare_query+
# @param indexes [Array] passed through to +execute_conditions+
# @param plan [Object] supplies +params+, +select_fields+ and +steps+
# @param index_values [Hash] passed through to +execute_conditions+
# @param iterations [Numeric] number of condition sets to generate
# @param repeat [Integer] number of timed rounds (one value recorded each)
# @param weight [Numeric] weight stored on the returned measurement
# @return [Measurements::Measurement]
def bench_query(backend, indexes, plan, index_values, iterations, repeat,
                weight: 1.0)
  condition_list = execute_conditions plan.params, indexes, index_values,
                                      iterations
  prepared = backend.prepare_query nil, plan.select_fields, plan.params,
                                   [plan.steps]

  measurement = Measurements::Measurement.new plan, weight: weight

  1.upto(repeat) do
    # Time with the monotonic clock: unlike Time.now it cannot jump when
    # the system clock is adjusted, so the elapsed time is never skewed
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    condition_list.each { |conditions| prepared.execute conditions }
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time

    measurement << (elapsed / iterations)
  end

  measurement
end
172
+
173
# Get the average execution time for a single update plan
#
# One list of field settings is built for each generated set of conditions.
# Each of the +repeat+ rounds then runs every prepared statement once per
# settings/conditions pair and records the elapsed time of the round divided
# by +iterations+.
# @param backend [Object] must respond to +prepare_update+ and +generate_id+
# @param indexes [Array] passed through to +execute_conditions+
# @param plan [Object] supplies +params+ and +update_steps+
# @param index_values [Hash] passed through to +execute_conditions+
# @param iterations [Numeric] number of condition sets to generate
# @param repeat [Integer] number of timed rounds (one value recorded each)
# @param weight [Numeric] weight stored on the returned measurement
# @return [Measurements::Measurement]
def bench_update(backend, indexes, plan, index_values,
                 iterations, repeat, weight: 1.0)
  condition_list = execute_conditions plan.params, indexes, index_values,
                                      iterations

  # Get values for the fields which were provided as parameters
  fields = plan.update_steps.last.fields.select do |field|
    plan.params.key? field.id
  end

  # execute_conditions returns one hash per iteration, so mapping over it
  # produces exactly one list of settings for each set of conditions
  setting_list = condition_list.map do |conditions|
    fields.map do |field|
      # ID fields use the ID given in the conditions when there is one and
      # a backend-generated ID otherwise; other fields get a random value
      condition = conditions[field.id]
      value = if !field.is_a?(Fields::IDField)
                field.random_value
              elsif condition.nil?
                backend.generate_id
              else
                condition.value
              end

      FieldSetting.new(field, value)
    end
  end

  prepared = backend.prepare_update nil, [plan]

  measurement = Measurements::Measurement.new plan, weight: weight

  1.upto(repeat) do
    # Time with the monotonic clock: unlike Time.now it cannot jump when
    # the system clock is adjusted, so the elapsed time is never skewed
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    setting_list.zip(condition_list).each do |settings, conditions|
      prepared.each { |p| p.execute settings, conditions }
    end
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time

    measurement << (elapsed / iterations)
  end

  measurement
end
218
+
219
# Build one hash of conditions per iteration, replacing the value of each
# parameter with one sampled from the index data
# @return [Array<Hash>]
def execute_conditions(params, indexes, index_values, iterations)
  1.upto(iterations).map do |i|
    params.each_with_object({}) do |(field_id, condition), conditions|
      # Try the indexes in order and keep the first non-nil value found
      # for this field (nil when no index provides one)
      sample = nil
      indexes.each do |index|
        rows = index_values[index]
        sample = rows[i % rows.length][condition.field.id] unless rows.empty?
        break unless sample.nil?
      end

      conditions[field_id] = Condition.new(condition.field,
                                           condition.operator, sample)
    end
  end
end
239
+ end
240
+ end
241
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to export the configuration as environment variables
6
+ class NoSECLI < Thor
7
+ desc 'export', 'export the configuration as environment variables'
8
+
9
+ long_desc <<-LONGDESC
10
+ `nose export` reads the configuration file and outputs key-value pairs
11
+ suitable for use as environment variables.
12
+ LONGDESC
13
+
14
# Print every option as a key-value pair, starting from an empty name path
# @return [void]
def export
  export_value([], options)
end
17
+
18
+ private
19
+
20
# Recursively export the values
#
# A hash adds each of its keys to the name, an array adds a `count` entry
# followed by one entry per zero-based index, and any other value is printed
# as `NAME="value"` where NAME is the path joined with underscores and
# upcased.
# @param path [Array] name components collected so far
# @param value [Object] the (possibly nested) value to export
# @return [void]
def export_value(path, value)
  case value
  when Hash
    value.each do |key, nested_value|
      export_value path + [key], nested_value
    end
  when Array
    # Append an integer index to each element of the array
    export_value path + ['count'], value.length
    value.each_with_index do |nested_value, i|
      export_value path + [i], nested_value
    end
  else
    # Backslash-escape the characters which remain special inside double
    # quotes so the value can neither end the quoting nor be expanded
    escaped = value.to_s.gsub(/[\\"$`]/) { |char| "\\#{char}" }
    puts "#{path.join('_').upcase}=\"#{escaped}\""
  end
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to generate a workload file from a given loader
6
+ class NoSECLI < Thor
7
+ desc 'genworkload NAME',
8
+ 'generate a workload called NAME from the configured loader'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose genworkload` will produce a new file in the `workloads` directory
12
+ containing information on the workload from the configured loader.
13
+ LONGDESC
14
+
15
# Build the workload with the configured loader and write its source code
# to `./workloads/NAME.rb`
# @param name [String] base name (without extension) of the file to write
# @return [Integer] the value returned by the final write
def genworkload(name)
  loader_class = get_class 'loader', options
  workload = loader_class.new.workload options[:loader]

  # Generate the source before opening the file: opening with 'w' truncates
  # it, so a failure during generation must not leave an empty file behind
  source = workload.source_code
  File.open("./workloads/#{name}.rb", 'w') do |file|
    file.write source
  end
end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to generate a graphic of the schema from a workload
6
+ class NoSECLI < Thor
7
+ desc 'graph WORKLOAD FILE', 'output a FILE of the given WORKLOAD'
8
+
9
+ long_desc <<-LONGDESC
10
+ `nose graph` will produce a visual representation of the schema for the
11
+ named workload in the `workloads` directory.
12
+ LONGDESC
13
+
14
+ option :include_fields, type: :boolean, default: false, aliases: '-i',
15
+ desc: 'include each field in the output graph'
16
+
17
# Write the model of the named workload to the given file; the output type
# is the text after the last `.` of the filename
# @return [void]
def graph(workload_name, filename)
  workload = Workload.load(workload_name)
  *, extension = filename.split('.')
  type = extension.to_sym
  workload.model.output(type, filename, options[:include_fields])
end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to load index data into a backend from a configured loader
6
+ class NoSECLI < Thor
7
+ desc 'load PLAN_FILE_OR_SCHEMA',
8
+ 'create indexes from the given PLAN_FILE_OR_SCHEMA'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose load` will load a schema either from generated plan file from
12
+ `nose search` or a named schema in the `schemas` directory. It will
13
+ then populate the backend indexes as defined by the schema using data
14
+ from the configured loader. It assumes that the indexes have already
15
+ been created by `nose create`.
16
+ LONGDESC
17
+
18
+ option :progress, type: :boolean, default: true, aliases: '-p',
19
+ desc: 'whether to display an indication of progress'
20
+ option :limit, type: :numeric, default: nil, aliases: '-l',
21
+ desc: 'limit the number of entries loaded ' \
22
+ '(useful for testing)'
23
+ option :skip_nonempty, type: :boolean, default: true, aliases: '-s',
24
+ desc: 'ignore indexes which are not empty'
25
+
26
# Load data for each of the given plan files (or schemas) in turn
# @return [void]
def load(*plan_files)
  plan_files.each do |plan_file|
    load_plan(plan_file, options)
  end
end
29
+
30
+ private
31
+
32
# Populate the backend for one plan file using the configured loader
# @return [void]
def load_plan(plan_file, options)
  result, backend = load_plans(plan_file, options)

  # Instantiate the configured loader for this workload and backend
  loader_class = get_class('loader', options)
  loader = loader_class.new(result.workload, backend)

  # Hand the indexes over together with the relevant loading options
  loader.load(result.indexes,
              options[:loader],
              options[:progress],
              options[:limit],
              options[:skip_nonempty])
end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'descriptive_statistics/safe'
4
+ require 'forwardable'
5
+
6
+ module NoSE
7
+ # Storage and presentation of values from performance measurements
8
+ module Measurements
9
# The recorded execution times of a single statement together with the
# plan, display name, weight and cost estimate they belong to
class Measurement
  extend Forwardable
  include Enumerable
  include DescriptiveStatistics

  attr_accessor :estimate
  attr_reader :plan, :name, :weight

  # Forward the collection methods to the array of recorded values so that
  # numbers can be appended and statistics computed over them
  delegate %i(each << size count length empty?) => :@values

  # @param plan [Object, nil] plan being measured; also supplies the name
  #   when none is given explicitly
  # @param name [String, nil] display name of the measurement
  # @param estimate [Object, nil] initial value of the estimate attribute
  # @param weight [Numeric] weight used by #weighted_mean
  def initialize(plan, name = nil, estimate = nil, weight: 1.0)
    @values = []
    @plan = plan
    @estimate = estimate
    @weight = weight
    @name = name || (plan && plan.name)
  end

  # The mean of the recorded values multiplied by the measurement weight
  # @return [Numeric]
  def weighted_mean
    @weight * mean
  end
end
35
+ end
36
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module CLI
5
+ # Add a command to output plans for a workload using a manually-defined schema
6
+ class NoSECLI < Thor
7
+ desc 'plan-schema WORKLOAD SCHEMA',
8
+ 'output plans for the given WORKLOAD using SCHEMA'
9
+
10
+ long_desc <<-LONGDESC
11
+ `nose plan-schema` produces a set of plans for the given WORKLOAD
12
+ using the manually-defined SCHEMA.
13
+
14
+ This is useful to compare manually-defined execution plans with the
15
+ plans that NoSE would produce for the same schema.
16
+ LONGDESC
17
+
18
+ shared_option :format
19
+ shared_option :mix
20
+
21
# Plan every statement of the named workload against the indexes of the
# named schema and output the results in the format given by the options
# @return [void]
def plan_schema(workload_name, schema_name)
  workload = Workload.load workload_name
  unless options[:mix] == 'default' && workload.mix != :default
    workload.mix = options[:mix].to_sym
  end
  indexes = Schema.load(schema_name).indexes.values

  # Plan each query and keep the minimum plan of every tree
  cost_model = get_class_from_config options, 'cost', :cost_model
  planner = Plans::QueryPlanner.new workload, indexes, cost_model
  trees = workload.queries.map do |query|
    planner.find_plans_for_query query
  end
  plans = trees.map(&:min)

  # Plan the remaining (non-query) statements
  update_plans = build_update_plans(workload.statements, indexes,
                                    workload.model, trees, cost_model)

  # Gather everything into a single result set
  results = plan_schema_results(workload, indexes, plans, update_plans,
                                cost_model)

  # Dispatch to the output method named after the requested format
  formatter = ('output_' + options[:format]).to_sym
  send(formatter, results)
end
44
+
45
+ private
46
+
47
# Bundle the planning output into one result object, adding the summed
# index size and the summed weighted plan cost
# @return [OpenStruct]
def plan_schema_results(workload, indexes, plans, update_plans,
                        cost_model)
  OpenStruct.new(
    workload: workload,
    model: workload.model,
    indexes: indexes,
    # No indexes are enumerated when the schema is given by hand
    enumerated_indexes: [],
    plans: plans,
    update_plans: update_plans,
    cost_model: cost_model,
    weights: workload.statement_weights,
    total_size: indexes.sum_by(&:size),
    total_cost: plans.sum_by { |plan| plan.cost * plan.weight }
  )
end
65
+
66
# Collect the update plans of every statement which is not a query, after
# selecting the query plans of each one for the given indexes
# @return [Array<Plans::UpdatePlan>]
def build_update_plans(statements, indexes, model, trees, cost_model)
  planner = Plans::UpdatePlanner.new model, trees, cost_model

  statements.each_with_object([]) do |statement, collected|
    # Queries were planned separately, so only updates are handled here
    next if statement.is_a? Query

    planner.find_plans_for_update(statement, indexes).each do |plan|
      plan.select_query_plans(indexes)
      collected << plan
    end
  end
end
82
+ end
83
+ end
84
+ end