nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,328 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logging'
4
+
5
+ begin
6
+ require 'mipper'
7
+ rescue LoadError
8
+ # We can't use most search functionality, but it won't explode
9
+ nil
10
+ end
11
+
12
+ module NoSE
13
+ module Search
14
# Enumeration of the objective functions a search problem can minimize
module Objective
  # Total cost of the statements in the workload
  COST = 1

  # Total space consumed by the generated indexes
  SPACE = 2

  # Total count of indexes
  INDEXES = 3
end
25
+
26
+ # A representation of a search problem as an ILP
27
+ class Problem
28
+ attr_reader :model, :status, :queries, :updates,
29
+ :index_vars, :query_vars, :indexes, :data,
30
+ :objective_type, :objective_value
31
+
32
# Record the workload statements and cost data, then build the ILP model
# @param queries [Array] queries to be planned
# @param updates [Array] updates whose maintenance cost is considered
# @param data [Hash] search data; `data[:costs]` maps each query to a
#   hash keyed by index
# @param objective [Integer] one of the Objective constants
def initialize(queries, updates, data, objective = Objective::COST)
  @logger = Logging.logger['nose::search::problem']
  @status = nil
  @objective_type = objective

  @queries = queries
  @updates = updates
  @data = data

  # Every index which appears in the cost table of at least one query
  @indexes = @data[:costs].flat_map do |_query, index_costs|
    index_costs.keys
  end.uniq

  setup_model
end
43
+
44
# Run the solver and make the selected indexes available
#
# Does nothing if a status has already been recorded. After a successful
# solve, the first pass (unless it already minimized the index count) hands
# off to solve_next with Objective::INDEXES, and any later pass whose type
# was not Objective::SPACE hands off with Objective::SPACE.
# @param previous_type [Integer, nil] objective passed along by solve_next,
#   nil on the initial call
# @return [void]
# @raise [NoSolutionException] if the model status is not :optimized
def solve(previous_type = nil)
  return unless @status.nil?

  # Run the optimization
  @model.optimize
  @status = model.status
  fail NoSolutionException, @status unless @status == :optimized

  # Store the objective value
  @objective_value = @obj_var.value

  # Decide which follow-up objective (if any) must still be solved
  next_objective =
    if previous_type.nil?
      Objective::INDEXES unless @objective_type == Objective::INDEXES
    elsif previous_type != Objective::SPACE
      Objective::SPACE
    end

  unless next_objective.nil?
    solve_next next_objective
    return
  end

  # Fall back to the value reported by the model itself
  @objective_value = @model.objective_value if @objective_value.nil?

  @logger.debug do
    "Final objective value is #{@objective.inspect}" \
      " = #{@objective_value}"
  end
end
72
+
73
# Return the selected indices
#
# The set is computed once from the index variables and cached afterwards.
# @return [Set<Index>, nil] nil while no solver status has been recorded
def selected_indexes
  return if @status.nil?

  @selected_indexes ||= begin
    chosen = @index_vars.select { |_index, var| var.value }
    chosen.keys.to_set
  end
end
83
+
84
# Return relevant data on the results of the ILP
#
# Populates a new Results object with the enumerated indexes, the selected
# indexes, their summed size and the stored objective value.
# @return [Results]
def result
  Results.new(self, @data[:by_id_graph]).tap do |results|
    results.enumerated_indexes = indexes
    results.indexes = selected_indexes

    # TODO: Update for indexes grouped by ID path
    results.total_size = selected_indexes.sum_by(&:size)
    results.total_cost = @objective_value
  end
end
97
+
98
# Get the size of all indexes in the workload
#
# Each index variable is multiplied by the (floating point) size of its
# index and the products are added together.
# @return [MIPPeR::LinExpr, nil] nil when there are no indexes
def total_size
  # TODO: Update for indexes grouped by ID path
  weighted = @indexes.map do |index|
    @index_vars[index] * (index.size * 1.0)
  end

  weighted.inject(:+)
end
106
+
107
# Get the cost of all queries in the workload
#
# Accumulates total_query_cost for every (query, index) pair and then
# passes the expression through add_update_costs.
# @return [MIPPeR::LinExpr]
def total_cost
  query_costs = @queries.inject(MIPPeR::LinExpr.new) do |total, query|
    total.add(@indexes.inject(MIPPeR::LinExpr.new) do |partial, index|
      partial.add total_query_cost(@data[:costs][query][index],
                                   @query_vars[index][query],
                                   @sort_costs[query][index],
                                   @sort_vars[query][index])
    end)
  end

  add_update_costs query_costs
end
122
+
123
# The total number of indexes
#
# Starts from an empty expression and adds every index variable with a
# coefficient of 1.0.
# @return [MIPPeR::LinExpr]
def total_indexes
  @index_vars.each_value.inject(MIPPeR::LinExpr.new) do |total, var|
    total + var * 1.0
  end
end
131
+
132
+ private
133
+
134
# Pin the current objective value and set a new objective
#
# Both bounds of the current objective variable are fixed to the value just
# obtained, a new objective is defined for the requested type, and the
# solver is run again.
# @param objective_type [Integer] Objective::INDEXES or Objective::SPACE;
#   any other value leaves the objective unchanged before re-solving
# @return [void]
def solve_next(objective_type)
  pinned = @objective_value
  @obj_var.lower_bound = pinned
  @obj_var.upper_bound = pinned

  case objective_type
  when Objective::INDEXES
    @objective_type = Objective::INDEXES
    define_objective 'objective_indexes'
  when Objective::SPACE
    @objective_type = Objective::SPACE
    define_objective 'objective_space'
  end

  # Clear the status so that solve does not return early
  @status = nil
  solve objective_type
end
151
+
152
# Write a model to a temporary file and log the file name
#
# All the work happens inside the logger block, so it is only performed when
# the logger evaluates the block.
# @param type [String] label which prefixes the logged message
# @return [void]
def log_model(type)
  @logger.debug do
    tmpfile = Tempfile.new ['model', '.mps']

    # Remove the finalizer registered on the Tempfile object; presumably
    # this is meant to keep the file around for later inspection
    ObjectSpace.undefine_finalizer tmpfile

    # Only the path is needed: close our own handle (without unlinking)
    # so no descriptor is left open while the model is written to the file
    tmpfile.close

    @model.write_mps tmpfile.path
    "#{type} written to #{tmpfile.path}"
  end
end
162
+
163
# Build the ILP by creating all the variables and constraints
#
# The model is updated once after the variables and sort costs are prepared
# and again after the constraints and objective are defined; finally the
# model is handed to log_model.
# @return [void]
def setup_model
  # Set up solver environment
  @model = MIPPeR::CbcModel.new

  # Variables first
  add_variables
  prepare_sort_costs
  @model.update

  # Then constraints and the objective
  add_constraints
  define_objective
  @model.update

  log_model('Model')
end
179
+
180
+ private
181
+
182
# Set the value of the objective function
#
# The expression for the current objective type is tied to a fresh
# continuous variable through an equality constraint, and the model is set
# to minimize.
# @param var_name [String] name for the objective variable; only used when
#   the NOSE_LOG environment variable is 'debug'
# @return [void]
def define_objective(var_name = 'objective')
  obj = case @objective_type
        when Objective::COST then total_cost
        when Objective::SPACE then total_size
        when Objective::INDEXES then total_indexes
        end

  # Add the objective function as a variable
  label = ENV['NOSE_LOG'] == 'debug' ? var_name : nil
  @obj_var = MIPPeR::Variable.new(0, Float::INFINITY, 1.0,
                                  :continuous, label)
  @model << @obj_var
  @model.update

  # Force the variable to equal the objective expression
  equality = MIPPeR::Constraint.new(obj + @obj_var * -1.0, :==, 0.0)
  @model << equality

  @logger.debug { "Objective function is #{obj.inspect}" }

  @objective = obj
  @model.sense = :min
end
208
+
209
# Initialize query and index variables
#
# Creates one binary variable per (index, query) pair and one per index.
# When `@data[:by_id_graph]` is set, a variable is also created for the ID
# graph of each index, with a constraint requiring the ID graph variable to
# be at least the index variable. Variables and constraints are only named
# when the NOSE_LOG environment variable is 'debug'.
# @return [void]
def add_variables
  debug = ENV['NOSE_LOG'] == 'debug'
  @index_vars = {}
  @query_vars = {}

  @indexes.each do |index|
    @query_vars[index] = {}
    @queries.each_with_index do |query, q|
      query_var = "q#{q}_#{index.key}" if debug
      var = MIPPeR::Variable.new 0, 1, 0, :binary, query_var
      @model << var
      @query_vars[index][query] = var
    end

    # Reuse a variable created earlier for this index as the ID graph of
    # another index; replacing it would orphan the variable which existing
    # constraints already refer to
    var_name = index.key if debug
    @index_vars[index] ||= MIPPeR::Variable.new 0, 1, 0, :binary, var_name

    # If needed when grouping by ID graph, add an extra
    # variable for the base index based on the ID graph
    next unless @data[:by_id_graph]
    id_graph = index.to_id_graph
    next if id_graph == index

    # Add a new variable for the ID graph if needed, named after the
    # ID graph itself rather than the index which led to it
    unless @index_vars.key? id_graph
      id_graph_name = id_graph.key if debug
      @index_vars[id_graph] = MIPPeR::Variable.new 0, 1, 0, :binary,
                                                   id_graph_name
    end

    # Ensure that the ID graph of this index is present if we use it
    name = "ID_#{id_graph.key}_#{index.key}" if debug
    constr = MIPPeR::Constraint.new @index_vars[id_graph] * 1.0 +
                                    @index_vars[index] * -1.0,
                                    :>=, 0, name
    @model << constr
  end

  @index_vars.each_value { |var| @model << var }
end
250
+
251
# Prepare variables and constraints to account for the cost of sorting
#
# For each (query, index) entry whose steps contain a Plans::SortPlanStep,
# the step's cost is recorded, a binary sort variable is added, and a
# constraint requires the sort variable to be at least the corresponding
# query variable.
# @return [void]
def prepare_sort_costs
  debug = ENV['NOSE_LOG'] == 'debug'
  @sort_costs = {}
  @sort_vars = {}

  @data[:costs].each do |query, index_costs|
    query_sort_costs = @sort_costs[query] = {}
    query_sort_vars = @sort_vars[query] = {}

    index_costs.each do |index, (steps, _)|
      sort_step = steps.find { |step| step.is_a?(Plans::SortPlanStep) }
      next if sort_step.nil?

      query_sort_costs[index] ||= sort_step.cost
      q = @queries.index query

      var_name = debug ? "s#{q}" : nil
      sort_var = MIPPeR::Variable.new 0, 1, 0, :binary, var_name
      query_sort_vars[index] ||= sort_var
      @model << sort_var

      constr_name = debug ? "q#{q}_#{index.key}_sort" : nil
      @model << MIPPeR::Constraint.new(query_sort_vars[index] * 1.0 +
                                       @query_vars[index][query] * -1.0,
                                       :>=, 0, constr_name)
    end
  end
end
280
+
281
# Add all necessary constraints to the model
#
# Applies the index presence, space and complete plan constraint classes to
# this problem (in that order) and logs the resulting constraint count.
# @return [void]
def add_constraints
  IndexPresenceConstraints.apply self
  SpaceConstraint.apply self
  CompletePlanConstraints.apply self

  @logger.debug { "Added #{@model.constraints.count} constraints to model" }
end
294
+
295
# Deal with updates which do not require support queries
#
# For every update and enumerated index (mapped to its ID graph when
# grouping by ID graph), a term for the index variable times the stored
# update cost is added whenever the update modifies that index.
# @param min_cost [MIPPeR::LinExpr] expression which is extended in place
# @return [MIPPeR::LinExpr] the same expression which was passed in
def add_update_costs(min_cost)
  @updates.each do |update|
    @indexes.each do |enumerated|
      target = data[:by_id_graph] ? enumerated.to_id_graph : enumerated
      next unless update.modifies_index? target

      min_cost.add @index_vars[target] *
                   @data[:update_costs][update][target]
    end
  end

  min_cost
end
310
+
311
# Get the total cost of the query for the objective function
# @param cost [Array, nil] cost entry whose last element is the numeric cost
# @param query_var variable which is multiplied by the query cost
# @param sort_cost cost of sorting, or nil if no sort is needed
# @param sort_var variable which is multiplied by the sort cost
# @return [MIPPeR::LinExpr] an empty expression when there is no cost entry
def total_query_cost(cost, query_var, sort_cost, sort_var)
  return MIPPeR::LinExpr.new if cost.nil?

  expr = query_var * (cost.last * 1.0)
  sort_cost.nil? ? expr : expr + sort_var * sort_cost
end
322
+ end
323
+
324
# Raised by the solver wrapper when the ILP has no optimized solution
class NoSolutionException < StandardError; end
327
+ end
328
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Search
5
+ # A container for results from a schema search
6
+ class Results
7
+ attr_reader :cost_model
8
+ attr_accessor :enumerated_indexes, :indexes, :total_size, :total_cost,
9
+ :workload, :update_plans, :plans,
10
+ :revision, :time, :command, :by_id_graph
11
+
12
# Collect, per query, the indexes whose query variable has a truthy value
# @param problem [Problem, nil] solved problem; when nil only @problem is set
# @param by_id_graph [Boolean] stored for later validation
def initialize(problem = nil, by_id_graph = false)
  @problem = problem
  return if problem.nil?
  @by_id_graph = by_id_graph

  # Find the indexes the ILP says the query should use
  @query_indexes = Hash.new { |hash, query| hash[query] = Set.new }
  @problem.query_vars.each do |index, vars_by_query|
    vars_by_query.each do |query, var|
      @query_indexes[query] << index if var.value
    end
  end
end
26
+
27
# Provide access to the underlying model in the workload
# @return [Model] the locally stored model when there is no workload,
#   otherwise the model of the workload
def model
  return @model if @workload.nil?

  @workload.model
end
32
+
33
# Assign the model to the workload if it exists, otherwise store it
# @param model [Model] the model to record
# @return [void]
def model=(model)
  return @model = model if @workload.nil?

  # The workload's @model instance variable is written directly
  @workload.instance_variable_set :@model, model
end
42
+
43
# Recalculate all costs with the given cost model, then remember it
#
# The model is only stored once the recalculation has completed.
# @param new_cost_model the cost model used from now on
# @return [void]
def cost_model=(new_cost_model)
  recalculate_cost(new_cost_model)
  @cost_model = new_cost_model
end
49
+
50
# Recalculate the cost of every plan step and the weighted total cost
#
# Costs are recomputed for the steps of all query plans, and for the update
# steps and support query plans of all update plans. The total is the sum of
# each plan cost multiplied by the workload weight of its statement.
# @param new_cost_model cost model to apply; defaults to the stored one
# @return [void]
def recalculate_cost(new_cost_model = nil)
  new_cost_model = @cost_model if new_cost_model.nil?
  reprice = ->(step) { step.calculate_cost new_cost_model }

  (@plans || []).each { |plan| plan.each(&reprice) }
  (@update_plans || []).each do |plan|
    plan.update_steps.each(&reprice)
    plan.query_plans.each { |query_plan| query_plan.each(&reprice) }
  end

  # Recalculate the total
  query_cost = (@plans || []).sum_by do |plan|
    plan.cost * @workload.statement_weights[plan.query]
  end
  update_cost = (@update_plans || []).sum_by do |plan|
    plan.cost * @workload.statement_weights[plan.statement]
  end
  @total_cost = query_cost + update_cost
end
74
+
75
# Validate that the results of the search are consistent
#
# Runs each validation in turn, checks that every workload query has a
# plan, and finally freezes this object.
# @return [void]
# @raise [InvalidResultsException] if some workload query has no plan
def validate
  validate_indexes
  validate_query_indexes @plans
  validate_update_indexes

  # Every query in the workload must appear among the planned queries
  planned_queries = plans.map(&:query).to_set
  all_planned = @workload.queries.to_set.subset? planned_queries
  fail InvalidResultsException unless all_planned
  validate_query_plans @plans

  validate_update_plans
  validate_objective

  freeze
end
92
+
93
# Set the query plans which should be used based on the entire tree
# @param trees [Enumerable] plan trees, each responding to `query`
# @return [void]
def plans_from_trees(trees)
  # Exclude support queries since they will be in update plans
  workload_trees = trees.reject { |tree| tree.query.is_a?(SupportQuery) }

  @plans = workload_trees.map { |tree| select_plan tree }
end
104
+
105
# Select the single query plan from a tree of plans
#
# The chosen plan is the first one in the tree whose set of indexes equals
# the set of indexes recorded for the tree's query. The workload is stored
# on the chosen plan in its @workload instance variable.
# @param tree [Enumerable] tree of candidate plans which responds to `query`
# @return [Plans::QueryPlan]
# @raise [InvalidResultsException] if no plan in the tree matches
def select_plan(tree)
  query = tree.query
  plan = tree.find do |tree_plan|
    tree_plan.indexes.to_set == @query_indexes[query]
  end

  # Check for a missing plan before touching it; otherwise the call to
  # instance_variable_set on nil fails first and this error is never raised
  fail InvalidResultsException if plan.nil?

  plan.instance_variable_set :@workload, @workload
  plan
end
118
+
119
+ private
120
+
121
# Check that the indexes selected were actually enumerated
# @return [void]
# @raise [InvalidResultsException] if a selected index was not enumerated
def validate_indexes
  remaining = @indexes.dup

  # We may not have enumerated ID graphs, so they are not checked
  if @by_id_graph
    @indexes.each { |index| remaining.delete index.to_id_graph }
  end

  unexpected = remaining - @enumerated_indexes
  fail InvalidResultsException unless unexpected.empty?
end
133
+
134
# Ensure we only have necessary update plans which use available indexes
# @return [void]
# @raise [InvalidResultsException] if an update plan targets an index
#   which was not selected
def validate_update_indexes
  @update_plans.each do |plan|
    validate_query_indexes plan.query_plans
    fail InvalidResultsException unless @indexes.include?(plan.index)
  end
end
143
+
144
# Check that the objective function has the expected value
#
# For a cost objective the weighted plan costs must match the total cost;
# for a space objective the summed index sizes must match the total size.
# Both comparisons allow a difference below 0.001. Other objectives are
# not checked.
# @return [void]
# @raise [InvalidResultsException] if the recomputed value does not match
def validate_objective
  case @problem.objective_type
  when Objective::COST
    query_cost = @plans.inject(0) do |sum, plan|
      sum + @workload.statement_weights[plan.query] * plan.cost
    end
    update_cost = @update_plans.inject(0) do |sum, plan|
      sum + @workload.statement_weights[plan.statement] * plan.cost
    end
    difference = (query_cost + update_cost - @total_cost).abs

    fail InvalidResultsException unless difference < 0.001
  when Objective::SPACE
    difference = (@indexes.sum_by(&:size) - @total_size).abs
    fail InvalidResultsException unless difference < 0.001
  end
end
162
+
163
# Ensure that all the query plans use valid indexes
# @param plans [Enumerable] plans, each of which enumerates its steps
# @return [void]
# @raise [InvalidResultsException] if an index lookup step uses an index
#   which was not selected
def validate_query_indexes(plans)
  plans.each do |plan|
    plan.each do |step|
      # Only index lookups refer to an index
      next unless step.is_a?(Plans::IndexLookupPlanStep)

      fail InvalidResultsException unless @indexes.include?(step.index)
    end
  end
end
174
+
175
# Validate the query plans from the original workload
# @param plans [Enumerable] plans responding to `indexes` and `query`
# @return [void]
# @raise [InvalidResultsException] if a plan's indexes differ from the set
#   of indexes recorded for its query
def validate_query_plans(plans)
  plans.each do |plan|
    # Check that these indexes are actually used by the query
    used = plan.indexes.to_set
    next if used == @query_indexes[plan.query]

    fail InvalidResultsException
  end
end
184
+
185
# Validate the support query plans for each update
#
# The workload is also stored on each update plan in its @workload
# instance variable before its support query plans are checked.
# @return [void]
def validate_update_plans
  @update_plans.each do |update_plan|
    update_plan.instance_variable_set :@workload, @workload
    validate_query_plans update_plan.query_plans
  end
end
194
+ end
195
+
196
# Raised when validation finds that search results are inconsistent
class InvalidResultsException < StandardError; end
199
+ end
200
+ end