nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,328 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logging'
4
+
5
+ begin
6
+ require 'mipper'
7
+ rescue LoadError
8
+ # We can't use most search functionality, but it won't explode
9
+ nil
10
+ end
11
+
12
+ module NoSE
13
+ module Search
14
# Enumeration of the objective functions the search can minimize
module Objective
  # Total cost of the statements in the workload
  COST = 1

  # Total space used by the generated indexes
  SPACE = 2

  # Total number of indexes
  INDEXES = 3
end
25
+
26
+ # A representation of a search problem as an ILP
27
+ class Problem
28
+ attr_reader :model, :status, :queries, :updates,
29
+ :index_vars, :query_vars, :indexes, :data,
30
+ :objective_type, :objective_value
31
+
32
# Store the workload and cost data, then build the ILP model
# @param queries the queries to select plans for
# @param updates the update statements in the workload
# @param data [Hash] search input; +:costs+ maps each query to a hash
#   keyed by index
# @param objective one of the Objective constants (cost by default)
def initialize(queries, updates, data, objective = Objective::COST)
  @logger = Logging.logger['nose::search::problem']
  @queries = queries
  @updates = updates
  @data = data
  @objective_type = objective
  @status = nil

  # Every index which appears in some query's cost table is a candidate
  @indexes = @data[:costs].flat_map do |_query, index_costs|
    index_costs.keys
  end.uniq

  setup_model
end
43
+
44
# Optimize the model and record the solver status and objective value.
# The initial call (no +previous_type+) with an objective other than
# INDEXES re-solves with the INDEXES objective; a re-solve triggered by
# anything other than SPACE is followed by a final SPACE pass.
# Does nothing when a status has already been recorded.
# @param previous_type the objective constant of the pass which triggered
#   this call, or nil for the initial solve
# @return [void]
# @raise [NoSolutionException] if the solver status is not :optimized
def solve(previous_type = nil)
  return unless @status.nil?

  @model.optimize
  @status = model.status
  fail NoSolutionException, @status unless @status == :optimized

  # Remember the value reached for the current objective
  @objective_value = @obj_var.value

  initial_pass = previous_type.nil?
  if initial_pass && @objective_type != Objective::INDEXES
    solve_next Objective::INDEXES
    return
  elsif !initial_pass && previous_type != Objective::SPACE
    solve_next Objective::SPACE
    return
  end

  # Fall back to the model's objective if the variable has no value
  @objective_value = @model.objective_value if @objective_value.nil?

  @logger.debug do
    "Final objective value is #{@objective.inspect}" \
      " = #{@objective_value}"
  end
end
72
+
73
# The indexes whose variable has a truthy value in the solution.
# The set is built on the first call and reused afterwards.
# @return [Set<Index>, nil] nil when no solver status has been recorded
def selected_indexes
  return if @status.nil?

  @selected_indexes ||=
    @index_vars.select { |_index, var| var.value }.keys.to_set
end
83
+
84
# Package the outcome of the ILP into a Results object
# @return [Results]
def result
  Results.new(self, @data[:by_id_graph]).tap do |results|
    results.enumerated_indexes = indexes
    results.indexes = selected_indexes

    # TODO: Update for indexes grouped by ID path
    results.total_size = selected_indexes.sum_by(&:size)
    results.total_cost = @objective_value
  end
end
97
+
98
# Expression for the size of all indexes, each weighted by its variable
# @return [MIPPeR::LinExpr, nil] nil when there are no indexes
def total_size
  # TODO: Update for indexes grouped by ID path
  weighted_sizes = @indexes.map do |index|
    @index_vars[index] * (index.size * 1.0)
  end

  weighted_sizes.reduce { |sum, term| sum + term }
end
106
+
107
# Expression for the cost of every query over every index, followed by
# the costs contributed by updates
# @return [MIPPeR::LinExpr]
def total_cost
  query_costs = @queries.reduce(MIPPeR::LinExpr.new) do |expr, query|
    per_query = @indexes.reduce(MIPPeR::LinExpr.new) do |subexpr, index|
      term = total_query_cost(@data[:costs][query][index],
                              @query_vars[index][query],
                              @sort_costs[query][index],
                              @sort_vars[query][index])
      subexpr.add term
    end

    expr.add per_query
  end

  add_update_costs query_costs
end
122
+
123
# Expression summing every index variable with unit weight
# @return [MIPPeR::LinExpr]
def total_indexes
  @index_vars.each_value.reduce(MIPPeR::LinExpr.new) do |total, var|
    total + var * 1.0
  end
end
131
+
132
+ private
133
+
134
# Fix the objective variable at the value just found, switch to the
# given objective and solve again
# @param objective_type the Objective constant to optimize next
# @return [void]
def solve_next(objective_type)
  # Lower and upper bound are both set to the value just reached
  @obj_var.lower_bound = @objective_value
  @obj_var.upper_bound = @objective_value

  case objective_type
  when Objective::INDEXES
    @objective_type = Objective::INDEXES
    define_objective 'objective_indexes'
  when Objective::SPACE
    @objective_type = Objective::SPACE
    define_objective 'objective_space'
  end

  # Clear the status so that solve does not return early
  @status = nil
  solve objective_type
end
151
+
152
# When debug logging is enabled, write the model to a temporary MPS file
# and log where it was written
# @param type [String] label placed at the start of the log message
# @return [void]
def log_model(type)
  @logger.debug do
    model_file = Tempfile.new(['model', '.mps'])
    # Presumably removed so the file outlives the Tempfile object
    ObjectSpace.undefine_finalizer model_file
    path = model_file.path
    @model.write_mps path
    "#{type} written to #{path}"
  end
end
162
+
163
# Construct the ILP: create the solver model, then add the variables,
# the constraints and the objective
# @return [void]
def setup_model
  @model = MIPPeR::CbcModel.new

  # Variables are added and flushed before anything refers to them
  add_variables
  prepare_sort_costs
  @model.update

  add_constraints
  define_objective
  @model.update

  log_model('Model')
end
179
+
180
+ private
181
+
182
# Build the expression for the current objective type and bind it to a
# new objective variable which the model minimizes
# @param var_name [String] name for the variable; only used when the
#   NOSE_LOG environment variable is 'debug'
# @return [void]
def define_objective(var_name = 'objective')
  expression =
    if @objective_type == Objective::COST
      total_cost
    elsif @objective_type == Objective::SPACE
      total_size
    elsif @objective_type == Objective::INDEXES
      total_indexes
    end

  # Leave the variable unnamed unless debug logging is enabled
  var_name = nil unless ENV['NOSE_LOG'] == 'debug'
  @obj_var = MIPPeR::Variable.new(0, Float::INFINITY, 1.0,
                                  :continuous, var_name)
  @model << @obj_var
  @model.update

  # Constrain the variable to equal the expression
  @model << MIPPeR::Constraint.new(expression + @obj_var * -1.0, :==, 0.0)

  @logger.debug { "Objective function is #{expression.inspect}" }

  @objective = expression
  @model.sense = :min
end
208
+
209
# Create a binary variable for every (index, query) pair and one for
# every index. When grouping by ID graph, also create a variable for each
# index's ID graph and constrain it to be selected whenever the index is.
# Variables and constraints are only named when the NOSE_LOG environment
# variable is 'debug'.
# @return [void]
def add_variables
  debug = ENV['NOSE_LOG'] == 'debug'
  @index_vars = {}
  @query_vars = {}

  @indexes.each do |index|
    @query_vars[index] = {}
    @queries.each_with_index do |query, q|
      query_var = debug ? "q#{q}_#{index.key}" : nil
      var = MIPPeR::Variable.new 0, 1, 0, :binary, query_var
      @model << var
      @query_vars[index][query] = var
    end

    # Reuse the variable if this index was already registered as the ID
    # graph of an earlier index; replacing it would leave the constraint
    # added below pointing at a variable which never reaches the model
    @index_vars[index] ||= MIPPeR::Variable.new(0, 1, 0, :binary,
                                                debug ? index.key : nil)

    # If needed when grouping by ID graph, add an extra
    # variable for the base index based on the ID graph
    next unless @data[:by_id_graph]
    id_graph = index.to_id_graph
    next if id_graph == index

    # Name the ID graph variable after the ID graph itself so it does
    # not share a name with the variable of the index
    unless @index_vars.key? id_graph
      @index_vars[id_graph] = MIPPeR::Variable.new(
        0, 1, 0, :binary, debug ? id_graph.key : nil
      )
    end

    # Ensure that the ID graph of this index is present if we use it
    name = debug ? "ID_#{id_graph.key}_#{index.key}" : nil
    constr = MIPPeR::Constraint.new(@index_vars[id_graph] * 1.0 +
                                    @index_vars[index] * -1.0,
                                    :>=, 0, name)
    @model << constr
  end

  # Index variables are added to the model after the loop has finished
  @index_vars.each_value { |var| @model << var }
end
250
+
251
# For each query and index whose plan steps include a sort step, record
# the cost of the sort, create a binary sort variable and constrain that
# variable to be at least the corresponding query variable
# @return [void]
def prepare_sort_costs
  @sort_costs = {}
  @sort_vars = {}

  @data[:costs].each do |query, index_costs|
    query_sort_costs = @sort_costs[query] = {}
    query_sort_vars = @sort_vars[query] = {}

    index_costs.each do |index, (steps, _)|
      sort_step = steps.find { |step| step.is_a?(Plans::SortPlanStep) }
      next if sort_step.nil?

      query_sort_costs[index] ||= sort_step.cost
      q = @queries.index(query)

      # Names are only assigned when debug logging is enabled
      var_name = ENV['NOSE_LOG'] == 'debug' ? "s#{q}" : nil
      sort_var = MIPPeR::Variable.new(0, 1, 0, :binary, var_name)
      query_sort_vars[index] ||= sort_var
      @model << sort_var

      constr_name = if ENV['NOSE_LOG'] == 'debug'
                      "q#{q}_#{index.key}_sort"
                    else
                      var_name
                    end
      constr = MIPPeR::Constraint.new(query_sort_vars[index] * 1.0 +
                                      @query_vars[index][query] * -1.0,
                                      :>=, 0, constr_name)
      @model << constr
    end
  end
end
280
+
281
# Apply each group of constraints to this problem, in order, and log how
# many constraints the model then contains
# @return [void]
def add_constraints
  constraint_groups = [
    IndexPresenceConstraints,
    SpaceConstraint,
    CompletePlanConstraints
  ]
  constraint_groups.each { |group| group.apply(self) }

  @logger.debug do
    "Added #{@model.constraints.count} constraints to model"
  end
end
294
+
295
# Add to the expression the cost of each update for every index which
# that update modifies
# @param min_cost [MIPPeR::LinExpr] the expression to extend
# @return [MIPPeR::LinExpr] the same expression that was passed in
def add_update_costs(min_cost)
  @updates.each do |update|
    @indexes.each do |candidate|
      # When grouping by ID graph, costs are keyed by the ID graph
      target = data[:by_id_graph] ? candidate.to_id_graph : candidate
      next unless update.modifies_index?(target)

      update_cost = @data[:update_costs][update][target]
      min_cost.add(@index_vars[target] * update_cost)
    end
  end

  min_cost
end
310
+
311
# Cost term for one query on one index: the query variable weighted by
# the last element of +cost+, plus the weighted sort variable when a sort
# cost is given
# @return [MIPPeR::LinExpr] an empty expression when +cost+ is nil
def total_query_cost(cost, query_var, sort_cost, sort_var)
  return MIPPeR::LinExpr.new if cost.nil?

  expression = query_var * (cost.last * 1.0)
  return expression if sort_cost.nil?

  expression + sort_var * sort_cost
end
322
+ end
323
+
324
# Raised when the solver does not reach an optimized status
class NoSolutionException < StandardError; end
327
+ end
328
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Search
5
+ # A container for results from a schema search
6
+ class Results
7
+ attr_reader :cost_model
8
+ attr_accessor :enumerated_indexes, :indexes, :total_size, :total_cost,
9
+ :workload, :update_plans, :plans,
10
+ :revision, :time, :command, :by_id_graph
11
+
12
# Record which indexes the solved problem assigned to each query.
# Nothing beyond storing +problem+ happens when it is nil.
# @param problem the solved search problem, if any
# @param by_id_graph whether indexes are grouped by ID graph
def initialize(problem = nil, by_id_graph = false)
  @problem = problem
  return if problem.nil?

  @by_id_graph = by_id_graph

  # Find the indexes the ILP says the query should use
  @query_indexes = Hash.new { |hash, query| hash[query] = Set.new }
  @problem.query_vars.each do |index, vars_by_query|
    vars_by_query.each do |query, var|
      @query_indexes[query].add(index) if var.value
    end
  end
end
26
+
27
# The model of the workload when a workload is set, otherwise the model
# stored directly on the results
# @return [Model]
def model
  return @model if @workload.nil?

  @workload.model
end
32
+
33
# Store the model on the workload when one is set, otherwise on the
# results themselves
# @return [void]
def model=(model)
  unless @workload.nil?
    @workload.instance_variable_set(:@model, model)
    return
  end

  @model = model
end
42
+
43
# Replace the cost model; costs are recalculated with the new model
# before it is stored
# @return [void]
def cost_model=(new_cost_model)
  recalculate_cost(new_cost_model)
  @cost_model = new_cost_model
end
49
+
50
# Recompute the cost of every step in the query and update plans, then
# set the total cost to the sum of plan costs weighted by the statement
# weights of the workload
# @param new_cost_model cost model to apply; the current cost model is
#   used when nil
# @return [void]
def recalculate_cost(new_cost_model = nil)
  effective_model = new_cost_model.nil? ? @cost_model : new_cost_model

  (@plans || []).each do |plan|
    plan.each { |step| step.calculate_cost effective_model }
  end
  (@update_plans || []).each do |plan|
    plan.update_steps.each { |step| step.calculate_cost effective_model }
    plan.query_plans.each do |support_plan|
      support_plan.each { |step| step.calculate_cost effective_model }
    end
  end

  # Recalculate the total
  weighted_queries = (@plans || []).sum_by do |plan|
    plan.cost * @workload.statement_weights[plan.query]
  end
  weighted_updates = (@update_plans || []).sum_by do |plan|
    plan.cost * @workload.statement_weights[plan.statement]
  end
  @total_cost = weighted_queries + weighted_updates
end
74
+
75
# Run every consistency check on the results and then freeze them
# @return [void]
# @raise [InvalidResultsException] if a workload query has no plan
def validate
  validate_indexes
  validate_query_indexes @plans
  validate_update_indexes

  # Every query in the workload must have a plan
  planned_queries = plans.map(&:query).to_set
  unplanned = @workload.queries.to_set - planned_queries
  fail InvalidResultsException unless unplanned.empty?
  validate_query_plans @plans

  validate_update_plans
  validate_objective

  freeze
end
92
+
93
# Choose one plan from each plan tree and store the chosen plans
# @param trees the plan trees, one per query
# @return [void]
def plans_from_trees(trees)
  @plans = trees.each_with_object([]) do |tree, chosen|
    # Exclude support queries since they will be in update plans
    next if tree.query.is_a?(SupportQuery)

    plan = select_plan(tree)
    chosen << plan unless plan.nil?
  end
end
104
+
105
# Select the plan in the tree which uses exactly the indexes the ILP
# assigned to the tree's query, and attach the workload to it
# @param tree the plan tree for a single query
# @return [Plans::QueryPlan]
# @raise [InvalidResultsException] if no plan in the tree matches
def select_plan(tree)
  query = tree.query
  plan = tree.find do |tree_plan|
    tree_plan.indexes.to_set == @query_indexes[query]
  end

  # Check for a missing plan before touching it; setting an instance
  # variable on nil would raise an unrelated error instead
  fail InvalidResultsException if plan.nil?

  plan.instance_variable_set :@workload, @workload
  plan
end
118
+
119
+ private
120
+
121
# Check that every selected index was among the enumerated indexes
# @return [void]
# @raise [InvalidResultsException] if a selected index was not enumerated
def validate_indexes
  candidates = @indexes.dup

  # We may not have enumerated ID graphs, so they are not checked
  if @by_id_graph
    @indexes.each { |index| candidates.delete(index.to_id_graph) }
  end

  unexpected = candidates - @enumerated_indexes
  fail InvalidResultsException unless unexpected.empty?
end
133
+
134
# Check that each update plan's support queries use selected indexes and
# that the index the plan targets was itself selected
# @return [void]
# @raise [InvalidResultsException] if a plan targets an unselected index
def validate_update_indexes
  @update_plans.each do |update_plan|
    validate_query_indexes update_plan.query_plans
    fail InvalidResultsException unless @indexes.include?(update_plan.index)
  end
end
143
+
144
# Compare the stored total against a value recomputed from the plans
# (cost objective) or from the selected indexes (space objective).
# Other objective types are not checked.
# @return [void]
# @raise [InvalidResultsException] if the values differ by 0.001 or more
def validate_objective
  case @problem.objective_type
  when Objective::COST
    query_cost = @plans.inject(0) do |sum, plan|
      sum + @workload.statement_weights[plan.query] * plan.cost
    end
    update_cost = @update_plans.inject(0) do |sum, plan|
      sum + @workload.statement_weights[plan.statement] * plan.cost
    end
    difference = (query_cost + update_cost - @total_cost).abs

    fail InvalidResultsException unless difference < 0.001
  when Objective::SPACE
    difference = (@indexes.sum_by(&:size) - @total_size).abs
    fail InvalidResultsException unless difference < 0.001
  end
end
162
+
163
# Check that every index lookup step in the given plans uses one of the
# selected indexes
# @param plans the query plans to check
# @return [void]
# @raise [InvalidResultsException] if a lookup uses an unselected index
def validate_query_indexes(plans)
  plans.each do |plan|
    plan.each do |step|
      next unless step.is_a?(Plans::IndexLookupPlanStep)

      fail InvalidResultsException unless @indexes.include?(step.index)
    end
  end
end
174
+
175
# Check that each plan uses exactly the indexes the ILP assigned to its
# query
# @param plans the query plans to check
# @return [void]
# @raise [InvalidResultsException] if a plan's indexes differ
def validate_query_plans(plans)
  plans.each do |plan|
    used = plan.indexes.to_set
    assigned = @query_indexes[plan.query]
    fail InvalidResultsException unless used == assigned
  end
end
184
+
185
# Attach the workload to every update plan and check the plans of its
# support queries
# @return [void]
def validate_update_plans
  @update_plans.each do |update_plan|
    update_plan.instance_variable_set(:@workload, @workload)
    validate_query_plans(update_plan.query_plans)
  end
end
194
+ end
195
+
196
# Raised when the results of a search fail validation
class InvalidResultsException < StandardError; end
199
+ end
200
+ end