nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,266 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'search/constraints'
4
+ require_relative 'search/problem'
5
+ require_relative 'search/results'
6
+
7
+ require 'logging'
8
+ require 'ostruct'
9
+ require 'tempfile'
10
+
11
+ module NoSE
12
+ # ILP construction and schema search
13
+ module Search
14
+ # Searches for the optimal indices for a given workload
15
+ class Search
16
# Set up a search over the statements in a workload
# @param workload the workload to search over, kept for later use when
#   computing weights, costs and plans
# @param cost_model the cost model handed to the planners and the solver
# @param objective the optimization objective, Objective::COST by default
# @param by_id_graph whether indexes are grouped by their ID graph
# @raise [RuntimeError] if ID graphs are requested together with an
#   objective other than Objective::COST
def initialize(workload, cost_model, objective = Objective::COST,
               by_id_graph = false)
  @logger = Logging.logger['nose::search']
  @workload = workload
  @cost_model = cost_model
  @objective = objective
  @by_id_graph = by_id_graph

  # For now we only support optimization based on cost when grouping by
  # ID graphs, but support for other objectives is still feasible
  return unless @by_id_graph && objective != Objective::COST

  raise 'Only cost-based optimization allowed when using ID graphs'
end
29
+
30
# Search for optimal indices using an ILP which searches for
# non-overlapping indices
# @param indexes the candidate indexes to choose from
# @param max_space the space limit forwarded to the solver as :max_space
#   (unbounded by default)
# @return [Results, nil] the search results, or nil when no candidate
#   indexes were given
def search_overlap(indexes, max_space = Float::INFINITY)
  return if indexes.empty?

  # Get the costs of all queries and updates
  weights = combine_query_weights(indexes)
  costs, trees = query_costs(weights, indexes)
  costs_of_updates, plans_for_updates = update_costs(trees, indexes)

  log_search_start(costs, weights)

  search_result(
    weights,
    indexes,
    {
      max_space: max_space,
      costs: costs,
      update_costs: costs_of_updates,
      cost_model: @cost_model,
      by_id_graph: @by_id_graph
    },
    trees,
    plans_for_updates
  )
end
53
+
54
+ private
55
+
56
# Combine the weights of queries and statements
#
# Support queries for the given indexes take the weight of the statement
# they support; every Query in the workload keeps its own weight.
# Statements which are not queries are left out.
# @param indexes the candidate indexes (reduced to their unique ID graphs
#   first when searching by ID graph)
# @return [Hash] a mapping from each query to its weight
def combine_query_weights(indexes)
  indexes = indexes.map(&:to_id_graph).uniq if @by_id_graph

  support_queries = @workload.support_queries(indexes)
  weights = @workload.statement_weights

  query_weights = support_queries.map do |query|
    [query, weights[query.statement]]
  end.to_h

  # merge! returns the receiver, so this is also the return value
  query_weights.merge!(weights.select { |stmt, _| stmt.is_a? Query }.to_h)
end
69
+
70
# Emit a debug message listing the computed costs followed by every
# query in the search, each prefixed with its position
# @param costs the costs to pretty-print
# @param query_weights [Hash] the queries (keys) taking part in the search
# @return [void]
def log_search_start(costs, query_weights)
  @logger.debug do
    header = "Costs: \n" + pp_s(costs) + "\nSearch with queries:\n"

    numbered = query_weights.each_key.each_with_index.map do |query, position|
      "#{position} #{query.inspect}"
    end

    header + numbered.join("\n")
  end
end
81
+
82
# Run the solver and get the results of search
# @param query_weights [Hash] queries (keys) with their weights
# @param indexes the candidate indexes
# @param solver_params [Hash] the data handed to the solver
# @param trees the query plan trees used to pick plans for the result
# @param update_plans [Hash] candidate update plans grouped per statement
# @return [Results]
def search_result(query_weights, indexes, solver_params, trees,
                  update_plans)
  # Solve the LP using MIPPeR
  result = solve_mipper(query_weights.keys, indexes, **solver_params)

  result.workload = @workload
  result.plans_from_trees(trees)
  result.cost_model = @cost_model

  # Keep only the update plans whose index was actually selected
  relevant_plans = update_plans.values.flatten(1).select do |plan|
    result.indexes.include?(plan.index)
  end
  plan_selector = result.method(:select_plan)
  relevant_plans.each { |plan| plan.select_query_plans(&plan_selector) }
  result.update_plans = relevant_plans

  result.validate

  result
end
106
+
107
# Pick, for each non-support query, the cheapest plan which can be
# answered using the given set of indexes
# @param trees the plan trees, one per query
# @param indexes the indexes a plan is allowed to use
# @return [Array<Plans::QueryPlan>]
def select_plans(trees, indexes)
  trees.each_with_object([]) do |tree, chosen|
    # Exclude support queries since they will be in update plans
    next if tree.query.is_a?(SupportQuery)

    # Select the exact plan to use for these indexes
    cheapest = tree.select_using_indexes(indexes).min_by(&:cost)
    chosen << cheapest unless cheapest.nil?
  end
end
119
+
120
# Solve the index selection problem using MIPPeR
# @param queries the queries to optimize for
# @param indexes the candidate indexes (only used to number the selected
#   indexes in the debug log)
# @param data [Hash] the solver parameters passed on to the Problem
# @return [Results]
def solve_mipper(queries, indexes, data)
  # Construct and solve the ILP
  problem = Problem.new(queries, @workload.updates, data, @objective)
  problem.solve

  # We won't get here if there's no valid solution
  @logger.debug "Found solution with total cost #{problem.objective_value}"

  # Log the selected indices next to their position among the candidates
  chosen = problem.selected_indexes
  @logger.debug do
    listing = chosen.map do |index|
      "#{indexes.index(index)} #{index.inspect}"
    end

    "Selected indexes:\n" + listing.join("\n")
  end

  problem.result
end
142
+
143
# Produce the cost of updates in the workload
# @param trees the query plan trees handed to the update planner
# @param indexes the candidate indexes
# @return [Array] a pair of hashes keyed by statement: the first maps to
#   a hash of costs per index, the second to the list of update plans
def update_costs(trees, indexes)
  planner = Plans::UpdatePlanner.new(@workload.model, trees, @cost_model,
                                     @by_id_graph)
  costs = Hash.new { |hash, statement| hash[statement] = {} }
  plans = Hash.new { |hash, statement| hash[statement] = [] }

  # Queries never modify data, so only the other statements are planned
  non_queries = @workload.statements.reject { |stmt| stmt.is_a?(Query) }
  non_queries.each do |statement|
    populate_update_costs(planner, statement, indexes, costs, plans)
  end

  [costs, plans]
end
158
+
159
# Populate the cost of all necessary plans for the given statement
# @param planner the planner asked for update plans via
#   find_plans_for_update
# @param statement the statement to find update plans for
# @param indexes the candidate indexes
# @param update_costs [Hash] filled with statement => { index => cost }
# @param update_plans [Hash] filled with statement => [plan, ...]
# @return [void]
def populate_update_costs(planner, statement, indexes,
                          update_costs, update_plans)
  # The weight depends only on the statement, so look it up once
  weight = @workload.statement_weights[statement]

  planner.find_plans_for_update(statement, indexes).each do |plan|
    update_costs[statement][plan.index] = plan.update_cost * weight
    update_plans[statement] << plan
  end
end
169
+
170
# Get the cost of using each index for each query in a workload
# @param query_weights [Hash] queries (keys) with their weights
# @param indexes the candidate indexes given to the query planner
# @return [Array] a pair holding a hash from each query to its costs and
#   the list of plan trees, in the iteration order of query_weights
def query_costs(query_weights, indexes)
  planner = Plans::QueryPlanner.new(@workload, indexes, @cost_model)

  # Each entry is the [costs, tree] pair produced for one query
  per_query = Parallel.map(query_weights) do |query, weight|
    query_cost(planner, query, weight)
  end

  cost_by_query = {}
  query_weights.each_key.with_index do |query, position|
    cost_by_query[query] = per_query[position].first
  end

  [cost_by_query, per_query.map(&:last)]
end
183
+
184
# Get the cost for indices for an individual query
# @param planner the planner asked for plans via find_plans_for_query
# @param query the query to find plans and costs for
# @param weight the weight the step costs are multiplied by
# @return [Array] a pair of the costs per index and the query's plan tree
def query_cost(planner, query, weight)
  costs_by_index = {}
  tree = planner.find_plans_for_query(query)

  tree.each do |plan|
    # Split the plan into runs of steps, each run starting at an index
    # lookup and holding the steps which follow it
    grouped_steps = []
    plan.each do |step|
      if step.is_a?(Plans::IndexLookupPlanStep)
        grouped_steps << [step]
        next
      end

      grouped_steps.last << step
    end

    populate_query_costs(costs_by_index, grouped_steps, weight, query, tree)
  end

  [costs_by_index, tree]
end
204
+
205
# Store the costs and indexes for this plan in a hash keyed by index
#
# Each value is a two-element array with the plan steps starting at the
# lookup on that index, along with the weighted cost of those steps
# (excluding any sort step). An index is recorded the first time it is
# seen; later plans using the same index must produce the same cost.
# @param query_costs [Hash] filled with index => [steps, cost]
# @param steps_by_index [Array<Array>] plan steps grouped so each group
#   starts with an index lookup step
# @param weight the weight the step costs are multiplied by
# @param query the query being planned (only used for diagnostics)
# @param tree the plan tree of the query (only used for diagnostics)
# @raise [RuntimeError] if an index already has a recorded cost which
#   differs from the newly computed one by 10E-6 or more
# @return [void]
def populate_query_costs(query_costs, steps_by_index, weight,
                         query, tree)
  steps_by_index.each do |steps|
    # The first step of each group is the index lookup
    index = steps.first.index

    # Calculate the cost for just these steps in the plan
    cost = steps.sum_by(&:cost) * weight

    # Don't count the cost for sorting at the end
    sort_step = steps.find { |s| s.is_a? Plans::SortPlanStep }
    cost -= sort_step.cost * weight unless sort_step.nil?

    unless query_costs.key? index
      # First time this index is seen for the query
      query_costs[index] = [steps, cost]
      next
    end

    # We must always have the same cost
    current_steps, current_cost = query_costs[index]
    next unless (current_cost - cost).abs >= 10E-6

    report_cost_mismatch query, index, current_steps, current_cost,
                         steps, cost, tree
    fail "Index #{index.key} does not have equivalent cost " \
         "(current: #{current_cost}, discovered: #{cost})"
  end
end

# Print details about two plans which disagree on the cost of an index
# @return [void]
def report_cost_mismatch(query, index, current_steps, current_cost,
                         steps, cost, tree)
  p query
  puts "Index #{index.key} does not have equivalent cost"
  puts "Current cost: #{current_cost}, discovered cost: #{cost}"

  puts "\nCurrent steps"
  current_steps.each { |s| p s }

  puts "\nDiscovered steps"
  steps.each { |s| p s }
  puts

  # Show every plan using the index, cheapest first, with per-step costs
  puts '======================================='
  tree.sort_by(&:cost).each do |plan|
    next unless plan.indexes.include?(index)
    plan.each do |step|
      print(format('%.3f', step.cost).rjust(7) + ' ')
      p step
    end
    puts "#{format('%.3f', plan.cost).rjust(7)} total"
    puts '======================================='
  end

  puts
  p tree
end
264
+ end
265
+ end
266
+ end