nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Plans
5
# A plan step which truncates the results of the preceding steps
# to the limit requested by the query.
# This should only ever occur at the end of a plan
class LimitPlanStep < PlanStep
  attr_reader :limit

  # @param limit the maximum number of results to keep
  # @param state the query state before the limit is applied; when given,
  #   it is copied and the cardinality of the copy is set to the limit
  def initialize(limit, state = nil)
    super()
    @limit = limit

    unless state.nil?
      @state = state.dup
      @state.cardinality = @limit
    end
  end

  # Limit steps compare equal when they are of exactly the same
  # class and carry the same limit value
  def ==(other)
    return false unless other.instance_of?(self.class)

    @limit == other.limit
  end
  alias_method :eql?, :==

  # The limit value itself doubles as the hash code
  def hash
    @limit
  end

  # Check if we can apply a limit
  # @return [LimitPlanStep, nil] a new step, or nil when the query has no
  #   limit or the state is not yet answered (ignoring the limit itself)
  def self.apply(_parent, state)
    # TODO: Apply if have IDs of the last entity set
    # with no filter/sort needed

    return nil if state.query.limit.nil?
    return nil unless state.answered?(check_limit: false)

    LimitPlanStep.new(state.query.limit, state)
  end
end
41
+ end
42
+ end
@@ -0,0 +1,361 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+ require 'ostruct'
5
+
6
+ module NoSE
7
+ module Plans
8
# Tracks what is still left to do in order to answer a query
# as steps are added to an execution plan
class QueryState
  attr_accessor :fields, :eq, :range, :order_by, :graph,
                :joins, :cardinality, :hash_cardinality, :given_fields
  attr_reader :query, :model

  # Seed the state from everything the query asks for
  def initialize(query, model)
    @query = query
    @model = model
    @fields = query.select
    @eq = query.eq_fields.dup
    @range = query.range_field
    @graph = query.graph
    @joins = query.materialize_view.graph.join_order(@eq)

    # Fields with an equality predicate only ever hold a single
    # value in the result, so ordering by them is never needed
    @order_by = query.order.dup - @eq.to_a

    @cardinality = 1 # this will be updated on the first index lookup
    @hash_cardinality = 1
    @given_fields = @eq.dup
  end

  # All the fields referenced anywhere in the query
  # (selected fields, equality fields and the range field, if any)
  def all_fields
    referenced = @fields + @eq
    @range.nil? ? referenced : referenced << @range
  end

  # :nocov:
  def to_color
    range_name = @range.nil? ? '(nil)' : @range.name
    field_list = @fields.map(&:to_color).to_a.to_color
    eq_list = @eq.map(&:to_color).to_a.to_color
    order_list = @order_by.map(&:to_color).to_a.to_color

    @query.text +
      "\n fields: " + field_list +
      "\n eq: " + eq_list +
      "\n range: " + range_name +
      "\n order: " + order_list +
      "\n graph: " + @graph.inspect
  end
  # :nocov:

  # Check if the query has been fully answered
  # @param check_limit [Boolean] whether the query limit must also
  #   already be reflected in the cardinality
  # @return [Boolean]
  def answered?(check_limit: true)
    return false unless @fields.empty? && @eq.empty? && @range.nil?
    return false unless @order_by.empty? && @joins.empty? && @graph.empty?

    # Everything has been resolved, so only the limit is left to check
    limit = @query.limit
    return true if limit.nil? || !check_limit

    @cardinality <= limit
  end

  # Get all fields relevant for filtering in the given
  # graph, optionally including selected fields
  # @return [Array<Field>]
  def fields_for_graph(graph, include_entity, select: false)
    candidates = @eq + @order_by
    candidates << @range unless @range.nil?

    # If necessary, include ALL the fields which should be selected,
    # otherwise we can exclude fields from leaf entity sets since
    # we may end up selecting these with a separate index lookup
    members = graph.entities
    wanted = @fields.select do |field|
      owner = field.parent
      next false unless members.include?(owner)

      select || !graph.leaf_entity?(owner) ||
        (owner == include_entity && graph.size > 1)
    end
    candidates += wanted

    candidates.select { |field| members.include? field.parent }
  end
end
84
+
85
# A tree whose root-to-leaf paths are the possible plans for one query
class QueryPlanTree
  include Enumerable

  attr_reader :root
  attr_accessor :cost_model

  # Start a new tree containing only a root step for the given state
  def initialize(state, cost_model)
    @root = RootPlanStep.new(state)
    @cost_model = cost_model
  end

  # Select all plans which use only a given set of indexes
  def select_using_indexes(indexes)
    select do |plan|
      # Reject the plan as soon as a lookup uses an index not in the set
      plan.none? do |step|
        step.is_a?(Plans::IndexLookupPlanStep) &&
          !indexes.include?(step.index)
      end
    end
  end

  # The query this tree of plans is generated for
  # @return [Query]
  def query
    @root.state.query
  end

  # Enumerate all plans in the tree, yielding the steps leading
  # to each leaf (visited depth-first using an explicit stack)
  def each
    pending = [@root]

    while (node = pending.pop)
      unless node.children.empty?
        pending.concat node.children.to_a
        next
      end

      # This is just an extra check to make absolutely
      # sure we never consider invalid statement plans
      raise unless node.state.answered?

      yield node.parent_steps(@cost_model)
    end
  end

  # Return the total number of plans for this statement
  # @return [Integer]
  def size
    to_a.length
  end

  # :nocov:
  def to_color(step = nil, indent = 0)
    current = step.nil? ? @root : step
    line = ' ' * indent + current.to_color

    hide_cost = current.is_a?(RootPlanStep) || current.cost.nil?
    line << " [yellow]$#{current.cost.round 5}[/]" unless hide_cost

    subtree = current.children.map do |child_step|
      to_color(child_step, indent + 1)
    end
    line + "\n" + subtree.reduce('') { |text, part| text + part }
  end
  # :nocov:
end
149
+
150
# Raised when it is not possible to construct a plan for a statement
class NoPlanException < StandardError; end
153
+
154
# One concrete plan for a query, held as an ordered list of plan steps
class QueryPlan < AbstractPlan
  attr_reader :steps
  attr_accessor :query, :cost_model

  include Comparable
  include Enumerable

  # The plan behaves like its array of steps for most collection methods
  extend Forwardable
  def_delegators :@steps, :each, :<<, :[], :==, :===, :eql?,
                 :inspect, :to_s, :to_a, :to_ary, :last, :length, :count

  # Create an empty plan for the given query
  def initialize(query, cost_model)
    @steps = []
    @query = query
    @cost_model = cost_model
  end

  # The weight of this query for a given workload
  # @return [Integer] 1 when no workload is set on the plan
  def weight
    @workload.nil? ? 1 : @workload.statement_weights[@query]
  end

  # Groups for plans are stored in the query
  # @return [String]
  def group
    @query.group
  end

  # Name plans after the associated query
  # @return [String]
  def name
    @query.text
  end

  # Fields selected by this plan
  # @return [Array<Fields::Field>]
  def select_fields
    @query.select
  end

  # Parameters to this execution plan
  def params
    @query.conditions
  end

  # Two plans are compared by their execution cost
  # @return [Integer, nil] the result of comparing the two costs
  def <=>(other)
    cost <=> other.cost
  end

  # The estimated cost of executing the query using this plan
  # @return [Numeric, nil] the sum of all step costs, or nil
  #   when any step does not yet have a cost
  def cost
    step_costs = @steps.map(&:cost)
    return nil if step_costs.any?(&:nil?)

    step_costs.inject(0, &:+)
  end

  # Get the indexes used by this query plan
  # @return [Array<Index>]
  def indexes
    @steps.grep(IndexLookupPlanStep).map(&:index)
  end
end
223
+
224
# A query planner which can construct a tree of query plans
class QueryPlanner
  # @param model the model handed to each new QueryState
  # @param indexes the candidate indexes which plans may look up
  # @param cost_model the cost model used to cost every plan step
  def initialize(model, indexes, cost_model)
    @logger = Logging.logger['nose::query_planner']

    @model = model
    @indexes = indexes
    @cost_model = cost_model
  end

  # Find a tree of plans for the given query
  # @return [QueryPlanTree]
  # @raise [NoPlanException] if no step could be attached to the root
  def find_plans_for_query(query)
    state = QueryState.new query, @model
    state.freeze
    tree = QueryPlanTree.new state, @cost_model

    indexes_by_joins = indexes_for_query(query, state.joins)
    find_plans_for_step tree.root, indexes_by_joins

    if tree.root.children.empty?
      # Planning failed, so build the tree again without pruning
      # and include it in the error message
      tree = QueryPlanTree.new state, @cost_model
      find_plans_for_step tree.root, indexes_by_joins, prune: false
      fail NoPlanException, "#{query.inspect} #{tree.inspect}"
    end

    @logger.debug { "Plans for #{query.inspect}: #{tree.inspect}" }

    tree
  end

  # Get the minimum cost plan for executing this query
  # @return [QueryPlan]
  def min_plan(query)
    find_plans_for_query(query).min
  end

  private

  # Produce indexes possibly useful for this query
  # grouped by the first entity they join on
  # @return [Hash]
  def indexes_for_query(query, joins)
    indexes_by_joins = Hash.new { |h, k| h[k] = Set.new }

    # The entities of the query do not change between indexes
    query_entities = query.graph.entities.to_set

    @indexes.each do |index|
      # Limit indices to those which cross the query path
      next unless index.graph.entities.to_set.subset? query_entities

      first_entity = joins.find do |entity|
        index.graph.entities.include?(entity)
      end
      indexes_by_joins[first_entity].add index
    end

    indexes_by_joins
  end

  # Remove plans ending with this step in the tree
  # @param prune_step the step at the end of the failed branch
  # @return [Boolean] true if pruning resulted in an empty tree
  def prune_plan(prune_step)
    prev_step = nil

    # Walk up the tree and remove the branch for the failed plan.
    # The child must be remembered BEFORE moving to its parent so that
    # prev_step is the branch hanging off the step where the walk stops.
    while prune_step.children.length <= 1 &&
          !prune_step.is_a?(RootPlanStep)
      prev_step = prune_step
      prune_step = prune_step.parent
    end

    # If we reached the root, we have no plan
    return true if prune_step.is_a? RootPlanStep

    # prev_step is nil when the starting step already had several
    # children, in which case there is nothing to detach here
    prune_step.children.delete prev_step

    false
  end

  # Find possible query plans for a query starting at the given step
  # @param prune [Boolean] when false, a dead end is marked with a
  #   PrunedPlanStep instead of being removed from the tree
  # @return [void]
  def find_plans_for_step(step, indexes_by_joins, prune: true)
    return if step.state.answered?

    steps = find_steps_for_state step, step.state, indexes_by_joins

    if !steps.empty?
      step.children = steps
      steps.each { |new_step| new_step.calculate_cost @cost_model }
      steps.each do |child_step|
        find_plans_for_step child_step, indexes_by_joins

        # Remove this step if finding a plan from here failed
        if child_step.children.empty? && !child_step.state.answered?
          step.children.delete child_step
        end
      end
    elsif prune
      return if step.is_a?(RootPlanStep) || prune_plan(step.parent)
    else
      step.children = [PrunedPlanStep.new]
    end
  end

  # Find all possible plan steps not using indexes
  # @return [Array<PlanStep>] empty when the parent is the root step
  def find_nonindexed_steps(parent, state)
    steps = []
    return steps if parent.is_a? RootPlanStep

    [SortPlanStep, FilterPlanStep, LimitPlanStep].each \
      { |step| steps.push step.apply(parent, state) }
    steps.flatten!
    steps.compact!

    steps
  end

  # Get a list of possible next steps for a query in the given state.
  # Index lookups are only considered when no non-indexed step applies.
  # @return [Array<PlanStep>]
  def find_steps_for_state(parent, state, indexes_by_joins)
    steps = find_nonindexed_steps parent, state
    return steps unless steps.empty?

    # Don't allow indices to be used multiple times
    indexes = (indexes_by_joins[state.joins.first] || Set.new).to_set
    used_indexes = parent.parent_steps.indexes.to_set
    (indexes - used_indexes).each do |index|
      new_step = IndexLookupPlanStep.apply parent, index, state
      next if new_step.nil?

      new_step.add_fields_from_index index
      steps.push new_step
    end

    steps
  end
end
360
+ end
361
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Plans
5
# A query plan step which sorts results outside of any index lookup
class SortPlanStep < PlanStep
  attr_reader :sort_fields

  # @param sort_fields the fields to sort by
  # @param state the query state after sorting (stored as given)
  def initialize(sort_fields, state = nil)
    super()

    @sort_fields = sort_fields
    @state = state
  end

  # :nocov:
  def to_color
    field_list = @sort_fields.map(&:to_color).join(', ')
    super + ' [' + field_list + ']'
  end
  # :nocov:

  # Two sorting steps are equal if they sort on the same fields
  def ==(other)
    return false unless other.instance_of?(self.class)

    @sort_fields == other.sort_fields
  end
  alias_method :eql?, :==

  # Hash on the IDs of the sort fields
  def hash
    @sort_fields.map(&:id).hash
  end

  # Check if an external sort can be used (if a sort is the last step)
  # @return [SortPlanStep, nil] nil unless no ID fields remain to be
  #   fetched, all predicates are resolved and the parent step provides
  #   at least one of the ordering fields
  def self.apply(parent, state)
    checks = [
      state.fields.none? { |f| f.is_a? Fields::IDField },
      state.eq.empty? && state.range.nil?,
      !(state.order_by.to_set & parent.fields).empty?
    ]
    return nil unless checks.all?

    # The resulting state has nothing left to order by
    sorted_state = state.dup
    sorted_state.order_by = []

    SortPlanStep.new(state.order_by, sorted_state).tap do |sort_step|
      sort_step.state.freeze
    end
  end
end
48
+ end
49
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Plans
5
# Common base class for the steps which modify the data in an index
class UpdatePlanStep < PlanStep
  attr_reader :index
  attr_accessor :state

  # @param index the index this step modifies
  # @param type a tag identifying the kind of modification
  # @param state the state for this step; when given, a frozen
  #   copy is stored
  def initialize(index, type, state = nil)
    super()
    @index = index
    @type = type

    @state = state.dup.freeze unless state.nil?
  end

  # :nocov:
  def to_color
    index_text = @index.to_color
    "#{super} #{index_text} * #{@state.cardinality}"
  end
  # :nocov:

  # Two update steps are equal if they are of exactly the same
  # class, use the same index and have the same type
  def ==(other)
    return false unless other.instance_of?(self.class)
    return false unless @index == other.index

    @type == other.instance_variable_get(:@type)
  end
  alias_method :eql?, :==

  # Hash on the same values which are used for equality
  def hash
    [@index, @type].hash
  end
end
37
+
38
# A step which inserts data into a given index
class InsertPlanStep < UpdatePlanStep
  attr_reader :fields

  # @param index the index to insert into
  # @param state the state for this step
  # @param fields the fields to insert; when empty, all fields of the
  #   index are used, otherwise only those which are also in the index
  def initialize(index, state = nil, fields = Set.new)
    super(index, :insert, state)

    chosen = fields.empty? ? index.all_fields : (fields.to_set & index.all_fields)

    # The hash and order fields of the index are always included
    @fields = chosen + (index.hash_fields + index.order_fields.to_set)
  end
end
52
+
53
# A step which deletes data from a given index
class DeletePlanStep < UpdatePlanStep
  # @param index the index to delete from
  # @param state the state for this step
  def initialize(index, state = nil)
    super(index, :delete, state)
  end
end
59
+ end
60
+ end