nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Plans
5
# A plan step which truncates the results produced by a previous lookup
# (this is only expected to appear as the final step of a plan)
class LimitPlanStep < PlanStep
  attr_reader :limit

  # @param limit the maximum number of results which should be kept
  # @param state the state before the limit is applied; when given, a
  #   copy is stored with its cardinality replaced by the limit
  def initialize(limit, state = nil)
    super()
    @limit = limit

    unless state.nil?
      @state = state.dup
      @state.cardinality = @limit
    end
  end

  # Limit steps are equal when they are instances of the same class
  # and were constructed with equal limits
  def ==(other)
    return false unless other.instance_of?(self.class)

    @limit == other.limit
  end
  alias eql? ==

  # The limit value doubles as the hash of the step
  def hash
    @limit
  end

  # Produce a limit step when the query specifies a limit and the state
  # reports itself answered once the limit check is disabled
  # @return [LimitPlanStep, nil]
  def self.apply(_parent, state)
    # TODO: Apply if have IDs of the last entity set
    #       with no filter/sort needed

    no_limit = state.query.limit.nil?
    return nil if no_limit || !state.answered?(check_limit: false)

    LimitPlanStep.new(state.query.limit, state)
  end
end
41
+ end
42
+ end
@@ -0,0 +1,361 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+ require 'ostruct'
5
+
6
+ module NoSE
7
+ module Plans
8
# Tracks what is still left to resolve for a query while a plan is built
class QueryState
  attr_accessor :fields, :eq, :range, :order_by, :graph,
                :joins, :cardinality, :hash_cardinality, :given_fields
  attr_reader :query, :model

  # Seed the state from the query: selected fields, predicates, ordering,
  # the query graph, and the join order of the materialized view's graph
  def initialize(query, model)
    @query = query
    @model = model
    @fields = query.select
    @eq = query.eq_fields.dup
    @range = query.range_field
    @graph = query.graph
    @joins = query.materialize_view.graph.join_order(@eq)

    # Ordering by a field which has an equality predicate is pointless
    # since every row will hold the same value for that field
    @order_by = query.order.dup - @eq.to_a

    @cardinality = 1 # this will be updated on the first index lookup
    @hash_cardinality = 1
    @given_fields = @eq.dup
  end

  # Every field still referenced by the state: the selected fields,
  # equality predicate fields and (when present) the range field
  def all_fields
    referenced = @fields + @eq
    referenced << @range unless @range.nil?
    referenced
  end

  # :nocov:
  def to_color
    colorize = ->(list) { list.map(&:to_color).to_a.to_color }
    range_name = @range.nil? ? '(nil)' : @range.name

    @query.text +
      "\n fields: " + colorize.call(@fields) +
      "\n eq: " + colorize.call(@eq) +
      "\n range: " + range_name +
      "\n order: " + colorize.call(@order_by) +
      "\n graph: " + @graph.inspect
  end
  # :nocov:

  # Check if the query has been fully answered
  # @param check_limit [Boolean] when true (the default), a query with a
  #   limit is only answered once the cardinality is within that limit
  # @return [Boolean]
  def answered?(check_limit: true)
    limit = @query.limit

    unresolved = !@fields.empty? || !@eq.empty? || !@range.nil? ||
                 !@order_by.empty? || !@joins.empty? || !@graph.empty?
    return false if unresolved

    # Nothing further to verify when there is no limit to check
    return true if limit.nil? || !check_limit

    @cardinality <= limit
  end

  # Collect the predicate and ordering fields whose parent entity is in
  # the given graph, along with some or all of the selected fields
  # @param select [Boolean] when true, every selected field in the graph
  #   is included; otherwise fields on leaf entities are left out unless
  #   they belong to include_entity and the graph has more than one entity
  # @return [Array<Field>]
  def fields_for_graph(graph, include_entity, select: false)
    entities = graph.entities
    in_graph = ->(field) { entities.include? field.parent }

    candidates = @eq + @order_by
    candidates << @range unless @range.nil?

    # Fields from leaf entity sets can usually be skipped since they
    # may end up being selected with a separate index lookup
    selected = @fields.select do |field|
      next false unless in_graph.call(field)
      next true if select || !graph.leaf_entity?(field.parent)

      field.parent == include_entity && graph.size > 1
    end

    (candidates + selected).select { |field| in_graph.call(field) }
  end
end
84
+
85
# A tree of possible query plans
class QueryPlanTree
  include Enumerable

  attr_reader :root
  attr_accessor :cost_model

  # @param state the initial state used to construct the root step
  # @param cost_model the cost model passed along when plans are enumerated
  def initialize(state, cost_model)
    @root = RootPlanStep.new(state)
    @cost_model = cost_model
  end

  # Select all plans which use only a given set of indexes
  # (steps which are not index lookups are always permitted)
  def select_using_indexes(indexes)
    select do |plan|
      plan.all? do |step|
        !step.is_a?(Plans::IndexLookupPlanStep) ||
          indexes.include?(step.index)
      end
    end
  end

  # The query this tree of plans is generated for
  # @return [Query]
  def query
    @root.state.query
  end

  # Enumerate all plans in the tree, yielding the result of
  # +parent_steps+ for every step which has no children
  # @return [Enumerator] if no block is given
  # @raise [RuntimeError] if a step without children is not answered
  def each
    # Without a block, hand back an enumerator instead of failing on yield
    return enum_for(:each) unless block_given?

    nodes = [@root]

    until nodes.empty?
      node = nodes.pop
      if node.children.empty?
        # This is just an extra check to make absolutely
        # sure we never consider invalid statement plans
        fail 'query plan tree contains an unanswered plan' \
          unless node.state.answered?

        yield node.parent_steps @cost_model
      else
        nodes.concat node.children.to_a
      end
    end
  end

  # Return the total number of plans for this statement
  # @return [Integer]
  def size
    to_a.size
  end

  # :nocov:
  def to_color(step = nil, indent = 0)
    step = @root if step.nil?
    this_step = ' ' * indent + step.to_color
    this_step << " [yellow]$#{step.cost.round 5}[/]" \
      unless step.is_a?(RootPlanStep) || step.cost.nil?
    this_step + "\n" + step.children.map do |child_step|
      to_color child_step, indent + 1
    end.reduce('', &:+)
  end
  # :nocov:
end
149
+
150
# Raised when it is not possible to construct a plan for a statement
class NoPlanException < StandardError; end
153
+
154
# One concrete plan (an ordered list of steps) for answering a query
class QueryPlan < AbstractPlan
  include Comparable
  include Enumerable

  # The underlying array of steps handles most collection behaviour
  extend Forwardable
  def_delegators :@steps, :each, :<<, :[], :==, :===, :eql?,
                 :inspect, :to_s, :to_a, :to_ary, :last, :length, :count

  attr_reader :steps
  attr_accessor :query, :cost_model

  # Start with an empty list of steps for the given query
  def initialize(query, cost_model)
    @steps = []
    @query = query
    @cost_model = cost_model
  end

  # The weight of this query for a given workload
  # (1 whenever no workload has been set on the plan)
  # @return [Integer]
  def weight
    @workload.nil? ? 1 : @workload.statement_weights[@query]
  end

  # Groups for plans are stored in the query
  # @return [String]
  def group
    @query.group
  end

  # Name plans after the associated query
  # @return [String]
  def name
    @query.text
  end

  # Fields selected by this plan
  # @return [Array<Fields::Field>]
  def select_fields
    @query.select
  end

  # Parameters to this execution plan
  def params
    @query.conditions
  end

  # Plans are ordered by their estimated execution cost
  # @return [Integer, nil] the result of comparing the two costs
  def <=>(other)
    cost <=> other.cost
  end

  # The estimated cost of executing the query using this plan
  # @return [Numeric, nil] nil if any step has no cost
  def cost
    step_costs = @steps.map(&:cost)
    return nil if step_costs.any?(&:nil?)

    step_costs.inject(0) { |total, step_cost| total + step_cost }
  end

  # Get the indexes used by this query plan
  # @return [Array<Index>]
  def indexes
    @steps.each_with_object([]) do |step, used|
      used << step.index if step.is_a? IndexLookupPlanStep
    end
  end
end
223
+
224
# A query planner which can construct a tree of query plans
class QueryPlanner
  # @param model the model handed to each new QueryState
  # @param indexes the collection of indexes plans may use
  # @param cost_model the cost model used to cost each plan step
  def initialize(model, indexes, cost_model)
    @logger = Logging.logger['nose::query_planner']

    @model = model
    @indexes = indexes
    @cost_model = cost_model
  end

  # Find a tree of plans for the given query
  # @return [QueryPlanTree]
  # @raise [NoPlanException] if the root of the tree ends up with no children
  def find_plans_for_query(query)
    state = QueryState.new query, @model
    state.freeze
    tree = QueryPlanTree.new state, @cost_model

    indexes_by_joins = indexes_for_query(query, state.joins)
    find_plans_for_step tree.root, indexes_by_joins

    if tree.root.children.empty?
      # Search again without pruning so the tree included in the
      # exception message shows where planning could not continue
      tree = QueryPlanTree.new state, @cost_model
      find_plans_for_step tree.root, indexes_by_joins, prune: false
      fail NoPlanException, "#{query.inspect} #{tree.inspect}"
    end

    @logger.debug { "Plans for #{query.inspect}: #{tree.inspect}" }

    tree
  end

  # Get the minimum cost plan for executing this query
  # @return [QueryPlan]
  def min_plan(query)
    find_plans_for_query(query).min
  end

  private

  # Produce indexes possibly useful for this query
  # grouped by the first entity they join on
  # @return [Hash] sets of indexes keyed by the first entry of +joins+
  #   which appears in the graph of the index
  def indexes_for_query(query, joins)
    indexes_by_joins = Hash.new { |h, k| h[k] = Set.new }

    # The entities of the query are the same for every index
    query_entities = query.graph.entities.to_set

    @indexes.each do |index|
      index_entities = index.graph.entities

      # Limit indices to those which cross the query path
      next unless index_entities.to_set.subset? query_entities

      first_entity = joins.find do |entity|
        index_entities.include?(entity)
      end
      indexes_by_joins[first_entity].add index
    end

    indexes_by_joins
  end

  # Remove plans ending with this step in the tree
  # @return [Boolean] true if pruning resulted in an empty tree
  def prune_plan(prune_step)
    prev_step = nil

    # Walk up the tree while the current step has at most one child,
    # remembering the step we came from so that the whole failed
    # branch can be detached from the first ancestor which has
    # other children
    while prune_step.children.length <= 1 &&
          !prune_step.is_a?(RootPlanStep)
      prev_step = prune_step
      prune_step = prune_step.parent
    end

    # If we reached the root, we have no plan
    return true if prune_step.is_a? RootPlanStep

    # Nothing is removed if we never moved up from the starting step
    prune_step.children.delete prev_step unless prev_step.nil?

    false
  end

  # Find possible query plans for a query starting at the given step
  # @param prune [Boolean] when false, a step with no possible next steps
  #   is given a PrunedPlanStep child instead of being pruned
  # @return [void]
  def find_plans_for_step(step, indexes_by_joins, prune: true)
    return if step.state.answered?

    steps = find_steps_for_state step, step.state, indexes_by_joins

    if !steps.empty?
      step.children = steps
      steps.each { |new_step| new_step.calculate_cost @cost_model }
      steps.each do |child_step|
        # Pass the pruning mode down so that it applies to the
        # whole tree and not only to the step we started from
        find_plans_for_step child_step, indexes_by_joins, prune: prune

        # Remove this step if finding a plan from here failed
        if child_step.children.empty? && !child_step.state.answered?
          step.children.delete child_step
        end
      end
    elsif prune
      return if step.is_a?(RootPlanStep) || prune_plan(step.parent)
    else
      step.children = [PrunedPlanStep.new]
    end
  end

  # Find all possible plan steps not using indexes
  # (none are produced directly below the root step)
  # @return [Array<PlanStep>]
  def find_nonindexed_steps(parent, state)
    return [] if parent.is_a? RootPlanStep

    [SortPlanStep, FilterPlanStep, LimitPlanStep].map do |step_class|
      step_class.apply(parent, state)
    end.flatten.compact
  end

  # Get a list of possible next steps for a query in the given state
  # (index lookups are only tried if no non-indexed step applies)
  # @return [Array<PlanStep>]
  def find_steps_for_state(parent, state, indexes_by_joins)
    steps = find_nonindexed_steps parent, state
    return steps unless steps.empty?

    # Don't allow indices to be used multiple times
    indexes = (indexes_by_joins[state.joins.first] || Set.new).to_set
    used_indexes = parent.parent_steps.indexes.to_set
    (indexes - used_indexes).each do |index|
      new_step = IndexLookupPlanStep.apply parent, index, state
      next if new_step.nil?

      new_step.add_fields_from_index index
      steps.push new_step
    end

    steps
  end
end
360
+ end
361
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Plans
5
# A query plan step which sorts results outside of any index
class SortPlanStep < PlanStep
  attr_reader :sort_fields

  # @param sort_fields the fields to order results by
  # @param state the state after sorting (stored as given, not copied)
  def initialize(sort_fields, state = nil)
    super()

    @sort_fields = sort_fields
    @state = state
  end

  # :nocov:
  def to_color
    prefix = super
    field_list = @sort_fields.map(&:to_color).join(', ')
    prefix + ' [' + field_list + ']'
  end
  # :nocov:

  # Sort steps are equal when they are instances of the same class
  # and sort on equal fields
  def ==(other)
    return false unless other.instance_of?(self.class)

    @sort_fields == other.sort_fields
  end
  alias eql? ==

  # Hash on the IDs of the sort fields
  def hash
    @sort_fields.map(&:id).hash
  end

  # Check if an external sort can be used (if a sort is the last step).
  # This requires that no ID fields remain to be selected, that all
  # predicates are resolved, and that at least one ordering field is
  # among the fields of the parent step.
  # @return [SortPlanStep, nil]
  def self.apply(parent, state)
    return nil if state.fields.any? { |field| field.is_a? Fields::IDField }
    return nil unless state.eq.empty? && state.range.nil?
    return nil if (state.order_by.to_set & parent.fields).empty?

    # The new state has no ordering left to resolve
    sorted_state = state.dup
    sorted_state.order_by = []

    sort_step = SortPlanStep.new(state.order_by, sorted_state)
    sort_step.state.freeze
    sort_step
  end
end
48
+ end
49
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Plans
5
# Common behaviour for plan steps which modify the data in an index
class UpdatePlanStep < PlanStep
  attr_reader :index
  attr_accessor :state

  # @param index the index modified by this step
  # @param type a value identifying the kind of modification
  #   (subclasses in this file pass :insert or :delete)
  # @param state when given, a frozen copy is stored on the step
  def initialize(index, type, state = nil)
    super()
    @index = index
    @type = type

    unless state.nil?
      @state = state.dup
      @state.freeze
    end
  end

  # :nocov:
  def to_color
    "#{super} #{@index.to_color} * #{@state.cardinality}"
  end
  # :nocov:

  # Update steps are equal when they are instances of the same class,
  # use the same index and have the same type
  def ==(other)
    return false unless other.instance_of?(self.class)
    return false unless @index == other.index

    @type == other.instance_variable_get(:@type)
  end
  alias eql? ==

  # Hash on the same values which are compared for equality
  def hash
    [@index, @type].hash
  end
end
37
+
38
# A step which inserts data into a given index
class InsertPlanStep < UpdatePlanStep
  attr_reader :fields

  # @param index the index receiving the data
  # @param state passed through to UpdatePlanStep
  # @param fields the fields to insert; when empty, all fields of the
  #   index are used, otherwise only those which are also in the index
  def initialize(index, state = nil, fields = Set.new)
    super(index, :insert, state)

    chosen = fields.empty? ? index.all_fields : fields.to_set & index.all_fields

    # The hash and order fields of the index are always included
    @fields = chosen + (index.hash_fields + index.order_fields.to_set)
  end
end
52
+
53
# A step which deletes data from a given index
class DeletePlanStep < UpdatePlanStep
  # @param index the index data is removed from
  # @param state passed through to UpdatePlanStep
  def initialize(index, state = nil)
    super(index, :delete, state)
  end
end
59
+ end
60
+ end