nose 0.1.0pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/nose/backend/cassandra.rb +390 -0
- data/lib/nose/backend/file.rb +185 -0
- data/lib/nose/backend/mongo.rb +242 -0
- data/lib/nose/backend.rb +557 -0
- data/lib/nose/cost/cassandra.rb +33 -0
- data/lib/nose/cost/entity_count.rb +27 -0
- data/lib/nose/cost/field_size.rb +31 -0
- data/lib/nose/cost/request_count.rb +32 -0
- data/lib/nose/cost.rb +68 -0
- data/lib/nose/debug.rb +45 -0
- data/lib/nose/enumerator.rb +199 -0
- data/lib/nose/indexes.rb +239 -0
- data/lib/nose/loader/csv.rb +99 -0
- data/lib/nose/loader/mysql.rb +199 -0
- data/lib/nose/loader/random.rb +48 -0
- data/lib/nose/loader/sql.rb +105 -0
- data/lib/nose/loader.rb +38 -0
- data/lib/nose/model/entity.rb +136 -0
- data/lib/nose/model/fields.rb +293 -0
- data/lib/nose/model.rb +113 -0
- data/lib/nose/parser.rb +202 -0
- data/lib/nose/plans/execution_plan.rb +282 -0
- data/lib/nose/plans/filter.rb +99 -0
- data/lib/nose/plans/index_lookup.rb +302 -0
- data/lib/nose/plans/limit.rb +42 -0
- data/lib/nose/plans/query_planner.rb +361 -0
- data/lib/nose/plans/sort.rb +49 -0
- data/lib/nose/plans/update.rb +60 -0
- data/lib/nose/plans/update_planner.rb +270 -0
- data/lib/nose/plans.rb +135 -0
- data/lib/nose/proxy/mysql.rb +275 -0
- data/lib/nose/proxy.rb +102 -0
- data/lib/nose/query_graph.rb +481 -0
- data/lib/nose/random/barbasi_albert.rb +48 -0
- data/lib/nose/random/watts_strogatz.rb +50 -0
- data/lib/nose/random.rb +391 -0
- data/lib/nose/schema.rb +89 -0
- data/lib/nose/search/constraints.rb +143 -0
- data/lib/nose/search/problem.rb +328 -0
- data/lib/nose/search/results.rb +200 -0
- data/lib/nose/search.rb +266 -0
- data/lib/nose/serialize.rb +747 -0
- data/lib/nose/statements/connection.rb +160 -0
- data/lib/nose/statements/delete.rb +83 -0
- data/lib/nose/statements/insert.rb +146 -0
- data/lib/nose/statements/query.rb +161 -0
- data/lib/nose/statements/update.rb +101 -0
- data/lib/nose/statements.rb +645 -0
- data/lib/nose/timing.rb +79 -0
- data/lib/nose/util.rb +305 -0
- data/lib/nose/workload.rb +244 -0
- data/lib/nose.rb +37 -0
- data/templates/workload.erb +42 -0
- metadata +700 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NoSE
|
4
|
+
module Plans
|
5
|
+
# Caps the number of results produced by a previous lookup
# A limit is only ever expected as the final step of a plan
class LimitPlanStep < PlanStep
  attr_reader :limit

  # @param limit the maximum number of results to keep
  # @param state [QueryState, nil] the state before the limit is applied;
  #   when given, a copy is stored with its cardinality set to the limit
  def initialize(limit, state = nil)
    super()
    @limit = limit

    unless state.nil?
      # Work on a copy so the state passed in is left untouched
      @state = state.dup
      @state.cardinality = @limit
    end
  end

  # Limit steps are equal when they are of exactly the same class
  # and carry the same limit value
  def ==(other)
    return false unless other.instance_of?(self.class)

    @limit == other.limit
  end
  alias eql? ==

  # The limit value itself is used as the hash code
  def hash
    @limit
  end

  # Produce a limit step if the query has a limit and everything
  # else about the query has already been answered
  # @return [LimitPlanStep, nil] nil when no limit can be applied
  def self.apply(_parent, state)
    # TODO: Apply if have IDs of the last entity set
    #       with no filter/sort needed

    query_limit = state.query.limit
    return nil if query_limit.nil?

    # The limit check is skipped since applying it is the job of this step
    return nil unless state.answered?(check_limit: false)

    LimitPlanStep.new(query_limit, state)
  end
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,361 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
module NoSE
|
7
|
+
module Plans
|
8
|
+
# Tracks what remains to be resolved for a query as plan steps are applied
class QueryState
  attr_accessor :fields, :eq, :range, :order_by, :graph,
                :joins, :cardinality, :hash_cardinality, :given_fields
  attr_reader :query, :model

  # Seed the state from the query: everything the query selects,
  # filters on, orders by and joins over is initially outstanding
  def initialize(query, model)
    @query = query
    @model = model

    @fields = query.select
    @range = query.range_field
    @graph = query.graph

    @eq = query.eq_fields.dup
    @given_fields = @eq.dup
    @joins = query.materialize_view.graph.join_order(@eq)

    # Fields with equality predicates never need ordering since
    # every row will share a single value for them
    @order_by = query.order.dup - @eq.to_a

    # The cardinality will be updated on the first index lookup
    @cardinality = 1
    @hash_cardinality = 1
  end

  # Selected and equality fields together with the range field (if any)
  def all_fields
    (@fields + @eq).tap do |combined|
      combined << @range unless @range.nil?
    end
  end

  # :nocov:
  def to_color
    fields_text = @fields.map(&:to_color).to_a.to_color
    eq_text = @eq.map(&:to_color).to_a.to_color
    range_text = @range.nil? ? '(nil)' : @range.name
    order_text = @order_by.map(&:to_color).to_a.to_color

    @query.text +
      "\n fields: " + fields_text +
      "\n eq: " + eq_text +
      "\n range: " + range_text +
      "\n order: " + order_text +
      "\n graph: " + @graph.inspect
  end
  # :nocov:

  # Check if nothing remains to be resolved for the query
  # @param check_limit [Boolean] whether the cardinality must also
  #   be within the limit of the query (when the query has one)
  # @return [Boolean]
  def answered?(check_limit: true)
    return false unless @fields.empty? && @eq.empty?
    return false unless @range.nil?
    return false unless @order_by.empty? && @joins.empty? && @graph.empty?

    # Without a limit (or when told to ignore it) we are done
    return true if @query.limit.nil? || !check_limit

    @cardinality <= @query.limit
  end

  # Collect the fields relevant for filtering within the given
  # graph, optionally including all selected fields
  # @return [Array<Field>]
  def fields_for_graph(graph, include_entity, select: false)
    entities = graph.entities

    candidates = @eq + @order_by
    candidates << @range unless @range.nil?

    # Selected fields on leaf entity sets can be skipped unless asked
    # for explicitly, since a separate index lookup may select them
    wanted = @fields.select do |field|
      owner = field.parent
      next false unless entities.include?(owner)

      select || !graph.leaf_entity?(owner) ||
        (owner == include_entity && graph.size > 1)
    end
    candidates += wanted

    candidates.select { |field| entities.include? field.parent }
  end
end
|
84
|
+
|
85
|
+
# A tree of possible query plans
# Enumerating the tree yields one plan for every leaf step
class QueryPlanTree
  include Enumerable

  attr_reader :root
  attr_accessor :cost_model

  # @param state [QueryState] the initial state placed at the root
  # @param cost_model the cost model handed to each plan when enumerating
  def initialize(state, cost_model)
    @root = RootPlanStep.new(state)
    @cost_model = cost_model
  end

  # Select all plans which use only a given set of indexes
  # @param indexes a collection of indexes (anything with `include?`)
  # @return [Array] the plans whose index lookups all use the given indexes
  def select_using_indexes(indexes)
    select do |plan|
      plan.all? do |step|
        !step.is_a?(Plans::IndexLookupPlanStep) ||
          indexes.include?(step.index)
      end
    end
  end

  # The query this tree of plans is generated for
  # @return [Query]
  def query
    @root.state.query
  end

  # Enumerate all plans in the tree
  # @yield the result of `parent_steps` for each leaf step
  # @return [Enumerator, nil] an enumerator when called without a block
  # @raise [RuntimeError] if a leaf step has a state which is not answered
  def each
    # Without a block there is nothing to yield to, so hand back an
    # enumerator instead of failing on the first leaf
    return enum_for(:each) unless block_given?

    nodes = [@root]

    until nodes.empty?
      node = nodes.pop
      if node.children.empty?
        # This is just an extra check to make absolutely
        # sure we never consider invalid statement plans
        fail unless node.state.answered?

        yield node.parent_steps @cost_model
      else
        nodes.concat node.children.to_a
      end
    end
  end

  # Return the total number of plans for this statement
  # @return [Integer]
  def size
    to_a.size
  end

  # :nocov:
  # Render the subtree below the given step (the root by default)
  # with one line per step, indented by depth
  def to_color(step = nil, indent = 0)
    step = @root if step.nil?
    this_step = ' ' * indent + step.to_color

    # The cost is shown for every step except the root
    this_step << " [yellow]$#{step.cost.round 5}[/]" \
      unless step.is_a?(RootPlanStep) || step.cost.nil?

    this_step + "\n" + step.children.map do |child_step|
      to_color child_step, indent + 1
    end.reduce('', &:+)
  end
  # :nocov:
end
|
149
|
+
|
150
|
+
# Raised when no plan can be constructed for a statement
class NoPlanException < StandardError; end
|
153
|
+
|
154
|
+
# One concrete plan for a query, held as an ordered list of steps
class QueryPlan < AbstractPlan
  attr_reader :steps
  attr_accessor :query, :cost_model

  include Comparable
  include Enumerable

  # The list of steps does most of the work, so the
  # collection-like methods are forwarded straight to it
  extend Forwardable
  def_delegators :@steps,
                 :each, :<<, :[], :==, :===, :eql?,
                 :inspect, :to_s, :to_a, :to_ary,
                 :last, :length, :count

  # @param query the query this plan answers
  # @param cost_model the cost model associated with this plan
  def initialize(query, cost_model)
    @steps = []
    @query = query
    @cost_model = cost_model
  end

  # The weight of this query for a given workload
  # NOTE(review): @workload is not assigned anywhere in this class;
  # presumably it is set by the superclass or externally - confirm
  # @return [Numeric] 1 when no workload is set
  def weight
    @workload.nil? ? 1 : @workload.statement_weights[@query]
  end

  # Groups for plans are stored in the query
  # @return [String]
  def group
    @query.group
  end

  # Plans take their name from the text of the associated query
  # @return [String]
  def name
    @query.text
  end

  # Fields selected by this plan
  # @return [Array<Fields::Field>]
  def select_fields
    @query.select
  end

  # Parameters to this execution plan (the conditions of the query)
  def params
    @query.conditions
  end

  # Plans are ordered by their execution cost
  # @return [Integer, nil] the result of comparing the two costs
  def <=>(other)
    cost <=> other.cost
  end

  # The estimated cost of executing the query using this plan
  # @return [Numeric, nil] nil if any step has no cost yet
  def cost
    step_costs = @steps.map(&:cost)
    return nil if step_costs.any?(&:nil?)

    step_costs.inject(0, &:+)
  end

  # Get the indexes used by the index lookup steps of this plan
  # @return [Array<Index>]
  def indexes
    @steps.grep(IndexLookupPlanStep).map(&:index)
  end
end
|
223
|
+
|
224
|
+
# A query planner which can construct a tree of query plans
# from a fixed set of indexes
class QueryPlanner
  # @param model the model handed to each new query state
  # @param indexes the indexes which plans are allowed to use
  # @param cost_model the cost model used to cost every plan step
  def initialize(model, indexes, cost_model)
    @logger = Logging.logger['nose::query_planner']

    @model = model
    @indexes = indexes
    @cost_model = cost_model
  end

  # Find a tree of plans for the given query
  # @return [QueryPlanTree]
  # @raise [NoPlanException] if no plan could be found
  def find_plans_for_query(query)
    state = QueryState.new query, @model
    state.freeze
    tree = QueryPlanTree.new state, @cost_model

    indexes_by_joins = indexes_for_query(query, state.joins)
    find_plans_for_step tree.root, indexes_by_joins

    if tree.root.children.empty?
      # Plan again without pruning so the tree included in the
      # exception message is built with pruning disabled
      tree = QueryPlanTree.new state, @cost_model
      find_plans_for_step tree.root, indexes_by_joins, prune: false
      fail NoPlanException, "#{query.inspect} #{tree.inspect}"
    end

    @logger.debug { "Plans for #{query.inspect}: #{tree.inspect}" }

    tree
  end

  # Get the minimum cost plan for executing this query
  # @return [QueryPlan]
  # @raise [NoPlanException] if no plan could be found
  def min_plan(query)
    find_plans_for_query(query).min
  end

  private

  # Produce indexes possibly useful for this query
  # grouped by the first entity they join on
  # @return [Hash] sets of indexes keyed by entity
  def indexes_for_query(query, joins)
    indexes_by_joins = Hash.new { |h, k| h[k] = Set.new }

    # The entities of the query are the same for every index
    query_entities = query.graph.entities.to_set

    @indexes.each do |index|
      # Limit indices to those which cross the query path
      index_entities = index.graph.entities
      next unless index_entities.to_set.subset? query_entities

      first_entity = joins.find do |entity|
        index_entities.include?(entity)
      end
      indexes_by_joins[first_entity].add index
    end

    indexes_by_joins
  end

  # Remove plans ending with this step in the tree
  # @return [Boolean] true if pruning resulted in an empty tree
  def prune_plan(prune_step)
    # Walk up the tree to the nearest step with other branches,
    # remembering the child we came from so that it can be removed
    prev_step = nil
    while prune_step.children.length <= 1 &&
          !prune_step.is_a?(RootPlanStep)
      prev_step = prune_step
      prune_step = prune_step.parent
    end

    # If we reached the root, we have no plan
    return true if prune_step.is_a? RootPlanStep

    # Cut off the branch which led to the failed plan
    prune_step.children.delete prev_step unless prev_step.nil?

    false
  end

  # Find possible query plans for a query starting at the given step
  # NOTE(review): the recursive call below does not forward `prune`, so
  # `prune: false` only affects the step it is passed for - confirm
  # @return [void]
  def find_plans_for_step(step, indexes_by_joins, prune: true)
    return if step.state.answered?

    steps = find_steps_for_state step, step.state, indexes_by_joins

    if !steps.empty?
      step.children = steps
      steps.each { |new_step| new_step.calculate_cost @cost_model }
      steps.each do |child_step|
        find_plans_for_step child_step, indexes_by_joins

        # Remove this step if finding a plan from here failed
        if child_step.children.empty? && !child_step.state.answered?
          step.children.delete child_step
        end
      end
    elsif prune
      # Nothing can follow this step so prune the dead branch
      # (the root has no parent to prune from)
      return if step.is_a?(RootPlanStep) || prune_plan(step.parent)
    else
      # Mark the dead end explicitly instead of removing it
      step.children = [PrunedPlanStep.new]
    end
  end

  # Find all possible plan steps not using indexes
  # @return [Array<PlanStep>] always empty directly below the root
  def find_nonindexed_steps(parent, state)
    steps = []
    return steps if parent.is_a? RootPlanStep

    [SortPlanStep, FilterPlanStep, LimitPlanStep].each \
      { |step| steps.push step.apply(parent, state) }

    # Drop the steps which could not be applied (nil results)
    steps.flatten!
    steps.compact!

    steps
  end

  # Get a list of possible next steps for a query in the given state
  # Steps not using indexes take priority when any can be applied
  # @return [Array<PlanStep>]
  def find_steps_for_state(parent, state, indexes_by_joins)
    steps = find_nonindexed_steps parent, state
    return steps unless steps.empty?

    # Don't allow indices to be used multiple times
    indexes = (indexes_by_joins[state.joins.first] || Set.new).to_set
    used_indexes = parent.parent_steps.indexes.to_set
    (indexes - used_indexes).each do |index|
      new_step = IndexLookupPlanStep.apply parent, index, state
      next if new_step.nil?

      new_step.add_fields_from_index index
      steps.push new_step
    end

    steps
  end
end
|
360
|
+
end
|
361
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NoSE
|
4
|
+
module Plans
|
5
|
+
# A query plan step which sorts results externally
class SortPlanStep < PlanStep
  attr_reader :sort_fields

  # @param sort_fields the fields to sort on
  # @param state [QueryState, nil] the state after the sort is applied
  def initialize(sort_fields, state = nil)
    super()

    @state = state
    @sort_fields = sort_fields
  end

  # :nocov:
  def to_color
    field_list = @sort_fields.map(&:to_color).join(', ')
    super + ' [' + field_list + ']'
  end
  # :nocov:

  # Sort steps are equal when they are of exactly the same
  # class and sort on the same fields
  def ==(other)
    return false unless other.instance_of?(self.class)

    @sort_fields == other.sort_fields
  end
  alias eql? ==

  # Hash on the IDs of the fields being sorted
  def hash
    @sort_fields.map { |field| field.id }.hash
  end

  # Check if an external sort can be used (if a sort is the last step)
  # @return [SortPlanStep, nil] nil when a sort can not be applied
  def self.apply(parent, state)
    # No ID fields may remain among the fields still to be fetched
    return nil if state.fields.any? { |field| field.is_a? Fields::IDField }

    # All predicates must already have been resolved
    return nil unless state.eq.empty? && state.range.nil?

    # The parent must provide at least one of the fields to order by
    return nil if (state.order_by.to_set & parent.fields).empty?

    sorted_state = state.dup
    sorted_state.order_by = []

    SortPlanStep.new(state.order_by, sorted_state).tap do |sort_step|
      sort_step.state.freeze
    end
  end
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NoSE
|
4
|
+
module Plans
|
5
|
+
# Base class for plan steps which modify the contents of an index
class UpdatePlanStep < PlanStep
  attr_reader :index
  attr_accessor :state

  # @param index the index modified by this step
  # @param type [Symbol] the kind of modification being made
  # @param state the state for this step; when given, a frozen
  #   copy is stored
  def initialize(index, type, state = nil)
    super()
    @index = index
    @type = type

    @state = state.dup.freeze unless state.nil?
  end

  # :nocov:
  def to_color
    format('%s %s * %s', super, @index.to_color, @state.cardinality)
  end
  # :nocov:

  # Two update steps are equal if they are of exactly the same
  # class, use the same index and make the same type of change
  def ==(other)
    return false unless other.instance_of?(self.class)

    same_index = @index == other.index
    same_index && @type == other.instance_variable_get(:@type)
  end
  alias eql? ==

  # Hash on the same values which are used to check equality
  def hash
    [@index, @type].hash
  end
end
|
37
|
+
|
38
|
+
# A step which inserts data into a given index
class InsertPlanStep < UpdatePlanStep
  attr_reader :fields

  # @param index the index which data is inserted into
  # @param state the state handed on to the superclass
  # @param fields the fields to insert; when empty, all fields of the
  #   index are used, otherwise only those which are part of the index
  def initialize(index, state = nil, fields = Set.new)
    super(index, :insert, state)

    chosen = fields.empty? ? index.all_fields : fields.to_set & index.all_fields

    # The hash and order fields of the index are always included
    key_fields = index.hash_fields + index.order_fields.to_set
    @fields = chosen + key_fields
  end
end
|
52
|
+
|
53
|
+
# A step which deletes data from a given index
class DeletePlanStep < UpdatePlanStep
  # @param index the index which data is deleted from
  # @param state the state handed on to the superclass
  def initialize(index, state = nil)
    super(index, :delete, state)
  end
end
|
59
|
+
end
|
60
|
+
end
|