nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,391 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pickup'
4
+
5
+ module NoSE
6
+ module Random
7
# A simple representation of a random ER diagram.
#
# Nodes are integer positions into +@neighbours+; the links recorded there
# are turned into pairs of foreign keys by #add_foreign_keys. This class
# reads +@nodes+ and +@entities+ but never assigns them (presumably
# subclasses populate both before using these helpers -- TODO confirm).
class Network
  attr_reader :entities

  # @param params [Hash] generation options: +:nodes_nb+ is the number of
  #   nodes (default 10) and +:num_fields+ is the mean of the Gaussian
  #   used to draw the number of fields per entity (default 3, stddev 1)
  def initialize(params = {})
    @nodes_nb = params.fetch(:nodes_nb, 10)
    mean_fields = params.fetch(:num_fields, 3)
    @field_count = RandomGaussian.new(mean_fields, 1)
    @neighbours = Array.new(@nodes_nb) { Set.new }
  end

  # :nocov:
  # One line per node, each being the +inspect+ output of its entity
  # @return [String]
  def inspect
    @nodes.map { |node| @entities[node].inspect }.join("\n")
  end
  # :nocov:

  protected

  # Create a random entity to use in the model
  # @param node [Integer] node index, used to derive the entity name
  # @return [Entity]
  def create_entity(node)
    count_distribution = RandomGaussian.new(10_000, 100)

    # Entity#* is defined by the project (presumably it sets the number
    # of instances of the entity -- TODO confirm)
    entity = Entity.new('E' + random_name(node)) * count_distribution.rand
    pick_fields(entity)

    entity
  end

  # Probabilities of selecting various field types
  FIELD_TYPES = {
    Fields::IntegerField => 45,
    Fields::StringField => 35,
    Fields::DateField => 10,
    Fields::FloatField => 10
  }.freeze

  # Select random fields for an entity: an ID field named after the
  # entity, then one random field for every index from 0 up to and
  # including the value drawn from the field count distribution
  # @return [void]
  def pick_fields(entity)
    entity << Fields::IDField.new(entity.name + 'ID')
    0.upto(@field_count.rand) do |field_index|
      entity << random_field(field_index)
    end
  end

  # Generate a random field to add to an entity
  # @param field_index [Integer] index used to derive the field name
  # @return [Fields::Field]
  def random_field(field_index)
    field_class = Pickup.new(FIELD_TYPES).pick
    field_class.new('F' + random_name(field_index))
  end

  # Add foreign key relationships for neighbouring nodes
  # @return [void]
  def add_foreign_keys
    @neighbours.each_with_index do |linked_nodes, node|
      linked_nodes.each do |other_node|
        # Remove the reverse link so the pair is not handled a second
        # time when the outer loop reaches other_node
        @neighbours[other_node].delete(node)

        # Choose the direction of the relationship at random
        from_node, to_node =
          (rand > 0.5) ? [node, other_node] : [other_node, node]
        from_entity = @entities[from_node]
        to_entity = @entities[to_node]

        from_field = Fields::ForeignKeyField.new(
          'FK' + to_entity.name + 'ID', to_entity
        )
        to_field = Fields::ForeignKeyField.new(
          'FK' + from_entity.name + 'ID', from_entity
        )

        # Each key of the pair points back at the other
        from_field.reverse = to_field
        to_field.reverse = from_field

        from_entity << from_field
        to_entity << to_field
      end
    end
  end

  # Add a new link between two nodes (recorded in both directions)
  # @return [void]
  def add_link(node, other_node)
    @neighbours[node].add(other_node)
    @neighbours[other_node] << node
  end

  # Remove a link between two nodes (from both directions)
  # @return [void]
  def remove_link(node, other_node)
    @neighbours[node].delete(other_node)
    @neighbours[other_node].delete(node)
  end

  # Find a new neighbour for a node: a random node which is neither of
  # the two given nodes nor already linked to +node+
  # @return [Object, nil] the chosen node, or nil if there is no candidate
  def new_neighbour(node, neighbour)
    excluded = [node, neighbour] + @neighbours[node].to_a
    candidates = @nodes.to_a - excluded
    candidates.sample
  end

  # Random names of variables combined to create random names
  VARIABLE_NAMES = %w(Foo Bar Baz Quux Corge Grault
                      Garply Waldo Fred Plugh).freeze

  # Generate a random name for an attribute by replacing every character
  # of the index's decimal form with the name at that digit's position
  # @return [String]
  def random_name(index)
    index.to_s.each_char.map { |char| VARIABLE_NAMES[char.to_i] }.join
  end
end
123
+
124
# Generates random statements (queries, inserts, updates and deletes)
# over entities in a given model
class StatementGenerator
  # @param model [Model] model whose entities statements are drawn from
  def initialize(model)
    @model = model
  end

  # Generate a new random insertion to entities in the model
  # @param connect [Boolean] whether to append a random CONNECT TO clause
  # @return [Insert]
  def random_insert(connect = true)
    entity = @model.entities.values.sample
    settings = entity.fields.each_value.map do |field|
      "#{field.name}=?"
    end.join ', '
    insert = "INSERT INTO #{entity.name} SET #{settings} "

    # Optionally add connections to other entities
    insert += random_connection(entity) if connect

    Statement.parse insert, @model
  end

  # Generate a random connection for an Insert using up to two of the
  # entity's foreign keys
  # @param entity [Entity]
  # @return [String] the clause, or an empty string when the entity has
  #   no foreign keys (a bare "AND CONNECT TO " has nothing to connect)
  def random_connection(entity)
    connections = entity.foreign_keys.values.sample(2)
    return '' if connections.empty?

    targets = connections.map { |connection| "#{connection.name}(?)" }
    "AND CONNECT TO #{targets.join(', ')}"
  end

  # Generate a new random update of entities in the model
  # @param path_length [Integer] maximum length of the random path
  # @param updated_fields [Integer] number of fields to set
  # @param condition_count [Integer] number of WHERE conditions requested
  # @return [Update]
  def random_update(path_length = 1, updated_fields = 2,
                    condition_count = 1)
    path = random_path(path_length)
    settings = random_settings path, updated_fields
    from = from_names(path)
    update = "UPDATE #{from.first} FROM #{from.join '.'} " \
             "SET #{settings} " +
             random_where_clause(path, condition_count)

    Statement.parse update, @model
  end

  # Get random settings for an update
  # @return [String]
  def random_settings(path, updated_fields)
    # Don't update key fields
    candidates = path.entities.first.fields.values.reject do |field|
      field.is_a? Fields::IDField
    end

    candidates.sample(updated_fields).map do |field|
      "#{field.name}=?"
    end.join ', '
  end

  # Generate a new random deletion of entities in the model
  # @return [Delete]
  def random_delete
    path = random_path(1)

    from = from_names(path)
    delete = "DELETE #{from.first} FROM #{from.join '.'} " +
             random_where_clause(path, 1)

    Statement.parse delete, @model
  end

  # Generate a new random query from entities in the model
  # @param path_length [Integer] maximum length of the random path
  # @param selected_fields [Integer] number of fields to select
  # @param condition_count [Integer] number of conditions requested; the
  #   first is always an equality on the first entity of the path
  # @param order [Boolean] whether to order by one random field
  # @return [Query]
  def random_query(path_length = 3, selected_fields = 2,
                   condition_count = 2, order = false)
    path = random_path path_length
    graph = QueryGraph::Graph.from_path path

    conditions = [
      Condition.new(path.entities.first.fields.values.sample, :'=', nil)
    ]
    condition_count -= 1

    # The remaining conditions are inequalities on fields not used yet
    new_fields = random_where_conditions(path, condition_count,
                                         conditions.map(&:field).to_set)
    conditions += new_fields.map do |field|
      Condition.new(field, :'>', nil)
    end

    # Index the conditions by field ID; a repeated field keeps the last
    conditions = conditions.map do |condition|
      [condition.field.id, condition]
    end.to_h

    params = {
      select: random_select(path, selected_fields),
      model: @model,
      graph: graph,
      key_path: graph.longest_path,
      entity: graph.longest_path.first.parent,
      conditions: conditions,
      order: order ? [graph.entities.to_a.sample.fields.values.sample] : []
    }

    query = Query.new params, nil

    # The result is discarded (presumably this primes a cached hash
    # value on the query -- TODO confirm)
    query.hash

    query
  end

  # Get random fields to select for a Query
  #
  # The number of fields is capped at the number of distinct fields
  # available along the path, since asking for more could never be
  # satisfied and the sampling loop would not terminate.
  # @return [Set<Fields::Field>]
  def random_select(path, selected_fields)
    available = path.entities.flat_map { |entity| entity.fields.values }
    target = [selected_fields, available.uniq.length].min

    fields = Set.new
    while fields.length < target
      fields.add path.entities.sample.fields.values.sample
    end

    fields
  end

  # Produce a random statement according to a given set of weights
  # @param weights [Hash{Symbol => Integer}] relative weight of each
  #   statement type; each key names a +random_<type>+ method
  # @return [Statement]
  def random_statement(weights = { query: 80, insert: 10, update: 5,
                                   delete: 5 })
    type = Pickup.new(weights).pick
    send("random_#{type}")
  end

  # Return a random path through the entity graph
  # @param max_length [Integer] maximum number of keys on the path; the
  #   path is shorter if no unvisited entity can be reached
  # @return [KeyPath]
  def random_path(max_length)
    # Pick the start of path weighted based on
    # the number of edges from each entity
    weights = @model.entities.each_value.map do |entity|
      [entity, entity.foreign_keys.length]
    end.to_h
    path = [Pickup.new(weights).pick.id_field]

    while path.length < max_length
      # Find a list of keys to entities we have not seen before
      seen = path.map(&:entity)
      keys = path.last.entity.foreign_keys.values.reject do |key|
        seen.include? key.entity
      end
      break if keys.empty?

      # Add a random new key to the path
      path << keys.sample
    end

    KeyPath.new path
  end

  # Produce a random query graph over the entity graph
  # @param max_nodes [Integer] maximum number of nodes in the graph
  # @return [QueryGraph::Graph]
  def random_graph(max_nodes)
    graph = QueryGraph::Graph.new
    last_node = graph.add_node @model.entities.values.sample
    while graph.size < max_nodes
      # Get the possible foreign keys to use
      keys = last_node.entity.foreign_keys.values
      keys.reject! { |key| graph.nodes.map(&:entity).include? key.entity }
      break if keys.empty?

      # Pick a random foreign key to traverse
      next_key = keys.sample
      graph.add_edge last_node, next_key.entity, next_key

      # Select a new node to start from, making sure we pick one
      # that still has valid outgoing edges
      last_node = graph.nodes.reject do |node|
        (node.entity.foreign_keys.each_value.map(&:entity) -
         graph.nodes.map(&:entity)).empty?
      end.sample
      break if last_node.nil?
    end

    graph
  end

  private

  # Names used in the FROM clause of an update or delete: the entity
  # owning the first key followed by the name of each later key
  # @return [Array<String>]
  def from_names(path)
    [path.first.parent.name] + path.entries[1..-1].map(&:name)
  end

  # Produce a random where clause using fields along a given path
  # @param count [Integer] number of conditions requested; fewer may be
  #   produced since rejected random picks are not retried
  # @return [String]
  def random_where_clause(path, count = 2)
    # Ensure we have at least one condition at the beginning of the path
    conditions = [path.entities.first.fields.values.sample]
    conditions += random_where_conditions path, count - 1

    predicates = conditions.map do |field|
      "#{path.find_field_parent(field).name}.#{field.name} = ?"
    end
    "WHERE #{predicates.join ' AND '}"
  end

  # Produce a random set of fields for a where clause
  #
  # Makes +count+ random picks and drops any pick named '**' or present
  # in +exclude+, so the result may be shorter than +count+ and may
  # contain the same field more than once.
  # @return [Array<Fields::Field>]
  def random_where_conditions(path, count, exclude = Set.new)
    1.upto(count).map do
      field = path.entities.sample.fields.values.sample
      next nil if field.name == '**' || exclude.include?(field)

      field
    end.compact
  end

  # Get the name to be used in the query for a condition field: the name
  # of the first entity followed by the foreign key names leading to the
  # entity which owns the field
  # @param field [Fields::Field]
  # @param path [Array<Entity>] entities in traversal order
  # @return [String]
  def condition_field_name(field, path)
    field_path = path.first.name
    path_end = path.index(field.parent)
    last_entity = path.first
    path[1..path_end].each do |entity|
      fk = last_entity.foreign_keys.each_value.find do |key|
        key.entity == entity
      end

      # Build a new string rather than appending to the '.' literal,
      # which is frozen in this file and cannot be mutated
      field_path += ".#{fk.name}"
      last_entity = entity
    end

    field_path
  end
end
342
+ end
343
+
344
# Generate random numbers according to a Gaussian distribution
#
# Samples are produced in pairs; the second value of each pair is kept
# and returned by the following call to #rand.
class RandomGaussian
  # @param mean [Numeric] mean of the distribution
  # @param stddev [Numeric] standard deviation of the distribution
  # @param integer [Boolean] whether samples are truncated with +to_i+
  # @param min [Numeric, nil] smallest value #rand may return, or nil to
  #   leave samples unbounded
  def initialize(mean, stddev, integer = true, min = 1)
    @mean = mean
    @stddev = stddev
    @valid = false
    @next = 0
    @integer = integer
    @min = min
  end

  # Return the next valid random number
  # @return [Integer, Float]
  def rand
    if @valid
      # Use the value left over from the previous pair
      @valid = false
      clamp @next
    else
      @valid = true
      x, y = self.class.gaussian(@mean, @stddev)
      @next = y
      clamp x
    end
  end

  # Return a pair of random numbers for the given distribution
  # @return [Array<Float>]
  def self.gaussian(mean, stddev)
    theta = 2 * Math::PI * rand

    # rand is in [0, 1), so 1 - rand is never zero and the log is finite
    rho = Math.sqrt(-2 * Math.log(1 - rand))
    scale = stddev * rho
    x = mean + scale * Math.cos(theta)
    y = mean + scale * Math.sin(theta)
    [x, y]
  end

  private

  # Truncate the value if integers were requested and then apply the
  # minimum, if there is one
  # @return [Integer, Float]
  def clamp(value)
    value = value.to_i if @integer

    # Without a minimum the value must still be returned, not nil
    return value if @min.nil?

    [@min, value].max
  end
end
388
+ end
389
+
390
+ require_relative 'random/barbasi_albert'
391
+ require_relative 'random/watts_strogatz'
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
# Simple DSL for constructing indexes
#
# Indexes declared through the DSL methods are collected in #indexes.
class Schema
  attr_reader :model, :indexes

  # @yield optional block evaluated in the context of the new schema
  def initialize(&block)
    @indexes = {}
    instance_eval(&block) unless block.nil?
  end

  # Find the schema with the given name
  #
  # Reads +schemas/<name>.rb+ relative to three levels above this file
  # and returns the result of evaluating its contents.
  #
  # NOTE(review): the file is evaluated as Ruby code and +name+ is
  # interpolated into the path unchecked, so it must not come from
  # untrusted input.
  # @param name [String]
  def self.load(name)
    # These locals are visible to the evaluated code through +binding+
    filename = File.expand_path("../../../schemas/#{name}.rb", __FILE__)
    contents = File.read(filename)
    binding.eval(contents, filename)
  end

  # rubocop:disable MethodName

  # Set the model to be used by the schema
  # @param name [String] name passed to +Model.load+
  # @return [void]
  def Model(name)
    @model = Model.load(name)
    NoSE::DSL.mixin_fields(@model.entities, IndexDSL)
  end

  # Add a simple index for an entity
  # @param entity [Object] key used to look the entity up in the model
  #   and to store the resulting index
  # @return [void]
  def SimpleIndex(entity)
    @indexes[entity] = @model[entity].simple_index
  end

  # Wrap commands for defining index attributes
  # @param key [Object] key passed on to the new index
  # @yield optional block evaluated in the context of a new IndexDSL
  # @return [void]
  def Index(key, &block)
    # Apply the DSL
    dsl = IndexDSL.new(self)
    dsl.instance_eval(&block) unless block.nil?

    graph = QueryGraph::Graph.from_path(dsl.path_keys)
    index = Index.new(dsl.hash_fields, dsl.order_fields, dsl.extra,
                      graph, key)
    @indexes[index.key] = index
  end

  # rubocop:enable MethodName
end
48
+
49
# DSL for index creation within a schema
#
# Each DSL method appends to one of the lists exposed by the readers.
class IndexDSL
  attr_reader :hash_fields, :order_fields, :extra, :path_keys

  # @param schema [Schema] schema the index is being defined in
  def initialize(schema)
    @schema = schema
    @hash_fields, @order_fields, @extra, @path_keys = [], [], [], []
  end

  # rubocop:disable MethodName

  # Define a list of hash fields (nested lists are flattened)
  # @return [void]
  def Hash(*fields)
    @hash_fields = [*@hash_fields, *fields.flatten]
  end

  # Define a list of ordered fields (nested lists are flattened)
  # @return [void]
  def Ordered(*fields)
    @order_fields = [*@order_fields, *fields.flatten]
  end

  # Define a list of extra fields (nested lists are flattened)
  # @return [void]
  def Extra(*fields)
    @extra = [*@extra, *fields.flatten]
  end

  # Define the keys for the index path (the keys are not flattened)
  # @return [void]
  def Path(*keys)
    @path_keys = [*@path_keys, *keys]
  end

  # rubocop:enable MethodName
end
89
+ end
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Search
5
# Base class for constraints
#
# Subclasses either override +apply+ or override +apply_query+, which
# the default +apply+ invokes once per query.
class Constraint
  class << self
    # If this is not overridden, apply query-specific constraints
    # @param problem [Object] problem whose +queries+ are visited in order
    # @return [void]
    def apply(problem)
      problem.queries.each_with_index do |query, position|
        apply_query(query, position, problem)
      end
    end

    # To be implemented in subclasses for query-specific constraints;
    # the default accepts any arguments and does nothing
    # @return [void]
    def apply_query(*_args); end
  end
end
20
+
21
# Constraints which force indexes to be present to be used
class IndexPresenceConstraints < Constraint
  # Add constraint for indices being present
  #
  # For every pair of index and query this adds the constraint
  # "query variable - index variable <= 0" to the problem's model.
  # Constraints are only named when NOSE_LOG is set to "debug".
  # @param problem [Object] the search problem being constrained
  # @return [void]
  def self.apply(problem)
    problem.indexes.each do |index|
      problem.queries.each_with_index do |query, q|
        name = ENV['NOSE_LOG'] == 'debug' ? "q#{q}_#{index.key}_avail" : nil

        usage = problem.query_vars[index][query]
        presence = problem.index_vars[index]
        expression = usage + presence * -1

        problem.model << MIPPeR::Constraint.new(expression, :<=, 0, name)
      end
    end
  end
end
36
+
37
# The single constraint used to enforce a maximum storage cost
class SpaceConstraint < Constraint
  # Add space constraint if needed
  #
  # Nothing is added when the configured maximum space is not finite.
  # @param problem [Object] the search problem being constrained
  # @raise [RuntimeError] if the problem is grouping by ID graph
  # @return [void]
  def self.apply(problem)
    limit = problem.data[:max_space]
    return unless limit.finite?

    if problem.data[:by_id_graph]
      raise 'Space constraint not supported when grouping by ID graph'
    end

    # The limit is multiplied by 1.0 so that it is passed as a float
    bound = MIPPeR::Constraint.new(problem.total_size, :<=, limit * 1.0,
                                   'max_space')
    problem.model << bound
  end
end
53
+
54
# Constraints that force each query to have an available plan
class CompletePlanConstraints < Constraint
  # Add the discovered constraints to the problem
  #
  # Each expression must equal one for an ordinary query. For a support
  # query it must instead equal the variable of the index being
  # supported, and it is skipped if that index has no variable.
  # @param query [Object] query the constraints belong to
  # @param q [Integer] position of the query, used in constraint names
  # @param constraints [Hash] expressions keyed by a pair of entities
  # @param problem [Object] the search problem being constrained
  # @return [void]
  def self.add_query_constraints(query, q, constraints, problem)
    constraints.each do |entities, expression|
      name = nil
      if ENV['NOSE_LOG'] == 'debug'
        name = "q#{q}_#{entities.map(&:name).join('_')}"
      end

      unless query.is_a?(SupportQuery)
        problem.model << MIPPeR::Constraint.new(expression, :==, 1, name)
        next
      end

      # If this is a support query, then we might not need a plan, so
      # make the requirement of a plan conditional on the index
      # associated with the support query
      index = query.index
      index = index.to_id_graph if problem.data[:by_id_graph]
      index_var = problem.index_vars[index]
      next if index_var.nil?

      conditional = expression + index_var * -1.0
      problem.model << MIPPeR::Constraint.new(conditional, :==, 0, name)
    end
  end

  # Add complete query plan constraints
  # @param query [Object] query to constrain
  # @param q [Integer] position of the query, used in constraint names
  # @param problem [Object] the search problem being constrained
  # @return [void]
  def self.apply_query(query, q, problem)
    entities = query.join_order

    # One expression for every pair of adjacent entities in join order
    query_constraints = entities.each_cons(2).map do |pair|
      [pair, MIPPeR::LinExpr.new]
    end.to_h

    # Add the sentinel entities at the end and beginning
    last = Entity.new('__LAST__')
    query_constraints[[entities.last, last]] = MIPPeR::LinExpr.new
    first = Entity.new('__FIRST__')
    query_constraints[[entities.first, first]] = MIPPeR::LinExpr.new

    problem.data[:costs][query].each do |index, (steps, _)|
      # All indexes should advance a step if possible unless
      # this is either the last step from IDs to entity
      # data or the first step going from data to IDs
      index_step = steps.first
      if entities.length > 1 && index.graph.size == 1 &&
         !steps.last.state.answered? &&
         !index_step.parent.is_a?(Plans::RootPlanStep)
        raise
      end

      # Join each step in the query graph
      index_var = problem.query_vars[index][query]
      ordered_entities = index.graph.entities.sort_by do |entity|
        entities.index(entity)
      end
      ordered_entities.each_cons(2) do |entity, next_entity|
        # Make sure the constraints go in the correct direction
        forward = [entity, next_entity]
        key = query_constraints.key?(forward) ? forward : forward.reverse
        query_constraints[key] += index_var
      end

      # If this query has been answered, add the jump to the last step
      if steps.last.state.answered?
        query_constraints[[entities.last, last]] += index_var
      end

      # If this index is the first step, add this index to the beginning
      if index_step.parent.is_a?(Plans::RootPlanStep)
        query_constraints[[entities.first, first]] += index_var
      end

      # Ensure the previous index is available
      parent_index = index_step.parent.parent_index
      next if parent_index.nil?

      parent_var = problem.query_vars[parent_index][query]
      name = ENV['NOSE_LOG'] == 'debug' ? "q#{q}_#{index.key}_parent" : nil
      dependency = index_var * 1.0 + parent_var * -1.0
      problem.model << MIPPeR::Constraint.new(dependency, :<=, 0, name)
    end

    # Ensure we have exactly one index on each component of the query graph
    add_query_constraints(query, q, query_constraints, problem)
  end
end
142
+ end
143
+ end