nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
data/lib/nose/util.rb ADDED
@@ -0,0 +1,305 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'formatador'
5
+ require 'parallel'
6
+ require 'pp'
7
+ require 'stringio'
8
+
9
# Reopen to add utility methods
module Enumerable
  # Enumerate all non-empty prefixes of the enumerable
  # @return [Enumerator] yields one new Array per prefix, shortest first
  def prefixes
    Enumerator.new do |enum|
      prefix = []
      each do |elem|
        # Copy before appending so previously yielded prefixes are not mutated
        prefix = prefix.dup << elem
        enum.yield prefix
      end
    end
  end

  # Enumerate the splits of the enumerable into a leading part and the
  # remainder. For each size from 1 up to +max_length+ a two-element array
  # +[first_items, remaining_items]+ is yielded.
  # @param max_length [Integer, nil] largest leading part to produce,
  #   defaults to the number of items in the enumerable
  # @return [Enumerator]
  def partitions(max_length = nil)
    # +count+ is available on every Enumerable, unlike +length+
    max_length = count if max_length.nil?
    Enumerator.new do |enum|
      1.upto(max_length) do |split|
        enum.yield partition.with_index { |_, i| i < split }
      end
    end
  end

  # Take the sum of the result of calling the block on each item
  # @param initial [Object] the value the sum starts from
  # @yield [item] each item of the enumerable
  # @return [Object]
  def sum_by(initial = 0)
    reduce(initial) { |sum, item| sum + yield(item) }
  end

  # Take the product of the result of calling the block on each item
  # @param initial [Object] the value the product starts from
  # @yield [item] each item of the enumerable
  # @return [Object]
  def product_by(initial = 1)
    reduce(initial) { |product, item| product * yield(item) }
  end
end
46
+
47
# Extend with some convenience methods
class Array
  # Find the longest common prefix of two arrays
  # @param other [Array] the array to compare against
  # @return [Array<Object>] a new array holding the shared leading elements
  # @raise [TypeError] if +other+ is not an Array
  def longest_common_prefix(other)
    fail TypeError, 'can only compare with another Array' \
      unless other.is_a? Array

    # Walk both arrays in step instead of materializing every prefix.
    # The index guard stops a trailing nil from matching past the end of
    # +other+; +eql?+ is the strict element comparison that array
    # intersection also relies on.
    take_while.with_index do |elem, i|
      i < other.length && elem.eql?(other[i])
    end
  end
end
56
+
57
# Reopen to present as finite as with Float
class Integer
  # Only add the method on Rubies which do not already provide it, so a
  # built-in implementation is never overridden
  unless method_defined?(:finite?)
    # Convenience method to allow integers to be considered finite
    # @return [Boolean] always true
    def finite?
      true
    end
  end
end
65
+
66
# Extend Object to print coloured output
class Object
  # Produce the inspected form of the object by handing its coloured
  # representation (or its plain string form when +to_color+ is not
  # available) to Formatador for parsing
  # @return [Object] whatever Formatador.parse returns
  def inspect
    text = if respond_to?(:to_color)
             to_color
           else
             to_s
           end

    Formatador.parse(text)
  end

  # Get a colored representation of the object; by default this is
  # simply the string form
  # @return [String]
  def to_color
    to_s
  end
end
78
+
79
# Allow a supertype to look up a class given the
# name of a subtype inheriting from this class
module Supertype
  # Add class methods when this module is included
  # @param base [Class] the class including this module
  # @return [void]
  def self.included(base)
    base.extend ClassMethods
  end

  # Add a single method to get a class given the subtype name
  module ClassMethods
    # Get the class given the name of a subtype. The underscore-separated
    # name is converted to camel case (with 'Id' written as 'ID'), the
    # unqualified name of this class is appended as a suffix, and the
    # result is looked up in the namespace containing this class.
    # @param name [String] the underscore-separated subtype name
    # @return [Class] the concrete class with the given subtype name
    def subtype_class(name)
      *namespace, suffix = self.name.split('::')

      prefix = name.split('_').map do |word|
        (word[0].upcase + word[1..-1]).sub('Id', 'ID')
      end.join

      # Resolve each constant in turn, starting from the top level
      (namespace << (prefix + suffix)).reduce(Object) do |scope, const_name|
        scope.const_get(const_name)
      end
    end
  end
end
106
+
107
# Allow subclasses to return a string representation of the
# class, minus a common suffix also used in the superclass
module Subtype
  # Add instance and class methods when this module is included
  # @param base [Class] the class including this module
  # @return [void]
  def self.included(base)
    base.send :include, InstanceMethods
    base.extend ClassMethods
  end

  # Mirror the subtype method on class instances
  module InstanceMethods
    # A mirror of {Subtype::ClassMethods#subtype_name}
    # @return [String]
    def subtype_name(**args)
      self.class.subtype_name(**args)
    end
  end

  # Add a single method to retrieve the subtype name
  module ClassMethods
    # Get a unique string identifying this subclass amongst sibling classes
    # @param name_case [Symbol] either :snake or :camel
    # @return [String, nil] the frozen name, or nil for any other name_case
    def subtype_name(name_case: :snake)
      super_name = name_array superclass

      # Strip the trailing words which are shared with the superclass
      self_name = name_array(self).reverse.drop_while do |part|
        super_name.include? part
      end.reverse

      case name_case
      when :snake
        self_name.join('_').freeze
      when :camel
        self_name.map { |part| part[0].upcase + part[1..-1] }.join.freeze
      end
    end

    private

    # Convert camel case class names to an array of lowercase words
    # @param cls [Class] a named class
    # @return [Array<String>]
    def name_array(cls)
      # Module#name may return a frozen string, so only non-mutating
      # string methods are used here
      unqualified = cls.name.gsub(/^.*::/, '').gsub('ID', 'Id')
      unqualified.split(/(?=[A-Z]+)/).map { |part| part.downcase.freeze }
    end
  end
end
165
+
166
# Simple helper class to facilitate cardinality estimates
class Cardinality
  # Update the cardinality based on filtering implicit to the index.
  # A range filter, when present, scales the cardinality by 0.1 and each
  # equality-filtered field scales it by the inverse of that field's
  # cardinality.
  # @param cardinality [Numeric] the starting cardinality
  # @param eq_filter [Enumerable] fields responding to +cardinality+
  # @param range_filter [Object, nil] nil when there is no range filter
  # @return [Float]
  def self.filter(cardinality, eq_filter, range_filter)
    range_selectivity = range_filter.nil? ? 1.0 : 0.1
    eq_selectivity = eq_filter.reduce(1.0) do |product, field|
      product * (1.0 / field.cardinality)
    end

    range_selectivity * cardinality * eq_selectivity
  end
end
179
+
180
# Add a simple function for pretty printing strings
module Kernel
  private

  # Pretty print each of the given objects, in order, into a single string
  # @param objs [Array<Object>] the objects to print
  # @return [String]
  def pp_s(*objs)
    buffer = objs.each_with_object(StringIO.new) do |obj, io|
      PP.pp(obj, io)
    end

    buffer.string
  end

  module_function :pp_s
end
195
+
196
# Add simple convenience methods
class Object
  # Recursively convert all the keys of nested hashes to symbols.
  # Hashes and arrays are rebuilt as new objects; any other object is
  # returned unchanged.
  # @return [Object]
  def deep_symbolize_keys
    case self
    when Hash
      symbolized = {}
      each do |key, value|
        symbolized[key.to_sym] = value.deep_symbolize_keys
      end
      symbolized
    when Array
      map(&:deep_symbolize_keys)
    else
      self
    end
  end
end
214
+
215
# Extend the kernel to allow warning suppression
module Kernel
  # Allow the suppression of warnings for a block of code
  # @yield the code to run with $VERBOSE set to nil
  # @return [Object] the value returned by the block
  def suppress_warnings
    original_verbosity = $VERBOSE
    $VERBOSE = nil
    yield
  ensure
    # Restore the previous setting even if the block raises
    $VERBOSE = original_verbosity
  end
end
228
+
229
module NoSE
  # Helper functions for building DSLs
  module DSL
    # Add methods to the class which can be used to access entities and
    # fields. One instance method per entity is defined on +cls+, named
    # after the entity, which returns a stand-in object exposing the
    # entity's fields and foreign keys.
    # @param entities [Hash] a mapping from entity names to entities
    # @param cls [Class] the class to define the entity methods on
    # @return [void]
    def mixin_fields(entities, cls)
      entities.each do |entity_name, entity|
        # A plain object standing in for the entity within the DSL
        fake = Object.new

        cls.send(:define_method, entity_name.to_sym) do
          # Fields can be fetched using [] access, with '*' selecting all
          fake.define_singleton_method(:[]) do |field_name|
            next entity.fields.values if field_name == '*'

            entity.fields[field_name] || entity.foreign_keys[field_name]
          end

          # Readers named for each field so things like 'user.id' work;
          # these are (re)defined on every call to the entity method
          entity.fields.merge(entity.foreign_keys).each do |field_name, field|
            fake.define_singleton_method(field_name.to_sym, -> { field })
          end

          fake
        end
      end
    end

    module_function :mixin_fields
  end

  # Add loading of class instances from the filesystem
  module Loader
    attr_reader :source_code

    # Add the class-level loader when this module is included
    # @param base [Class] the class including this module
    # @return [void]
    def self.included(base)
      base.extend ClassMethods
    end

    # Add a class method to load class instances from file
    module ClassMethods
      # Load a class with the given name from a directory specified
      # by the LOAD_PATH class constant
      # @param name [String] the file name, without the .rb extension
      # @return [Object] an instance of the class which included this module
      def load(name)
        path = const_get(:LOAD_PATH)
        filename = File.expand_path("../../../#{path}/#{name}.rb", __FILE__)
        source_code = File.read(filename)

        # NOTE(review): the file is evaluated as Ruby with access to this
        # method's binding, so it must only ever come from a trusted source
        instance = binding.eval(source_code, filename)

        # Keep the original text so it is available via #source_code
        instance.instance_variable_set(:@source_code, source_code)
        instance
      end
    end
  end
end
290
+
291
# Extend Time to allow conversion to DateTime instances
class Time
  # Convert to a DateTime instance
  # http://stackoverflow.com/a/279785/123695
  # @return [DateTime]
  def to_datetime
    # Whole seconds plus the microsecond component as an exact fraction
    fractional_seconds = Rational(usec, 1_000_000) + sec

    # utc_offset is measured in seconds, while DateTime expects the
    # offset as a fraction of a day
    day_fraction = Rational(utc_offset, 86_400)

    DateTime.new(year, month, day, hour, min, fractional_seconds,
                 day_fraction)
  end
end
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'model'
4
+ require_relative 'parser'
5
+
6
+ require 'erb'
7
+
8
+ module NoSE
9
# A representation of a query workload over a given set of entities
class Workload
  # The subdirectory workloads are loaded from
  LOAD_PATH = 'workloads'
  include Loader

  attr_reader :model
  attr_accessor :mix

  # @param model [Model, nil] the model to use, a new one if not given
  # @yield an optional block evaluated in the context of a {WorkloadDSL}
  def initialize(model = nil, &block)
    @statement_weights = { default: {} }
    @model = model || Model.new
    @mix = :default

    # Apply the DSL
    WorkloadDSL.new(self).instance_eval(&block) if block_given?
  end

  # Compare models and statements
  # @return [Boolean]
  def ==(other)
    other.is_a?(Workload) && @model == other.model &&
      statement_weights == other.statement_weights
  end
  alias eql? ==

  # Add a new {Entity} or {Statement} to the workload
  # @raise [TypeError] if given anything other than an entity or statement
  # @return [self] the current workload to allow chaining
  def <<(other)
    if other.is_a? Entity
      @model.add_entity other.freeze
    elsif other.is_a? Statement
      add_statement other.freeze
    else
      fail TypeError, 'can only add queries and entities to a workload'
    end

    self
  end

  # Add a new {Statement} to the workload or parse a string
  # @param statement [Statement, String] strings are parsed against the model
  # @param mixes [Hash, Numeric] weights keyed by mix name, or a single
  #   weight for the default mix; an empty hash means 1.0 in the default mix
  # @return [void]
  def add_statement(statement, mixes = {}, group: nil, label: nil)
    statement = Statement.parse(statement, @model,
                                group: group, label: label) \
      if statement.is_a? String
    statement.freeze

    mixes = { default: mixes } if mixes.is_a? Numeric
    mixes = { default: 1.0 } if mixes.empty?
    mixes.each do |mix, weight|
      @statement_weights[mix] = {} unless @statement_weights.key? mix
      @statement_weights[mix][statement] = weight
    end
  end

  # Strip the weights from the query dictionary and return a list of queries
  # @return [Array<Statement>] empty if the current mix has no statements
  def queries
    statements.select { |statement| statement.is_a? Query }
  end

  # Strip the weights and return a list of statements
  # @return [Array<Statement>] empty if the current mix has no statements
  def statements
    (@statement_weights[@mix] || {}).keys
  end

  # Retrieve the weights for the current mix
  # @return [Hash, nil] nil if nothing was added under the current mix
  def statement_weights
    @statement_weights[@mix]
  end

  # Strip the weights from the query dictionary and return a list of updates
  # @return [Array<Statement>] empty if the current mix has no statements
  def updates
    statements.reject { |statement| statement.is_a? Query }
  end

  # Find a statement in the workload with the provided tag
  # @return [Statement, nil]
  def find_with_tag(tag)
    statements.find do |s|
      s.text.end_with? "-- #{tag}"
    end
  end

  # Remove any updates from the workload
  # @return [void]
  def remove_updates
    weights = @statement_weights[@mix]

    # Nothing to remove when the current mix was never populated
    weights.select! { |stmt, _| stmt.is_a? Query } unless weights.nil?
  end

  # Get all the support queries for updates in the workload
  # @return [Array<Statement>]
  def support_queries(indexes)
    updates.map do |update|
      indexes.map { |index| update.support_queries(index) }
    end.flatten(2)
  end

  # Check if all the fields used by queries in the workload exist
  # @return [Boolean]
  def fields_exist?
    statements.all? do |query|
      # Projected fields and fields in the where clause exist
      fields = query.where.map(&:field) + query.fields
      fields.all? { |field| @model.find_field field.value }
    end
  end

  # Produce the source code used to define this workload. The result is
  # rendered from templates/workload.erb on first use and then cached.
  # @return [String]
  def source_code
    return @source_code unless @source_code.nil?

    tmpl = File.read File.join(File.dirname(__FILE__),
                               '../../templates/workload.erb')
    @source_code = build_template(tmpl).result(template_binding)
  end

  private

  # Build a binding exposing this workload to the template as +workload+
  # @return [Binding]
  def template_binding
    binding.tap { |scope| scope.local_variable_set(:workload, self) }
  end

  # Construct the template with '>' trim mode on both old and new ERB
  # @return [ERB]
  def build_template(source)
    # Newer ERB takes trim_mode as a keyword argument and has deprecated
    # the positional form which older versions require
    if ERB.instance_method(:initialize).parameters.assoc(:key)
      ERB.new(source, trim_mode: '>')
    else
      ERB.new(source, nil, '>')
    end
  end
end
141
+
142
# A helper class for DSL creation to avoid messing with {Workload}
class WorkloadDSL
  # @param arg [Workload, Model] a workload (whose model is also used) or
  #   a bare model, in which case statements cannot be added
  def initialize(arg)
    if arg.is_a? Workload
      @workload = arg
      @model = arg.model
    elsif arg.is_a? Model
      @model = arg
    end
  end

  # rubocop:disable MethodName

  # Allow the use of an external model
  # @param name [String] the name of the model to load
  # @return [Object] the loaded model
  def Model(name)
    fail 'Models require a workload' if @workload.nil?

    # Keep our own reference in step with the workload so entities and
    # relationships declared afterwards are added to the loaded model
    @model = NoSE::Model.load(name)
    @workload.instance_variable_set(:@model, @model)
  end

  # Shortcut to add a new {Entity} to the workload
  # @return [Entity]
  def Entity(*args, &block)
    @model.add_entity Entity.new(*args, &block)
  end

  # Add a HasMany relationship which is just the opposite of HasOne
  # @param entities [Hash] a single pair of entity names which is reversed
  #   before being handed to {#HasOne}
  # @return [void]
  def HasMany(from_name, to_name, entities, **options)
    HasOne to_name, from_name, Hash[[entities.first.reverse]], **options
  end

  # Separate function for foreign keys to avoid circular dependencies
  # @param from_name [String] name of the key on the first entity
  # @param to_name [String] name of the reverse key on the second entity
  # @param entities [Hash] a single pair naming the two entities
  # @return [void]
  def HasOne(from_name, to_name, entities, **options)
    from_entity, to_entity = entities.first
    from_field = Fields::ForeignKeyField.new from_name,
                                             @model[to_entity],
                                             **options

    # Add the key in the opposite direction; the forward key above was
    # already built, so it does not see these extra options
    options[:count] = @model[from_entity].count
    options[:relationship] = :many
    to_field = Fields::ForeignKeyField.new to_name,
                                           @model[from_entity],
                                           **options

    # Set the opposite keys and add to entities
    to_field.reverse = from_field
    from_field.reverse = to_field
    @model[from_entity] << from_field
    @model[to_entity] << to_field
  end

  # Shortcut to add a new {Statement} to the workload. Statements with a
  # zero weight and no explicit mixes are skipped.
  # @param weight [Numeric] weight in the default mix when no mixes given
  # @param mixes [Hash] weights keyed by mix name
  # @raise [RuntimeError] if this DSL was not created from a workload
  # @return [void]
  def Q(statement, weight = 1.0, group: nil, label: nil, **mixes)
    fail 'Statements require a workload' if @workload.nil?

    return if weight.zero? && mixes.empty?
    mixes = { default: weight } if mixes.empty?
    @workload.add_statement statement, mixes, group: group, label: label
  end

  # Allow setting the default workload mix
  # @return [void]
  def DefaultMix(mix)
    @workload.mix = mix
  end

  # Allow grouping statements with an associated weight
  # @yield a block evaluated in the context of a {GroupDSL}
  # @raise [RuntimeError] if this DSL was not created from a workload
  # @return [void]
  def Group(name, weight = 1.0, **mixes, &block)
    fail 'Groups require a workload' if @workload.nil?

    # Apply the DSL
    dsl = GroupDSL.new
    dsl.instance_eval(&block) if block_given?
    dsl.statements.each do |statement|
      Q(statement, weight, **mixes, group: name)
    end
  end

  # rubocop:enable MethodName
end
225
+
226
# A helper class for DSL creation to allow groups of statements
class GroupDSL
  # The statements collected so far, in the order they were declared
  attr_reader :statements

  def initialize
    @statements = []
  end

  # rubocop:disable MethodName

  # Track a new statement to be added
  # @param statement [Object] the statement to remember
  # @return [void]
  def Q(statement)
    @statements.push statement
  end

  # rubocop:enable MethodName
end
244
+ end
data/lib/nose.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace module for the whole project; declared up front so the
# files required below can reopen it
module NoSE; end
6
+
7
+ require_relative 'nose/util'
8
+
9
+ require_relative 'nose/backend'
10
+ require_relative 'nose/cost'
11
+ require_relative 'nose/debug'
12
+ require_relative 'nose/enumerator'
13
+ require_relative 'nose/indexes'
14
+ require_relative 'nose/loader'
15
+ require_relative 'nose/model'
16
+ require_relative 'nose/parser'
17
+ require_relative 'nose/plans'
18
+ require_relative 'nose/proxy'
19
+ require_relative 'nose/query_graph'
20
+ require_relative 'nose/random'
21
+ require_relative 'nose/schema'
22
+ require_relative 'nose/search'
23
+ require_relative 'nose/statements'
24
+ require_relative 'nose/timing'
25
+ require_relative 'nose/workload'
26
+
27
+ require_relative 'nose/serialize'
28
+
29
# :nocov:
require 'logging'

# Configure the 'nose' logger: the level comes from the NOSE_LOG
# environment variable (defaulting to 'info') and output goes to stderr
Logging.logger['nose'].tap do |log|
  log.level = ENV.fetch('NOSE_LOG', 'info').downcase.to_sym
  log.add_appenders Logging.appenders.stderr
end
# :nocov:
@@ -0,0 +1,42 @@
1
<%# Renders Ruby source defining a workload; expects `workload` in scope %>
<% foreign_keys = [] %>
# rubocop:disable all

workload = NoSE::Workload.new do
  # Define entities along with the size and cardinality of their fields
  # as well as an estimated number of each entity
<% workload.model.entities.each_value do |entity| %>
  (Entity '<%= entity.name %>' do
<% entity.fields.each_value do |field| %>
<%
  # Foreign keys are collected here and emitted after all entities
  if field.is_a? NoSE::Fields::ForeignKeyField
    foreign_keys << field
    next
  end
%>
    <%= field.subtype_name name_case: :camel %> '<%= field.name %>',<%=
      # Only fields which are exactly string fields carry a size argument
      field.class == NoSE::Fields::StringField ? "#{field.size}, " : ''
    %> count: <%= field.cardinality %>

<% end %>
  end) * <%= entity.count %>


<% end %>

<% foreign_keys.each do |key| %>
  ForeignKey '<%= key.name %>', '<%= key.parent.name %>', '<%= key.entity.name %>', count: <%= key.cardinality %>

<% end %>

  # Define queries and their relative weights
<% workload.statement_weights.each do |statement, weight| %>
  Q '<%= statement.query %>', <%= weight %>

<% end %>
end
# rubocop:enable all