nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
data/lib/nose/util.rb ADDED
@@ -0,0 +1,305 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'formatador'
5
+ require 'parallel'
6
+ require 'pp'
7
+ require 'stringio'
8
+
9
# Reopen Enumerable to add utility methods
module Enumerable
  # Enumerate all non-empty prefixes of the enumerable, shortest first.
  # Each yielded prefix is a newly allocated Array.
  # @return [Enumerator]
  def prefixes
    Enumerator.new do |enum|
      prefix = []
      each do |elem|
        # Build a new array so previously yielded prefixes are not mutated
        prefix += [elem]
        enum.yield prefix
      end
    end
  end

  # Enumerate all partitionings of an enumerable into a leading part and
  # the remaining elements. For each size from 1 up to +max_length+ a
  # two-element array +[first_size_elements, rest]+ is yielded.
  # @param max_length [Integer, nil] the largest leading part to produce;
  #   defaults to the number of elements in the enumerable
  # @return [Enumerator]
  def partitions(max_length = nil)
    # Enumerable only guarantees #count (not #length), so use it to
    # support ranges and other non-Array enumerables
    max_length = count if max_length.nil?
    Enumerator.new do |enum|
      1.upto(max_length) do |size|
        enum.yield partition.with_index { |_, i| i < size }
      end
    end
  end

  # Take the sum of the result of calling the block on each item
  # @param initial [Object] the value the sum starts from
  # @return [Object]
  def sum_by(initial = 0)
    reduce(initial) { |sum, item| sum + yield(item) }
  end

  # Take the product of the result of calling the block on each item
  # @param initial [Object] the value the product starts from
  # @return [Object]
  def product_by(initial = 1)
    reduce(initial) { |product, item| product * yield(item) }
  end
end
46
+
47
# Extend with some convenience methods
class Array
  # Find the longest common prefix of two arrays. Elements are compared
  # pairwise with +eql?+ and scanning stops at the first mismatch, so the
  # work is linear in the length of the shorter array.
  # @param other [Array] the array to compare against
  # @return [Array<Object>] a new array holding the shared leading elements
  # @raise [TypeError] if +other+ is not an Array
  def longest_common_prefix(other)
    fail TypeError unless other.is_a? Array

    # Never look past the end of the shorter array
    limit = [length, other.length].min
    shared = 0
    shared += 1 while shared < limit && self[shared].eql?(other[shared])

    first(shared)
  end
end
56
+
57
# Reopen to present as finite as with Float
class Integer
  # Only add the method when the running Ruby does not already provide
  # one, so a built-in implementation is never overridden.
  unless method_defined?(:finite?)
    # Convenience method to allow integers to be considered finite
    # @return [Boolean] always true
    def finite?
      true
    end
  end
end
65
+
66
# Extend Object to print coloured output
class Object
  # Build the inspect string by passing the coloured representation
  # (or the plain string form when +to_color+ is unavailable) through
  # Formatador.parse
  # @return [String]
  def inspect
    markup = respond_to?(:to_color) ? to_color : to_s
    Formatador.parse(markup)
  end

  # Get a colored representation of the object; by default this is
  # simply the result of +to_s+
  # @return [String]
  def to_color
    to_s
  end
end
78
+
79
# Allow a supertype to look up a class given the
# name of a subtype inheriting from this class
module Supertype
  # Hook run on inclusion which adds the class-level lookup method
  # @return [void]
  def self.included(base)
    base.extend ClassMethods
  end

  # Class-level helper to resolve a subtype class from its short name
  module ClassMethods
    # Get the class given the name of a subtype. The snake case +name+
    # is converted to camel case (with 'Id' written as 'ID'), suffixed
    # with this class's own unqualified name and resolved within the
    # namespace this class lives in.
    # @param name [String] the snake case subtype name
    # @return [Class] the concrete class with the given subtype name
    def subtype_class(name)
      *namespace, base_name = self.name.split('::')

      camelized = name.split('_').map do |name_part|
        (name_part[0].upcase + name_part[1..-1]).sub('Id', 'ID')
      end.join

      # Walk down from Object through each namespace to the target class
      (namespace << camelized + base_name).inject(Object) do |scope, const_name|
        scope.const_get(const_name)
      end
    end
  end
end
106
+
107
# Allow subclasses to return a string representation of the
# class, minus a common suffix also used in the superclass
module Subtype
  # Add instance and class methods when this module is included
  # @return [void]
  def self.included(base)
    base.send :include, InstanceMethods
    base.extend ClassMethods
  end

  # Mirror the subtype method on class instances
  module InstanceMethods
    # A mirror of {Subtype::ClassMethods#subtype_name}
    # @return [String]
    def subtype_name(**args)
      self.class.subtype_name(**args)
    end
  end

  # Add a single method to retrieve the subtype name
  module ClassMethods
    # Get a unique string identifying this subclass amongst sibling classes.
    # Trailing name parts which also occur in the superclass name are
    # dropped before the remaining parts are joined.
    # @param name_case [Symbol] +:snake+ or +:camel+
    # @return [String, nil] the frozen subtype name, or nil when
    #   +name_case+ is neither +:snake+ nor +:camel+
    def subtype_name(name_case: :snake)
      super_name = name_array superclass
      self_name = name_array(self).reverse.drop_while do |part|
        super_name.include? part
      end.reverse

      case name_case
      when :snake
        self_name.join('_').freeze
      when :camel
        self_name.map { |part| part[0].upcase + part[1..-1] }.join('').freeze
      end
    end

    private

    # Convert camel case class names to an array of lower case parts
    # @param cls [Class] the class whose unqualified name is split
    # @return [Array<String>]
    def name_array(cls)
      # Module#name may return a frozen string, so build modified copies
      # rather than mutating it in place
      base_name = cls.name.sub(/\A.*::/, '').gsub('ID', 'Id')

      base_name.split(/(?=[A-Z]+)/).map { |part| part.downcase.freeze }
    end
  end
end
165
+
166
# Simple helper class to facilitate cardinality estimates
class Cardinality
  # Update the cardinality based on filtering implicit to the index.
  # The presence of a range filter scales the estimate by 0.1 and each
  # equality-filtered field scales it by the inverse of its cardinality.
  # @param cardinality [Numeric] the starting cardinality
  # @param eq_filter [Enumerable] fields responding to +cardinality+
  # @param range_filter [Object, nil] the range-filtered field, if any
  # @return [Float]
  def self.filter(cardinality, eq_filter, range_filter)
    range_factor = range_filter.nil? ? 1.0 : 0.1
    eq_factor = eq_filter.reduce(1.0) do |acc, field|
      acc * (1.0 / field.cardinality)
    end

    range_factor * cardinality * eq_factor
  end
end
179
+
180
# Add a simple function for pretty printing strings
module Kernel
  private

  # Pretty print each of the given objects into a single string
  # @param objs [Array<Object>] the objects to print, in order
  # @return [String]
  def pp_s(*objs)
    buffer = objs.each_with_object(StringIO.new) do |obj, io|
      PP.pp(obj, io)
    end

    # Read back everything written from the start of the buffer
    buffer.rewind
    buffer.read
  end

  module_function :pp_s
end
195
+
196
# Add simple convenience methods
class Object
  # Recursively convert the keys of hashes to symbols. Hashes and arrays
  # are rebuilt with their values converted in turn; any other object is
  # returned unchanged.
  # @return [Object]
  def deep_symbolize_keys
    case self
    when Hash
      each_with_object({}) do |(key, value), symbolized|
        symbolized[key.to_sym] = value.deep_symbolize_keys
      end
    when Array
      map(&:deep_symbolize_keys)
    else
      self
    end
  end
end
214
+
215
# Extend the kernel to allow warning suppression
module Kernel
  # Allow the suppression of warnings for a block of code. The previous
  # value of +$VERBOSE+ is restored even when the block raises.
  # @return [Object] the value returned by the block
  def suppress_warnings
    original_verbosity = $VERBOSE
    $VERBOSE = nil
    yield
  ensure
    # Always put the original verbosity back, including on exceptions
    $VERBOSE = original_verbosity
  end
end
228
+
229
+ module NoSE
230
# Helper functions for building DSLs
module DSL
  # Add methods to the class which can be used to access entities and
  # fields. For every entity an instance method named after the entity
  # is defined on +cls+; it returns a stand-in object which exposes the
  # entity's fields and foreign keys via +[]+ and via methods named
  # after each field.
  # @param entities [Hash] a mapping from entity names to entities
  # @param cls [Class] the class receiving the entity methods
  # @return [void]
  def mixin_fields(entities, cls)
    entities.each do |entity_name, entity|
      # Stand-in object for the entity within the DSL
      fake = Object.new
      singleton = fake.singleton_class

      # The accessors are redefined on every call of the entity method,
      # which means fields added to the entity later are picked up
      cls.send(:define_method, entity_name.to_sym) do
        # Look up a single field by name, or all fields with '*'
        singleton.send(:define_method, :[]) do |field_name|
          next entity.fields.values if field_name == '*'

          entity.fields[field_name] || entity.foreign_keys[field_name]
        end

        # Methods named for fields so things like 'user.id' work
        entity.fields.merge(entity.foreign_keys).each do |field_name, field|
          singleton.send(:define_method, field_name.to_sym, -> { field })
        end

        fake
      end
    end
  end

  module_function :mixin_fields
end
264
+
265
# Add loading of class instances from the filesystem
module Loader
  attr_reader :source_code

  # Hook run on inclusion which adds the class-level +load+ method
  # @return [void]
  def self.included(base)
    base.extend ClassMethods
  end

  # Add a class method to load class instances from file
  module ClassMethods
    # Load a class with the given name from a directory specified
    # by the LOAD_PATH class constant. The file is read and evaluated
    # as Ruby code, and the source text is stored on the result in
    # +@source_code+.
    # NOTE(review): the file contents are passed to eval, so only
    # trusted files should ever be reachable through +name+.
    # @param name [String] the file name without the '.rb' extension
    # @return [Object] an instance of the class which included this module
    def load(name)
      path = const_get(:LOAD_PATH)
      filename = File.expand_path("../../../#{path}/#{name}.rb", __FILE__)
      source_code = File.read(filename)

      # Local names are kept stable since the evaluated code can see them
      instance = binding.eval(source_code, filename)
      instance.instance_variable_set(:@source_code, source_code)
      instance
    end
  end
end
289
+ end
290
+
291
# Extend Time to allow conversion to DateTime instances
class Time
  # Convert to a DateTime instance
  # http://stackoverflow.com/a/279785/123695
  # @return [DateTime]
  def to_datetime
    # Whole seconds plus the microseconds expressed as a fraction
    fractional_seconds = sec + Rational(usec, 1_000_000)

    # The UTC offset is divided by the number of seconds in a day
    # (86400) to express it as a fraction of a day
    day_fraction = Rational(utc_offset, 86_400)

    DateTime.new(year, month, day, hour, min, fractional_seconds,
                 day_fraction)
  end
end
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'model'
4
+ require_relative 'parser'
5
+
6
+ require 'erb'
7
+
8
+ module NoSE
9
# A representation of a query workload over a given set of entities
class Workload
  # The subdirectory workloads are loaded from
  LOAD_PATH = 'workloads'
  include Loader

  attr_reader :model
  attr_accessor :mix

  # @param model [Model, nil] the model to use; a new empty model is
  #   created when none is given
  # @yield an optional block evaluated in the context of a {WorkloadDSL}
  def initialize(model = nil, &block)
    @statement_weights = { default: {} }
    @model = model || Model.new
    @mix = :default

    # Apply the DSL
    WorkloadDSL.new(self).instance_eval(&block) if block_given?
  end

  # Compare models and statements
  # @return [Boolean]
  def ==(other)
    other.is_a?(Workload) && @model == other.model &&
      statement_weights == other.statement_weights
  end
  alias eql? ==

  # Add a new {Entity} or {Statement} to the workload
  # @return [self] the current workload to allow chaining
  # @raise [TypeError] if given anything but an entity or statement
  def <<(other)
    if other.is_a? Entity
      @model.add_entity other.freeze
    elsif other.is_a? Statement
      add_statement other.freeze
    else
      fail TypeError, 'can only add queries and entities to a workload'
    end

    self
  end

  # Add a new {Statement} to the workload or parse a string
  # @param statement [Statement, String] a statement, or text to parse
  # @param mixes [Hash, Numeric] weights keyed by mix name, or a single
  #   weight for the default mix; an empty hash means weight 1.0
  # @param group [Object] passed to the parser for string statements
  # @param label [Object] passed to the parser for string statements
  # @return [void]
  def add_statement(statement, mixes = {}, group: nil, label: nil)
    if statement.is_a? String
      statement = Statement.parse(statement, @model,
                                  group: group, label: label)
    end
    statement.freeze

    mixes = { default: mixes } if mixes.is_a? Numeric
    mixes = { default: 1.0 } if mixes.empty?
    mixes.each do |mix, weight|
      @statement_weights[mix] = {} unless @statement_weights.key? mix
      @statement_weights[mix][statement] = weight
    end
  end

  # Strip the weights from the query dictionary and return a list of queries
  # @return [Array<Statement>]
  def queries
    # Go through #statements so a mix with no statements yields []
    statements.select { |statement| statement.is_a? Query }
  end

  # Strip the weights and return a list of statements
  # @return [Array<Statement>]
  def statements
    (@statement_weights[@mix] || {}).keys
  end

  # Retrieve the weights for the current mix
  # @return [Hash, nil] nil when the current mix has no statements
  def statement_weights
    @statement_weights[@mix]
  end

  # Strip the weights from the query dictionary and return a list of updates
  # @return [Array<Statement>]
  def updates
    statements.reject { |statement| statement.is_a? Query }
  end

  # Find a statement in the workload with the provided tag
  # @return [Statement]
  def find_with_tag(tag)
    statements.find do |s|
      s.text.end_with? "-- #{tag}"
    end
  end

  # Remove any updates from the workload
  # @return [void]
  def remove_updates
    weights = @statement_weights[@mix]
    weights.select! { |stmt, _| stmt.is_a? Query } unless weights.nil?
  end

  # Get all the support queries for updates in the workload
  # @return [Array<Statement>]
  def support_queries(indexes)
    updates.map do |update|
      indexes.map { |index| update.support_queries(index) }
    end.flatten(2)
  end

  # Check if all the fields used by queries in the workload exist
  # @return [Boolean]
  def fields_exist?
    statements.all? do |query|
      # Projected fields and fields in the where clause exist
      fields = query.where.map(&:field) + query.fields
      fields.all? { |field| @model.find_field field.value }
    end
  end

  # Produce the source code used to define this workload
  # @return [String]
  def source_code
    return @source_code unless @source_code.nil?

    # OpenStruct is not loaded by default, so require it where it is used
    require 'ostruct'

    ns = OpenStruct.new(workload: self)
    tmpl = File.read File.join(File.dirname(__FILE__),
                               '../../templates/workload.erb')
    @source_code = build_template(tmpl).result(ns.instance_eval { binding })
  end

  private

  # Build the ERB template with the '>' trim mode, using the keyword
  # argument where ERB supports it and the older positional form otherwise
  # @return [ERB]
  def build_template(source)
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(source, trim_mode: '>')
    else
      ERB.new(source, nil, '>')
    end
  end
end
141
+
142
# A helper class for DSL creation to avoid messing with {Workload}
class WorkloadDSL
  # @param arg [Workload, Model] the workload being defined, or just a
  #   model when only entities and relationships are needed
  def initialize(arg)
    case arg
    when Workload
      @workload = arg
      @model = arg.model
    when Model
      @model = arg
    end
  end

  # rubocop:disable MethodName

  # Allow the use of an external model
  # @param name [String] the name of the model to load
  # @return [Model] the loaded model
  def Model(name)
    fail 'Models require a workload' if @workload.nil?

    # Keep the DSL's own reference in step with the workload so that
    # later Entity/HasOne calls modify the model the workload uses
    @model = NoSE::Model.load(name)
    @workload.instance_variable_set(:@model, @model)
  end

  # Shortcut to add a new {Entity} to the workload
  # @return [Entity]
  def Entity(*args, &block)
    @model.add_entity Entity.new(*args, &block)
  end

  # Add a HasMany relationship which is just the opposite of HasOne
  # @return [void]
  def HasMany(from_name, to_name, entities, **options)
    HasOne to_name, from_name, Hash[[entities.first.reverse]], **options
  end

  # Separate function for foreign keys to avoid circular dependencies
  # @param from_name [String] name of the key on the first entity
  # @param to_name [String] name of the reverse key on the second entity
  # @param entities [Hash] a single pair of the two entity names
  # @return [void]
  def HasOne(from_name, to_name, entities, **options)
    from_entity, to_entity = entities.first
    from_field = Fields::ForeignKeyField.new from_name,
                                             @model[to_entity],
                                             **options

    # Add the key in the opposite direction
    options[:count] = @model[from_entity].count
    options[:relationship] = :many
    to_field = Fields::ForeignKeyField.new to_name,
                                           @model[from_entity],
                                           **options

    # Set the opposite keys and add to entities
    to_field.reverse = from_field
    from_field.reverse = to_field
    @model[from_entity] << from_field
    @model[to_entity] << to_field
  end

  # Shortcut to add a new {Statement} to the workload
  # @param statement [String, Statement] the statement to add
  # @param weight [Numeric] weight in the default mix when no
  #   per-mix weights are given
  # @return [void]
  def Q(statement, weight = 1.0, group: nil, label: nil, **mixes)
    fail 'Statements require a workload' if @workload.nil?

    # A zero weight with no explicit mixes means the statement is unused
    return if weight.zero? && mixes.empty?
    mixes = { default: weight } if mixes.empty?
    @workload.add_statement statement, mixes, group: group, label: label
  end

  # Allow setting the default workload mix
  # @return [void]
  def DefaultMix(mix)
    fail 'Mixes require a workload' if @workload.nil?

    @workload.mix = mix
  end

  # Allow grouping statements with an associated weight
  # @return [void]
  def Group(name, weight = 1.0, **mixes, &block)
    fail 'Groups require a workload' if @workload.nil?

    # Apply the DSL
    dsl = GroupDSL.new
    dsl.instance_eval(&block) if block_given?
    dsl.statements.each do |statement|
      Q(statement, weight, **mixes, group: name)
    end
  end

  # rubocop:enable MethodName
end
225
+
226
# A helper class for DSL creation to allow groups of statements
class GroupDSL
  # The statements collected so far, in the order they were added
  attr_reader :statements

  def initialize
    @statements = []
  end

  # rubocop:disable MethodName

  # Track a new statement to be added
  # @param statement [Object] the statement to remember
  # @return [void]
  def Q(statement)
    @statements.push(statement)
  end

  # rubocop:enable MethodName
end
244
+ end
data/lib/nose.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Namespace module for the whole project
4
+ module NoSE
5
+ end
6
+
7
+ require_relative 'nose/util'
8
+
9
+ require_relative 'nose/backend'
10
+ require_relative 'nose/cost'
11
+ require_relative 'nose/debug'
12
+ require_relative 'nose/enumerator'
13
+ require_relative 'nose/indexes'
14
+ require_relative 'nose/loader'
15
+ require_relative 'nose/model'
16
+ require_relative 'nose/parser'
17
+ require_relative 'nose/plans'
18
+ require_relative 'nose/proxy'
19
+ require_relative 'nose/query_graph'
20
+ require_relative 'nose/random'
21
+ require_relative 'nose/schema'
22
+ require_relative 'nose/search'
23
+ require_relative 'nose/statements'
24
+ require_relative 'nose/timing'
25
+ require_relative 'nose/workload'
26
+
27
+ require_relative 'nose/serialize'
28
+
29
# :nocov:
require 'logging'

# Configure the 'nose' logger: the level comes from the NOSE_LOG
# environment variable (falling back to 'info') and output is sent to
# the stderr appender. No local variable is left behind afterwards.
Logging.logger['nose'].tap do |nose_logger|
  nose_logger.level = ENV.fetch('NOSE_LOG', 'info').downcase.to_sym

  nose_logger.add_appenders Logging.appenders.stderr
end
# :nocov:
@@ -0,0 +1,42 @@
1
+ <% foreign_keys = [] %>
2
+ # rubocop:disable all
3
+
4
+ workload = NoSE::Workload.new do
5
+ # Define entities along with the size and cardinality of their fields
6
+ # as well as an estimated number of each entity
7
+ <% workload.model.entities.each_value do |entity| %>
8
+ (Entity '<%= entity.name %>' do
9
+ <% entity.fields.each_value do |field| %>
10
+ <%
11
+ if field.is_a? NoSE::Fields::ForeignKeyField
12
+ foreign_keys << field
13
+ next
14
+ end
15
+ %>
16
+ <%= field.subtype_name name_case: :camel %> '<%= field.name %>',<%=
17
+ case [field.class]
18
+ when [NoSE::Fields::StringField]
19
+ "#{field.size}, "
20
+ else
21
+ ''
22
+ end
23
+ %> count: <%= field.cardinality %>
24
+
25
+ <% end %>
26
+ end) * <%= entity.count %>
27
+
28
+
29
+ <% end %>
30
+
31
+ <% foreign_keys.each do |key| %>
32
+ ForeignKey '<%= key.name %>', '<%= key.parent.name %>', '<%= key.entity.name %>', count: <%= key.cardinality %>
33
+
34
+ <% end %>
35
+
36
+ # Define queries and their relative weights
37
+ <% workload.statement_weights.each do |statement, weight| %>
38
+ Q '<%= statement.query %>', <%= weight %>
39
+
40
+ <% end %>
41
+ end
42
+ # rubocop:enable all