nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,293 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'faker'
5
+ require 'forwardable'
6
+ require 'zlib'
7
+
8
+ module NoSE
9
+ # Fields attached to each entity in the entity graph model
10
+ module Fields
11
# Base class for one attribute attached to an {Entity}
#
# NOTE(review): @parent is read by several methods here but never assigned
# in this class - presumably the owning entity sets it; confirm in Entity.
class Field
  include Supertype

  attr_reader :name, :size, :parent
  attr_accessor :primary_key
  alias primary_key? primary_key

  # Ruby class of the values stored in this field (overridden by subclasses)
  TYPE = nil

  # @param name the name of the field
  # @param size the size recorded for the field
  # @param count an optional explicit cardinality for the field
  def initialize(name, size, count: nil)
    @primary_key = false
    @cardinality = count
    @size = size
    @name = name
  end

  # Two fields are equal when they share a parent entity and a name
  # @return [Boolean]
  def ==(other)
    return false unless other.is_a?(Field)

    @parent == other.parent && @name == other.name
  end
  alias eql? ==

  # Hash derived from {#id}, cached after the first call
  # @return [Integer]
  def hash
    @hash ||= id.hash
  end

  # :nocov:
  # Colour-tagged "entity.field" representation
  # @return [String]
  def to_color
    "[blue]#{@parent.name}[/].[blue]#{@name}[/]"
  end
  # :nocov:

  # :nocov:
  # Plain "entity.field" representation
  # @return [String]
  def to_s
    "#{@parent.name}.#{@name}"
  end
  # :nocov:

  # Identifier string built from the parent name and the field name,
  # cached after the first call
  # @return [String]
  def id
    @id ||= "#{@parent.name}_#{@name}"
  end

  # Record an estimated cardinality for the field (DSL-style operator)
  # @return [Field] the receiver, so the call can be chained
  def *(other)
    tap { @cardinality = other }
  end

  # The cardinality set earlier if any, otherwise the count of the parent
  # entity, otherwise 1
  def cardinality
    return @cardinality if @cardinality

    @parent.count || 1
  end

  # @abstract Subclasses should produce a typed value from a string
  # :nocov:
  def self.value_from_string(_string)
    raise NotImplementedError
  end
  # :nocov:

  # @abstract Subclasses should produce a random value of the correct type
  # :nocov:
  def random_value
    raise NotImplementedError
  end
  # :nocov:

  # Hook run for every subclass: registers a DSL helper for the new field
  # type and mixes Subtype into it. ForeignKeyField and its descendants are
  # skipped because foreign keys use separate methods.
  def self.inherited(child_class)
    # ForeignKeyField may not be defined yet while earlier classes load
    fk_class = begin
      Fields.const_get('ForeignKeyField')
    rescue NameError
      nil
    end
    return if fk_class && child_class <= fk_class

    add_field_method(child_class)
    child_class.send(:include, Subtype)
  end
  private_class_method :inherited

  # Define a method on EntityDSL named after the field class (namespace and
  # optional "Field" suffix removed) which builds a new field of that class
  # and appends it to the DSL object's @entity
  def self.add_field_method(child_class)
    method_name = child_class.name.sub(/^NoSE::Fields::(.*?)(Field)?$/, '\1')
    EntityDSL.send(:define_method, method_name) do |*args|
      entity = send(:instance_variable_get, :@entity)
      entity.send(:<<, child_class.new(*args))
    end
  end
  private_class_method :add_field_method
end
113
+
114
# Field holding an integer
class IntegerField < Field
  # Integers are stored as integers
  TYPE = Integer

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize}
  #   (e.g. +count:+ for an explicit cardinality)
  def initialize(name, **options)
    super(name, 8, **options)

    # Default to 10 distinct values, but only when the caller did not
    # supply a cardinality; a plain assignment here would silently discard
    # the +count:+ option that super has just stored
    @cardinality ||= 10
  end

  # Parse an Integer from the provided parameter
  # @return [Integer]
  def self.value_from_string(string)
    string.to_i
  end

  # Random non-negative integer below the field cardinality
  # @return [Integer]
  def random_value
    rand(@cardinality)
  end
end
136
+
137
# Field holding a boolean value
class BooleanField < Field
  # Since Ruby has no boolean type, we use Object
  # but all values will be either false or true
  TYPE = Object

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize}
  def initialize(name, **options)
    super(name, 1, **options)

    # A boolean can only ever take two values
    @cardinality = 2
  end

  # Interpret a string as a boolean. Strings starting with "t" or "f"
  # (case-insensitive) map to true and false; anything else is parsed as an
  # integer where zero is false and any other value is true.
  #
  # Indexing a two-element array with the parsed integer would return nil
  # for values above 1 and wrap around for negative values, so the integer
  # is compared against zero instead.
  # @return [Boolean]
  def self.value_from_string(string)
    case string.downcase[0]
    when 't' then true
    when 'f' then false
    else !string.to_i.zero?
    end
  end

  # Randomly true or false
  # @return [Boolean]
  def random_value
    rand(2) == 1
  end
end
167
+
168
# Field holding a float
class FloatField < Field
  # Any integer is a valid float. Integer replaces the Fixnum constant,
  # which was deprecated in Ruby 2.4 (where it became an alias of Integer)
  # and removed in Ruby 3.2, so referencing it fails on current Rubies.
  # NOTE(review): values built by value_from_string are Float instances -
  # confirm how TYPE is checked by callers.
  TYPE = Integer

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize}
  def initialize(name, **options)
    super(name, 8, **options)
  end

  # Parse a Float from the provided parameter
  # @return [Float]
  def self.value_from_string(string)
    string.to_f
  end

  # Random number drawn with the field cardinality as the bound,
  # converted to a Float
  # @return [Float]
  def random_value
    rand(@cardinality).to_f
  end
end
187
+
188
# Field whose values are strings; the field size is the string length
class StringField < Field
  # Values are kept as plain Ruby strings
  TYPE = String

  # @param name the name of the field
  # @param length the length used as the field size (10 when omitted)
  # @param opts keyword options forwarded to {Field#initialize}
  def initialize(name, length = 10, **opts)
    super(name, length, **opts)
  end

  # No conversion is needed: the input string is the value
  # @return [String]
  def self.value_from_string(string)
    string
  end

  # Random characters generated by Faker, with the field size as the length
  # @return [String]
  def random_value
    Faker::Lorem.characters(@size)
  end
end
209
+
210
# Field whose values are timestamps
class DateField < Field
  # Timestamps are represented with Time
  TYPE = Time

  # @param name the name of the field
  # @param opts keyword options forwarded to {Field#initialize}
  def initialize(name, **opts)
    super(name, 8, **opts)
  end

  # Parse the string with DateTime and convert the result to a Time
  # @return [Time]
  # @raise [TypeError] when DateTime.parse rejects the string
  def self.value_from_string(string)
    DateTime.parse(string).to_time
  rescue ArgumentError
    raise TypeError
  end

  # A random time between one year ago and one year from now, with both
  # bounds expressed at a zero UTC offset
  # @return [Time]
  def random_value
    lower = DateTime.now.prev_year
    lower = lower.new_offset(Rational(0, 24))

    upper = DateTime.now.next_year
    upper = upper.new_offset(Rational(0, 24))

    Faker::Time.between(lower, upper)
  end
end
243
+
244
# Field whose value is a hash of multiple values
class HashField < Field
  # @param name the name of the field
  # @param size the size recorded for the field (1 when omitted)
  # @param opts keyword options forwarded to {Field#initialize}
  def initialize(name, size = 1, **opts)
    super(name, size, **opts)
  end
end
250
+
251
# Field storing a unique identifier; flagged as a primary key on creation
class IDField < Field
  # For an ID field, the entity is simply the parent of the field
  alias_method :entity, :parent

  # @param name the name of the field
  # @param opts keyword options forwarded to {Field#initialize}
  def initialize(name, **opts)
    super(name, 16, **opts)
    @primary_key = true
  end

  # Identifiers are used exactly as given
  # @return [String]
  def self.value_from_string(string)
    string
  end

  # Always nil, which the backend interprets as a request for a new ID
  # @return [nil]
  def random_value
    nil
  end
end
272
+
273
# Field referencing another entity through a foreign key
class ForeignKeyField < IDField
  attr_reader :entity, :relationship
  attr_accessor :reverse

  # @param name the name of the field
  # @param entity the entity this key refers to
  # @param options keyword options; +:relationship+ is consumed here
  #   (defaulting to +:one+) and the rest go to {IDField#initialize}
  def initialize(name, entity, **options)
    relationship = options.delete(:relationship)
    @relationship = relationship || :one
    super(name, **options)

    # IDField marks itself as a primary key; a foreign key is not one
    @primary_key = false
    @entity = entity
  end

  # The count of the referenced entity when it has one, otherwise the
  # cardinality computed by the superclass
  # @return [Integer]
  def cardinality
    @entity.count || super
  end
end
292
+ end
293
+ end
data/lib/nose/model.rb ADDED
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'model/entity'
4
+ require_relative 'model/fields'
5
+
6
+ require 'graphviz'
7
+
8
+ module NoSE
9
# A conceptual data model of a set of entities
class Model
  # The subdirectory models are loaded from
  LOAD_PATH = 'models'
  include Loader

  attr_reader :entities

  # Create an empty model and, when a block is given, evaluate it in the
  # context of a WorkloadDSL wrapping this model
  def initialize(&block)
    @entities = {}

    # Apply the DSL
    WorkloadDSL.new(self).instance_eval(&block) if block_given?
  end

  # Compare all entities
  #
  # This must be a comparison: an assignment here would replace the
  # receiver's entities with those of +other+ and report a truthy result
  # for any pair of models.
  # @return [Boolean]
  def ==(other)
    other.is_a?(Model) && @entities == other.entities
  end
  alias eql? ==

  # Retrieve an entity by name
  # @return [Entity]
  # @raise [EntityNotFound] when no entity with that name was added
  def [](name)
    return @entities[name] if @entities.key? name

    raise EntityNotFound
  end

  # Add an {Entity} to the workload
  # @return [Entity]
  # @raise [InvalidEntity] when the entity has no ID field
  def add_entity(entity)
    raise InvalidEntity, 'no primary key defined' if entity.id_field.nil?

    @entities[entity.name] = entity
  end

  # Find a field given an +Enumerable+ of identifiers. More than two
  # identifiers are resolved by following foreign keys one step at a time.
  # @return [Field]
  def find_field(field)
    if field.count > 2
      find_field_chain field
    else
      find_entity_field(*field)
    end
  end

  # Output a graph of the entities in the model
  # @param format the GraphViz output format, used as the option key
  # @param filename the file to write
  # @param include_fields whether to list each entity's fields in its node
  def output(format, filename, include_fields = false)
    graph = GraphViz.new :G, type: :digraph
    nodes = add_graph_nodes graph, include_fields
    add_graph_edges graph, nodes

    graph.output(**{ format => filename })
  end

  private

  # Add the nodes (entities) to a GraphViz object
  # @return [Hash] the created nodes keyed by entity name
  def add_graph_nodes(graph, include_fields)
    @entities.each_value.map do |entity|
      label = "#{entity.name}\n"
      if include_fields
        label += entity.fields.each_value.map do |field|
          # Strip the namespace (including Fields::) and the "Field" suffix
          # so the label shows e.g. "Integer"
          type = field.class.name.sub(/^NoSE::(?:Fields::)?(.*?)(Field)?$/,
                                      '\1')
          "#{field.name}: #{type}"
        end.join("\n")
      end

      [entity.name, graph.add_nodes(label)]
    end.to_h
  end

  # Add the edges (foreign keys) to a GraphViz object
  def add_graph_edges(graph, nodes)
    @entities.each_value do |entity|
      entity.foreign_keys.each_value do |key|
        graph.add_edges nodes[entity.name], nodes[key.entity.name]
      end
    end
  end

  # Resolve the first two identifiers (entity name, key name) to the name of
  # the entity the key points at, then continue with the shortened list
  def find_field_chain(field)
    # Do a foreign key lookup on a copy so the caller's list is untouched
    field = field.dup
    key_field = @entities[field[0]][field[1]]
    field[0..1] = key_field ? key_field.entity.name : field[1]
    find_field field
  end

  # Find a field in an entity where the entity may be a string or an object
  def find_entity_field(entity, field)
    entity = entities[entity] if entity.is_a?(String)
    entity[field]
  end
end
105
+
106
# Error raised when a lookup names an entity the workload does not contain
class EntityNotFound < StandardError; end
109
+
110
# Error raised when an entity cannot be accepted into a workload
class InvalidEntity < StandardError; end
113
+ end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'parslet'
4
+
5
+ # rubocop:disable Style/ClassAndModuleChildren
6
+
7
# Parslet DSL extension for capturing the input source
class CaptureSource < Parslet::Atoms::Capture
  # Ugly hack to capture the source string that was parsed
  #
  # Applies the wrapped atom and, on success, stores the text it consumed
  # in the result under the key +<name>_source+.
  # @return the result of the superclass +apply+, unchanged apart from the
  #   added key
  def apply(source, context, consume_all)
    # Parslet exposes no accessor for the scanner, hence the ivar peek
    before = source.instance_variable_get(:@str).rest
    success, value = result = super(source, context, consume_all)
    if success
      # Save the portion of the source string that was consumed. A
      # start/length slice is used because an inclusive range ending at
      # (consumed - 1) becomes 0..-1 when nothing was consumed, which
      # selects the whole remaining input instead of an empty string.
      after = source.instance_variable_get(:@str).rest
      consumed = before.length - after.length
      value[(name.to_s + '_source').to_sym] = before[0, consumed]
    end

    result
  end
end
23
+
24
# Reopened so a named capture can optionally force its result into an array
class Parslet::Atoms::Named < Parslet::Atoms::Base
  # @param parslet the wrapped atom
  # @param name the key the result is stored under
  # @param array whether to wrap the result in an array
  #   (see #produce_return_value)
  def initialize(parslet, name, array = false)
    super()
    @parslet, @name, @array = parslet, name, array
  end

  private

  # Build the single-entry hash for this capture; when array mode is on and
  # the raw value ends with [:repetition], the flattened value is wrapped
  # in an array
  def produce_return_value(val)
    flattened = flatten(val, true)
    wrap = @array && val.last == [:repetition]
    { name => wrap ? [flattened] : flattened }
  end
end
42
+
43
# Extend the DSL with some additional ways to capture the output
module Parslet::Atoms::DSL
  # Variant of #as that builds the named capture in array mode
  # @return [Parslet::Atoms::Named]
  def as_array(name)
    array_mode = true
    Parslet::Atoms::Named.new(self, name, array_mode)
  end

  # Wrap this atom so the matched source text is recorded with the output
  # @return [CaptureSource]
  def capture_source(name)
    CaptureSource.new(self, name)
  end
end
57
+
58
+ # rubocop:enable Style/ClassAndModuleChildren
59
+
60
+ module NoSE
61
+ # rubocop:disable Style/BlockEndNewline, Style/BlockDelimiters
62
+ # rubocop:disable Style/MultilineOperationIndentation
63
+
64
# Grammar rules for literal values appearing in queries and updates
module Literals
  include Parslet

  # One or more decimal digits
  rule(:integer) do
    match('[0-9]').repeat(1).as(:int)
  end

  # A double quote character
  rule(:quote) do
    str('"')
  end

  # Any single character other than a double quote
  rule(:nonquote) do
    quote.absent? >> any
  end

  # A non-empty run of characters between double quotes
  rule(:string) do
    quote >> nonquote.repeat(1).as(:str) >> quote
  end

  # An integer, a string, or the ? placeholder
  rule(:literal) do
    integer | string | str('?').as(:unknown)
  end
end
74
+
75
# Grammar rules for the conditions of queries and updates
module Predicates
  include Parslet

  # Comparison operators, tried in the listed order
  rule(:operator) do
    %w[= != <= >= < >].map { |symbol| str(symbol) }.reduce(:|)
  end

  # A single comparison: field, operator, literal
  rule(:condition) do
    field.as(:field) >> space? >> operator.as(:op) >> space? >>
      literal.as(:value)
  end

  # One condition optionally followed by further AND-ed expressions
  rule(:expression) do
    condition >> (space >> str('AND') >> space >> expression).repeat
  end

  # The WHERE keyword followed by an expression captured as an array
  rule(:where) do
    space >> str('WHERE') >> space >> expression.as_array(:expression)
  end
end
89
+
90
# Identifiers and combinations of them used in queries and updates
module Identifiers
  include Parslet

  # A non-empty run of ASCII letters and underscores. The range A-z is
  # deliberately avoided: it also spans the characters between "Z" and "a"
  # in ASCII ([ \ ] ^ _ and backtick), so only underscore is kept.
  rule(:identifier) { match('[A-Za-z_]').repeat(1).as(:identifier) }

  # An identifier followed by at least one dotted identifier
  rule(:field) { identifier >> (str('.') >> identifier).repeat(1) }

  # A comma separated list of fields
  rule(:fields) { field >> (comma >> field).repeat }

  # Either a field, or an identifier followed by ".*" or ".**"
  rule(:select_field) {
    field.as_array(:field) | (identifier >> str('.') >>
                              str('*').repeat(1, 2).as(:identifier2)) }

  # A comma separated list of select fields
  rule(:select_fields) { select_field >> (comma >> select_field).repeat }

  # An identifier followed by any number of dotted identifiers
  rule(:path) { identifier >> (str('.') >> identifier).repeat }
end
103
+
104
# Grammar rules for the assignments of update and insert statements
module UpdateSettings
  include Parslet

  # A single assignment: an identifier (or "**"), "=", and a literal
  rule(:setting) do
    (identifier | str('**')).as(:field) >> space? >> str('=') >> space? >>
      literal.as(:value)
  end

  # A comma separated list of assignments
  rule(:settings) do
    setting >> (space? >> str(',') >> space? >> setting).repeat
  end
end
116
+
117
# Parslet parser for the CQL-like statement language
class CQLP < Parslet::Parser
  include Literals
  include Identifiers
  include Predicates
  include UpdateSettings

  # Whitespace handling
  rule(:space) do
    match('\s').repeat(1)
  end
  rule(:space?) do
    space.maybe
  end
  rule(:comma) do
    str(',') >> space?
  end

  # Optional trailing clauses of a query
  rule(:limit) do
    space >> str('LIMIT') >> space >> integer.as(:limit)
  end
  rule(:order) do
    space >> str('ORDER BY') >> space >> fields.as_array(:fields)
  end

  # A comment introduced by " -- "
  rule(:comment) do
    str(' -- ') >> match('.').repeat
  end

  # SELECT ... FROM ... [WHERE ...] [ORDER BY ...] [LIMIT ...] [comment]
  rule(:query) do
    str('SELECT') >> space >> select_fields.as_array(:select) >>
      space >> str('FROM') >> space >> path.as_array(:path) >>
      where.maybe.as(:where) >> order.maybe.as(:order) >>
      limit.maybe.capture(:limit) >> comment.maybe.as(:comment)
  end

  # UPDATE entity [FROM path] SET settings [WHERE ...] [comment]
  rule(:update) do
    from_clause = (str('FROM') >> space >> path.as_array(:path) >> space).maybe

    str('UPDATE') >> space >> identifier.as(:entity) >> space >>
      from_clause >>
      str('SET') >> space >> settings.as_array(:settings) >>
      where.maybe.as(:where).capture_source(:where) >>
      comment.maybe.as(:comment)
  end

  # A connection target: identifier followed by a parenthesised literal
  rule(:connect_item) do
    identifier.as(:target) >> space? >> str('(') >> space? >>
      literal.as(:target_pk) >> space? >> str(')')
  end

  # A comma separated list of connection targets
  rule(:connect_list) do
    connect_item >> (space? >> str(',') >> space? >> connect_item).repeat
  end

  # INSERT INTO entity SET settings [AND CONNECT TO targets] [comment]
  rule(:insert) do
    connections = (space >> str('AND') >> space >> str('CONNECT') >> space >>
                   str('TO') >> space >>
                   connect_list.as_array(:connections)).maybe

    str('INSERT INTO') >> space >> identifier.as(:entity) >> space >>
      str('SET') >> space >> settings.as_array(:settings) >>
      connections >>
      comment.maybe.as(:comment)
  end

  # DELETE entity [FROM path] [WHERE ...] [comment]
  rule(:delete) do
    from_clause = (space >> str('FROM') >> space >> path.as_array(:path)).maybe

    str('DELETE') >> space >> identifier.as(:entity) >>
      from_clause >>
      where.maybe.as(:where).capture_source(:where) >>
      comment.maybe.as(:comment)
  end

  # CONNECT entity(pk) TO target(pk) / DISCONNECT entity(pk) FROM target(pk)
  rule(:connect) do
    # The keyword expected here depends on the statement type captured at
    # the start of the rule
    preposition = dynamic { |_, context|
      context.captures[:type] == 'CONNECT' ? str('TO') : str('FROM')
    }

    (str('CONNECT') | str('DISCONNECT')).capture(:type) >> space >>
      identifier.as(:entity) >> space? >> str('(') >> space? >>
      literal.as(:source_pk) >> space? >> str(')') >> space >>
      preposition >> space >> connect_item
  end

  # Any one of the supported statement kinds
  rule(:statement) do
    query | update | insert | delete | connect
  end

  root :statement
end
187
+
188
# Transform applied to the CQL parse tree to simplify its nodes
class CQLT < Parslet::Transform
  # A lone identifier node becomes the identifier itself
  rule(identifier: simple(:identifier)) do
    identifier
  end

  # An identifier paired with a wildcard becomes a two-element string array
  rule(identifier: simple(:identifier), identifier2: simple(:identifier2)) do
    [identifier.to_s, identifier2.to_s]
  end

  # Fields and paths become arrays of strings
  rule(field: sequence(:id)) do
    id.map(&:to_s)
  end
  rule(path: sequence(:id)) do
    id.map(&:to_s)
  end

  # String literals become plain strings
  rule(str: simple(:string)) do
    string.to_s
  end

  # A statement node is replaced by the value of its first entry
  rule(statement: subtree(:stmt)) do
    stmt.first.last
  end

  # Integer literals are passed through as matched
  rule(int: simple(:integer)) do
    integer
  end

  # The ? placeholder becomes nil
  rule(unknown: simple(:val)) do
    nil
  end
end
200
+
201
+ # rubocop:enable all
202
+ end