nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,293 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'faker'
5
+ require 'forwardable'
6
+ require 'zlib'
7
+
8
+ module NoSE
9
+ # Fields attached to each entity in the entity graph model
10
+ module Fields
11
# A single field on an {Entity}
#
# Every subclass other than foreign keys is registered as a helper method on
# EntityDSL when it is defined (see {.inherited}).
class Field
  include Supertype

  attr_reader :name, :size, :parent
  attr_accessor :primary_key
  alias primary_key? primary_key

  # The Ruby type of values stored in this field
  TYPE = nil

  # @param name the name of the field
  # @param size the size recorded for the field
  # @param count [Integer, nil] optional explicit cardinality estimate
  def initialize(name, size, count: nil)
    @name = name
    @size = size
    @cardinality = count
    @primary_key = false
    # NOTE(review): @parent is never assigned in this class; presumably the
    # owning entity sets it when the field is attached - confirm
  end

  # Compare by parent entity and name
  # @return [Boolean]
  def ==(other)
    other.is_a?(Field) && @parent == other.parent &&
      @name == other.name
  end
  alias eql? ==

  # Hash by entity and name (memoized hash of {#id})
  # @return [Integer]
  def hash
    @hash ||= id.hash
  end

  # :nocov:
  # Entity and field name wrapped in [blue] colour markup
  # @return [String]
  def to_color
    "[blue]#{@parent.name}[/].[blue]#{@name}[/]"
  end
  # :nocov:

  # :nocov:
  # Entity and field name joined by a dot
  # @return [String]
  def to_s
    "#{@parent.name}.#{@name}"
  end
  # :nocov:

  # A simple string representing the field (memoized)
  # @return [String]
  def id
    @id ||= "#{@parent.name}_#{@name}"
  end

  # Set the estimated cardinality of the field
  # @return [Field] self, so the call can be chained
  def *(other)
    @cardinality = other
    self
  end

  # Return the previously set cardinality, falling back to the number of
  # entities for the field if set, or just 1
  def cardinality
    @cardinality || @parent.count || 1
  end

  # @abstract Subclasses should produce a typed value from a string
  # :nocov:
  def self.value_from_string(_string)
    fail NotImplementedError
  end
  # :nocov:

  # @abstract Subclasses should produce a random value of the correct type
  # :nocov:
  def random_value
    fail NotImplementedError
  end
  # :nocov:

  # Populate a helper DSL object with all subclasses of Field
  def self.inherited(child_class)
    # We use separate methods for foreign keys, so skip ForeignKeyField and
    # its descendants once that class has been defined
    fk_class = nil
    if Fields.const_defined?('ForeignKeyField')
      fk_class = Fields.const_get('ForeignKeyField')
    end
    return if fk_class && child_class <= fk_class

    add_field_method(child_class)
    child_class.send(:include, Subtype)
  end
  private_class_method :inherited

  # Add convenience methods for all field types for an entity DSL
  #
  # The method is named after the class with the +NoSE::Fields::+ prefix and
  # the optional +Field+ suffix removed. It builds a new field from its
  # arguments and appends it to the DSL object's +@entity+.
  def self.add_field_method(child_class)
    method_regex = /^NoSE::Fields::(.*?)(Field)?$/
    method_name = child_class.name.sub(method_regex, '\1')

    # Keywords are captured and forwarded explicitly: with only `*args`,
    # Ruby 3 would hand options such as `count:` to the constructor as a
    # positional Hash and raise ArgumentError
    EntityDSL.send :define_method, method_name,
                   (proc do |*args, **options|
                     send(:instance_variable_get, :@entity).send \
                       :<<, child_class.new(*args, **options)
                   end)
  end
  private_class_method :add_field_method
end
113
+
114
# Field holding an integer
class IntegerField < Field
  # Integers are stored as integers
  TYPE = Integer

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize}
  #   (e.g. +count:+); the field size is fixed at 8
  def initialize(name, **options)
    super(name, 8, **options)

    # Default to 10 distinct values, but keep an explicit `count:` instead
    # of silently overwriting it
    @cardinality ||= 10
  end

  # Parse an Integer from the provided parameter
  # @return [Integer]
  def self.value_from_string(string)
    string.to_i
  end

  # Random non-negative integer below the field's stored cardinality
  # @return [Integer]
  def random_value
    rand(@cardinality)
  end
end
136
+
137
# Field holding a boolean value
class BooleanField < Field
  # Since Ruby has no boolean type, we use Object
  # but all values will be either false or true
  TYPE = Object

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize};
  #   the field size is fixed at 1
  def initialize(name, **options)
    super(name, 1, **options)

    # A boolean only has two distinct values
    @cardinality = 2
  end

  # Check for strings true or false otherwise assume integer
  #
  # Anything starting with "t" or "f" (case-insensitive) maps to true or
  # false. Everything else is read as an integer where zero is false and
  # any non-zero value is true, so the result is never nil.
  # @return [Boolean]
  def self.value_from_string(string)
    case string.downcase[0]
    when 't' then true
    when 'f' then false
    else !string.to_i.zero?
    end
  end

  # Randomly true or false
  # @return [Boolean]
  def random_value
    [false, true][rand(2)]
  end
end
167
+
168
# Field holding a float
class FloatField < Field
  # Any Integer is a valid float. Fixnum was merged into Integer in
  # Ruby 2.4 and the constant was removed in Ruby 3.2, so referencing it
  # would raise NameError on current Rubies.
  TYPE = Integer

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize};
  #   the field size is fixed at 8
  def initialize(name, **options)
    super(name, 8, **options)
  end

  # Parse a Float from the provided parameter
  # @return [Float]
  def self.value_from_string(string)
    string.to_f
  end

  # Random number bounded by the stored cardinality, converted to a Float
  # @return [Float]
  def random_value
    rand(@cardinality).to_f
  end
end
187
+
188
# Field holding a string of some average length
class StringField < Field
  # Values are plain Ruby strings
  TYPE = String

  # @param name the name of the field
  # @param length the size recorded for the field (10 unless given)
  # @param options keyword options forwarded to {Field#initialize}
  def initialize(name, length = 10, **options)
    # Bare super forwards name, length and the keyword options unchanged
    super
  end

  class << self
    # Strings need no conversion, so the argument is handed back untouched
    # @return [String]
    def value_from_string(string)
      string
    end
  end

  # Random characters from Faker, using the field size as the length
  # @return [String]
  def random_value
    Faker::Lorem.characters(@size)
  end
end
209
+
210
# Field holding a date
class DateField < Field
  # Timestamps are represented with Time
  TYPE = Time

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize};
  #   the field size is fixed at 8
  def initialize(name, **options)
    super(name, 8, **options)
  end

  class << self
    # Parse the string with DateTime and convert the result to a Time
    # @return [Time]
    # @raise [TypeError] if DateTime.parse rejects the string
    def value_from_string(string)
      DateTime.parse(string).to_time
    rescue ArgumentError
      raise TypeError
    end
  end

  # A random time between one year before and one year after now
  # @return [Time]
  def random_value
    zero_offset = Rational(0, 24)
    earliest = DateTime.now.prev_year.new_offset(zero_offset)
    latest = DateTime.now.next_year.new_offset(zero_offset)

    Faker::Time.between earliest, latest
  end
end
243
+
244
# Field representing a hash of multiple values
class HashField < Field
  # @param name the name of the field
  # @param size the size recorded for the field (1 unless given)
  # @param options keyword options forwarded to {Field#initialize}
  def initialize(name, size = 1, **options)
    # Bare super forwards name, size and the keyword options unchanged
    super
  end
end
250
+
251
# Field holding a unique identifier
class IDField < Field
  # An ID field's entity is simply the entity it belongs to
  alias_method :entity, :parent

  # @param name the name of the field
  # @param options keyword options forwarded to {Field#initialize};
  #   the field size is fixed at 16
  def initialize(name, **options)
    super(name, 16, **options)

    # ID fields are primary keys by default
    @primary_key = true
  end

  class << self
    # Identifiers are kept as the string which was provided
    # @return [String]
    def value_from_string(string)
      string
    end
  end

  # nil value which is interpreted by the backend as requesting a new ID
  # @return [nil]
  def random_value
    nil
  end
end
272
+
273
# Field holding a foreign key to another entity
class ForeignKeyField < IDField
  attr_reader :entity, :relationship
  attr_accessor :reverse

  # @param name the name of the field
  # @param entity the entity this key points to
  # @param options keyword options; +relationship:+ is consumed here
  #   (defaulting to +:one+) and the rest go to {IDField#initialize}
  def initialize(name, entity, **options)
    # Strip :relationship before calling super, which would not accept it
    relationship = options.delete(:relationship)
    @relationship = relationship || :one

    super(name, **options)

    # Undo the primary key flag set by IDField
    @primary_key = false
    @entity = entity
  end

  # The count of the referenced entity when it has one, otherwise whatever
  # {Field#cardinality} returns
  # @return [Integer]
  def cardinality
    @entity.count || super
  end
end
292
+ end
293
+ end
data/lib/nose/model.rb ADDED
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'model/entity'
4
+ require_relative 'model/fields'
5
+
6
+ require 'graphviz'
7
+
8
+ module NoSE
9
# A conceptual data model of a set of entities
class Model
  # The subdirectory models are loaded from
  LOAD_PATH = 'models'
  include Loader

  attr_reader :entities

  # Create an empty model and, when a block is given, evaluate it in the
  # context of a WorkloadDSL wrapping this model
  def initialize(&block)
    @entities = {}

    # Apply the DSL
    WorkloadDSL.new(self).instance_eval(&block) if block_given?
  end

  # Compare all entities
  # @return [Boolean]
  def ==(other)
    # Must be `==`: an assignment here would replace this model's entities
    # with the other model's and make every comparison truthy
    other.is_a?(Model) && @entities == other.entities
  end
  alias eql? ==

  # Retrieve an entity by name
  # @return [Entity]
  # @raise [EntityNotFound] if no entity is registered under the name
  def [](name)
    return @entities[name] if @entities.key? name
    fail EntityNotFound
  end

  # Add an {Entity} to the workload
  # @return [Entity]
  # @raise [InvalidEntity] if the entity has no ID field
  def add_entity(entity)
    fail InvalidEntity, 'no primary key defined' if entity.id_field.nil?
    @entities[entity.name] = entity
  end

  # Find a field given an +Enumerable+ of identifiers
  #
  # Two identifiers are treated as an entity and a field. Longer lists are
  # resolved step by step through foreign keys.
  # @return [Field]
  def find_field(field)
    if field.count > 2
      find_field_chain field
    else
      find_entity_field(*field)
    end
  end

  # Output a GraphViz rendering of the entities in the model
  #
  # +format+ is used as the keyword name and +filename+ as its value in the
  # call to GraphViz#output (for example +:png+ and a path).
  def output(format, filename, include_fields = false)
    graph = GraphViz.new :G, type: :digraph
    nodes = add_graph_nodes graph, include_fields
    add_graph_edges graph, nodes

    graph.output(**{ format => filename })
  end

  private

  # Add the nodes (entities) to a GraphViz object
  # @return [Hash] created nodes keyed by entity name
  def add_graph_nodes(graph, include_fields)
    # Strip the namespace (including Fields::) and the optional Field
    # suffix so that labels read e.g. "Integer" and not "Fields::Integer"
    type_regex = /^NoSE::(?:Fields::)?(.*?)(Field)?$/

    @entities.each_value.map do |entity|
      label = "#{entity.name}\n"
      if include_fields
        label += entity.fields.each_value.map do |field|
          type = field.class.name.sub(type_regex, '\1')
          "#{field.name}: #{type}"
        end.join("\n")
      end

      [entity.name, graph.add_nodes(label)]
    end.to_h
  end

  # Add the edges (foreign keys) to a GraphViz object
  def add_graph_edges(graph, nodes)
    @entities.each_value do |entity|
      entity.foreign_keys.each_value do |key|
        graph.add_edges nodes[entity.name], nodes[key.entity.name]
      end
    end
  end

  # Resolve a list of more than two identifiers by following a foreign key
  #
  # The first two identifiers (entity, key) are replaced by the name of the
  # entity the key points to, or by the second identifier alone if no such
  # field exists. The shortened list is then passed back to {#find_field}.
  def find_field_chain(field)
    # Do a foreign key lookup on a copy so the caller's list is untouched
    field = field.dup
    key_field = @entities[field[0]][field[1]]
    field[0..1] = key_field ? key_field.entity.name : field[1]
    find_field field
  end

  # Find a field in an entity where the entity may be a string or an object
  def find_entity_field(entity, field)
    entity = entities[entity] if entity.is_a?(String)
    entity[field]
  end
end
105
+
106
# Error signalling that a requested entity is not present in the workload
class EntityNotFound < StandardError; end
109
+
110
# Error signalling that an entity being added to a workload is not valid
class InvalidEntity < StandardError; end
113
+ end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'parslet'
4
+
5
+ # rubocop:disable Style/ClassAndModuleChildren
6
+
7
# Parslet DSL extension for capturing the input source
class CaptureSource < Parslet::Atoms::Capture
  # Ugly hack to capture the source string that was parsed
  #
  # On success, the text consumed by this atom is stored in the result under
  # the key +<name>_source+. The consumed text is worked out by comparing
  # the unparsed remainder of the input before and after parsing.
  # @return the unmodified result pair of the wrapped parse
  def apply(source, context, consume_all)
    # Reaches into the source's private @str to read the remaining input
    before = source.instance_variable_get(:@str).rest
    success, value = result = super(source, context, consume_all)
    if success
      # Save the portion of the source string
      after = source.instance_variable_get(:@str).rest

      # Slice by length and not by an inclusive range: when nothing was
      # consumed, a range ending at -1 would return the entire remaining
      # input instead of an empty string
      consumed = before.length - after.length
      value["#{name}_source".to_sym] = before[0, consumed]
    end

    result
  end
end
23
+
24
# Reopen Parslet's named capture so that it can optionally yield arrays
class Parslet::Atoms::Named < Parslet::Atoms::Base
  # @param parslet the atom being named
  # @param name the key the result is stored under
  # @param array [Boolean] whether repetition results get wrapped in an Array
  def initialize(parslet, name, array = false)
    super()
    @array = array
    @name = name
    @parslet = parslet
  end

  private

  # Build the single-entry hash returned for this capture, wrapping the
  # flattened value in an array when array mode is on and the raw value
  # ends with the [:repetition] marker
  def produce_return_value(val)
    flattened = flatten(val, true)
    wrap = @array && val.last == [:repetition]
    { name => (wrap ? [flattened] : flattened) }
  end
end
42
+
43
# Extend the DSL with some additional ways to capture the output
module Parslet::Atoms::DSL
  # Like #as, but constructs the named capture with its array flag set
  # @return [Parslet::Atoms::Named]
  def as_array(name)
    wrap_in_array = true
    Parslet::Atoms::Named.new(self, name, wrap_in_array)
  end

  # Wrap this atom so that the parsed source text is captured alongside
  # the regular output
  # @return [CaptureSource]
  def capture_source(name)
    CaptureSource.new(self, name)
  end
end
57
+
58
+ # rubocop:enable Style/ClassAndModuleChildren
59
+
60
+ module NoSE
61
+ # rubocop:disable Style/BlockEndNewline, Style/BlockDelimiters
62
+ # rubocop:disable Style/MultilineOperationIndentation
63
+
64
# Literals used in queries and updates
module Literals
  include Parslet

  # One or more decimal digits, captured as :int
  rule(:integer) do
    match('[0-9]').repeat(1).as(:int)
  end

  # A double quote character
  rule(:quote) do
    str('"')
  end

  # Any single character which is not a double quote
  rule(:nonquote) do
    quote.absent? >> any
  end

  # One or more non-quote characters between double quotes, captured as :str
  rule(:string) do
    quote >> nonquote.repeat(1).as(:str) >> quote
  end

  # An integer, a string, or a ? placeholder captured as :unknown
  rule(:literal) do
    integer | string | str('?').as(:unknown)
  end
end
74
+
75
# Predicates used in queries and updates
module Predicates
  include Parslet

  # Comparison operators; two-character operators are tried before < and >
  rule(:operator) do
    str('=') | str('!=') | str('<=') | str('>=') | str('<') | str('>')
  end

  # A field compared against a literal, with optional surrounding spaces
  rule(:condition) do
    field.as(:field) >> space? >> operator.as(:op) >> space? >>
      literal.as(:value)
  end

  # One condition followed by any number of AND-joined expressions
  rule(:expression) do
    condition >> (space >> str('AND') >> space >> expression).repeat
  end

  # A WHERE clause whose expression is always captured as an array
  rule(:where) do
    space >> str('WHERE') >> space >> expression.as_array(:expression)
  end
end
89
+
90
# Identifiers and combinations of them used in queries and updates
module Identifiers
  include Parslet

  # One or more ASCII letters or underscores.
  # The class is spelled out as A-Za-z_ because the range A-z also covers
  # the punctuation between "Z" and "a" in ASCII ([ \ ] ^ _ `), which would
  # wrongly accept brackets, backslashes, carets and backticks.
  rule(:identifier) { match('[A-Za-z_]').repeat(1).as(:identifier) }

  # Two or more identifiers separated by dots
  rule(:field) { identifier >> (str('.') >> identifier).repeat(1) }

  # A comma-separated list of fields
  rule(:fields) { field >> (comma >> field).repeat }

  # Either a regular field or an identifier followed by .* or .**
  rule(:select_field) {
    field.as_array(:field) | (identifier >> str('.') >>
                              str('*').repeat(1, 2).as(:identifier2)) }

  # A comma-separated list of selected fields
  rule(:select_fields) { select_field >> (comma >> select_field).repeat }

  # One or more identifiers separated by dots
  rule(:path) { identifier >> (str('.') >> identifier).repeat }
end
103
+
104
# Field settings for update and insert statements
module UpdateSettings
  include Parslet

  # A single assignment: an identifier (or **) = literal
  rule(:setting) do
    (identifier | str('**')).as(:field) >> space? >> str('=') >> space? >>
      literal.as(:value)
  end

  # One or more assignments separated by commas with optional spaces
  rule(:settings) do
    setting >> (space? >> str(',') >> space? >> setting).repeat
  end
end
116
+
117
# Parser for a simple CQL-like grammar
class CQLP < Parslet::Parser
  include Literals
  include Identifiers
  include Predicates
  include UpdateSettings

  # Mandatory whitespace
  rule(:space) do
    match('\s').repeat(1)
  end

  # Optional whitespace
  rule(:space?) do
    space.maybe
  end

  # A comma followed by optional whitespace
  rule(:comma) do
    str(',') >> space?
  end

  # LIMIT clause with an integer captured as :limit
  rule(:limit) do
    space >> str('LIMIT') >> space >> integer.as(:limit)
  end

  # ORDER BY clause with its fields captured as an array
  rule(:order) do
    space >> str('ORDER BY') >> space >> fields.as_array(:fields)
  end

  # Trailing comment introduced by " -- "
  rule(:comment) do
    str(' -- ') >> match('.').repeat
  end

  # SELECT ... FROM ... with optional WHERE, ORDER BY, LIMIT and comment
  rule(:query) do
    str('SELECT') >> space >> select_fields.as_array(:select) >>
      space >> str('FROM') >> space >> path.as_array(:path) >>
      where.maybe.as(:where) >> order.maybe.as(:order) >>
      limit.maybe.capture(:limit) >> comment.maybe.as(:comment)
  end

  # UPDATE entity [FROM path] SET settings with optional WHERE and comment;
  # the source text of the WHERE clause is captured as well
  rule(:update) do
    str('UPDATE') >> space >> identifier.as(:entity) >> space >>
      (str('FROM') >> space >> path.as_array(:path) >> space).maybe >>
      str('SET') >> space >> settings.as_array(:settings) >>
      where.maybe.as(:where).capture_source(:where) >>
      comment.maybe.as(:comment)
  end

  # A connection target: identifier(primary key literal)
  rule(:connect_item) do
    identifier.as(:target) >> space? >> str('(') >> space? >>
      literal.as(:target_pk) >> space? >> str(')')
  end

  # One or more connection targets separated by commas
  rule(:connect_list) do
    connect_item >> (space? >> str(',') >> space? >> connect_item).repeat
  end

  # INSERT INTO entity SET settings [AND CONNECT TO targets] [comment]
  rule(:insert) do
    str('INSERT INTO') >> space >> identifier.as(:entity) >> space >>
      str('SET') >> space >> settings.as_array(:settings) >>
      (space >> str('AND') >> space >> str('CONNECT') >> space >>
       str('TO') >> space >> connect_list.as_array(:connections)).maybe >>
      comment.maybe.as(:comment)
  end

  # DELETE entity [FROM path] with optional WHERE and comment; the source
  # text of the WHERE clause is captured as well
  rule(:delete) do
    str('DELETE') >> space >> identifier.as(:entity) >>
      (space >> str('FROM') >> space >> path.as_array(:path)).maybe >>
      where.maybe.as(:where).capture_source(:where) >>
      comment.maybe.as(:comment)
  end

  # CONNECT entity(pk) TO target(pk) or DISCONNECT entity(pk) FROM
  # target(pk); the keyword expected after the source depends on which
  # statement type was captured
  rule(:connect) do
    (str('CONNECT') | str('DISCONNECT')).capture(:type) >> space >>
      identifier.as(:entity) >> space? >> str('(') >> space? >>
      literal.as(:source_pk) >> space? >> str(')') >> space >>
      dynamic { |_, context|
        context.captures[:type] == 'CONNECT' ? str('TO') : str('FROM')
      } >> space >> connect_item
  end

  # Any one of the supported statement types
  rule(:statement) do
    query | update | insert | delete | connect
  end

  root(:statement)
end
187
+
188
# Simple transformations to clean up the CQL parse tree
class CQLT < Parslet::Transform
  # A lone identifier collapses to its value
  rule(identifier: simple(:identifier)) do
    identifier
  end

  # An identifier with a wildcard part becomes a pair of strings
  rule(identifier: simple(:identifier), identifier2: simple(:identifier2)) do
    [identifier.to_s, identifier2.to_s]
  end

  # Fields and paths become arrays of strings
  rule(field: sequence(:id)) do
    id.map(&:to_s)
  end
  rule(path: sequence(:id)) do
    id.map(&:to_s)
  end

  # String literals become plain strings
  rule(str: simple(:string)) do
    string.to_s
  end

  # A statement is replaced by the value of its first entry
  rule(statement: subtree(:stmt)) do
    stmt.first.last
  end

  # Integer literals are passed through as parsed
  rule(int: simple(:integer)) do
    integer
  end

  # Unknown (?) values become nil
  rule(unknown: simple(:val)) do
    nil
  end
end
200
+
201
+ # rubocop:enable all
202
+ end