nose 0.1.0pre

Files changed (55)
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 0cbc98973cefc286e5457924f9f314d79f8e00e2
+   data.tar.gz: f3182e0f7583fb068ddd3a679f1ad4ed34af4548
+ SHA512:
+   metadata.gz: c0e4526807ab3d17ca001361b6bbee74afeb94df64d54886f15eaac9045a460d2cd7109e56d6d567d2636267e992f9e4195f4d240bff4975545239d3843cf8fb
+   data.tar.gz: 9445a603f50c33bfcf2d055a48cac3c0ded0763a6631100a33f16c8e3c92b01beba512725a5f73b0a8671a63bd4a45dea44eb60fb42a82cc454915fecbd046a9
data/lib/nose/backend/cassandra.rb ADDED
@@ -0,0 +1,390 @@
+ # frozen_string_literal: true
+
+ require 'cassandra'
+ require 'zlib'
+
+ module NoSE
+   module Backend
+     # A backend which communicates with Cassandra via CQL
+     class CassandraBackend < BackendBase
+       def initialize(model, indexes, plans, update_plans, config)
+         super
+
+         @hosts = config[:hosts]
+         @port = config[:port]
+         @keyspace = config[:keyspace]
+         @generator = Cassandra::Uuid::Generator.new
+       end
+
+       # Generate a random UUID
+       def generate_id
+         @generator.uuid
+       end
+
+       # Produce the DDL necessary for column families for the given indexes
+       # and optionally execute them against the server
+       def indexes_ddl(execute = false, skip_existing = false,
+                       drop_existing = false)
+         Enumerator.new do |enum|
+           @indexes.map do |index|
+             ddl = index_cql index
+             enum.yield ddl
+
+             begin
+               drop_index(index) if drop_existing && index_exists?(index)
+               client.execute(ddl) if execute
+             rescue Cassandra::Errors::AlreadyExistsError => exc
+               next if skip_existing
+
+               new_exc = IndexAlreadyExists.new exc.message
+               new_exc.set_backtrace exc.backtrace
+               raise new_exc
+             end
+           end
+         end
+       end
+
+       # Insert a chunk of rows into an index
+       # @return [Array<Array<Cassandra::Uuid>>]
+       def index_insert_chunk(index, chunk)
+         fields = index.all_fields.to_a
+         prepared = "INSERT INTO \"#{index.key}\" (" \
+                    "#{field_names fields}" \
+                    ") VALUES (#{(['?'] * fields.length).join ', '})"
+         prepared = client.prepare prepared
+
+         ids = []
+         client.execute(client.batch do |batch|
+           chunk.each do |row|
+             index_row = index_row(row, fields)
+             ids << (index.hash_fields.to_a + index.order_fields).map do |field|
+               index_row[fields.index field]
+             end
+             batch.add prepared, arguments: index_row
+           end
+         end)
+
+         ids
+       end
+
+       # Check if the given index is empty
+       def index_empty?(index)
+         query = "SELECT COUNT(*) FROM \"#{index.key}\" LIMIT 1"
+         client.execute(query).first.values.first.zero?
+       end
+
+       # Check if a given index exists in the target database
+       def index_exists?(index)
+         client
+         @cluster.keyspace(@keyspace).has_table? index.key
+       end
+
+       # Check if a given index exists in the target database
+       def drop_index(index)
+         client.execute "DROP TABLE \"#{index.key}\""
+       end
+
+       # Sample a number of values from the given index
+       def index_sample(index, count)
+         field_list = index.all_fields.map { |f| "\"#{f.id}\"" }
+         query = "SELECT #{field_list.join ', '} " \
+                 "FROM \"#{index.key}\" LIMIT #{count}"
+         rows = client.execute(query).rows
+
+         # XXX Ignore null values for now
+         # fail if rows.any? { |row| row.values.any?(&:nil?) }
+
+         rows
+       end
+
+       private
+
+       # Produce an array of fields in the correct order for a CQL insert
+       # @return [Array]
+       def index_row(row, fields)
+         fields.map do |field|
+           value = row[field.id]
+           if field.is_a?(Fields::IDField)
+             value = case value
+                     when Numeric
+                       Cassandra::Uuid.new value.to_i
+                     when String
+                       Cassandra::Uuid.new value
+                     when nil
+                       Cassandra::Uuid::Generator.new.uuid
+                     else
+                       value
+                     end
+           end
+
+           value
+         end
+       end
+
+       # Produce the CQL to create the definition for a given index
+       # @return [String]
+       def index_cql(index)
+         ddl = "CREATE COLUMNFAMILY \"#{index.key}\" (" \
+               "#{field_names index.all_fields, true}, " \
+               "PRIMARY KEY((#{field_names index.hash_fields})"
+
+         cluster_key = index.order_fields
+         ddl += ", #{field_names cluster_key}" unless cluster_key.empty?
+         ddl += '));'
+
+         ddl
+       end
+
+       # Get a comma-separated list of field names with optional types
+       # @return [String]
+       def field_names(fields, types = false)
+         fields.map do |field|
+           name = "\"#{field.id}\""
+           name += ' ' + cassandra_type(field.class).to_s if types
+           name
+         end.join ', '
+       end
+
+       # Get a Cassandra client, connecting if not done already
+       def client
+         return @client unless @client.nil?
+         @cluster = Cassandra.cluster hosts: @hosts, port: @port,
+                                      timeout: nil
+         @client = @cluster.connect @keyspace
+       end
+
+       # Return the datatype to use in Cassandra for a given field
+       # @return [Symbol]
+       def cassandra_type(field_class)
+         case [field_class]
+         when [Fields::IntegerField]
+           :int
+         when [Fields::FloatField]
+           :float
+         when [Fields::StringField]
+           :text
+         when [Fields::DateField]
+           :timestamp
+         when [Fields::IDField],
+              [Fields::ForeignKeyField]
+           :uuid
+         end
+       end
+
+       # Insert data into an index on the backend
+       class InsertStatementStep < BackendBase::InsertStatementStep
+         def initialize(client, index, fields)
+           super
+
+           @fields = fields.map(&:id) & index.all_fields.map(&:id)
+           @prepared = client.prepare insert_cql
+           @generator = Cassandra::Uuid::Generator.new
+         end
+
+         # Insert each row into the index
+         def process(results)
+           results.each do |result|
+             fields = @index.all_fields.select { |field| result.key? field.id }
+             values = fields.map do |field|
+               value = result[field.id]
+
+               # If this is an ID, generate or construct a UUID object
+               if field.is_a?(Fields::IDField)
+                 value = if value.nil?
+                           @generator.uuid
+                         else
+                           Cassandra::Uuid.new(value.to_i)
+                         end
+               end
+
+               # XXX Useful to test that we never insert null values
+               # fail if value.nil?
+
+               value
+             end
+
+             begin
+               @client.execute(@prepared, arguments: values)
+             rescue Cassandra::Errors::InvalidError
+               # We hit a value which does not actually need to be
+               # inserted based on the data since some foreign
+               # key in the graph corresponding to this column
+               # family does not exist
+               nil
+             end
+           end
+         end
+
+         private
+
+         # The CQL used to insert the fields into the index
+         def insert_cql
+           insert = "INSERT INTO #{@index.key} ("
+           insert += @fields.map { |f| "\"#{f}\"" }.join(', ')
+           insert << ') VALUES (' << (['?'] * @fields.length).join(', ') + ')'
+
+           insert
+         end
+       end
+
+       # Delete data from an index on the backend
+       class DeleteStatementStep < BackendBase::DeleteStatementStep
+         def initialize(client, index)
+           super
+
+           @index_keys = @index.hash_fields + @index.order_fields.to_set
+
+           # Prepare the statement required to perform the deletion
+           delete = "DELETE FROM #{index.key} WHERE "
+           delete += @index_keys.map { |key| "\"#{key.id}\" = ?" }.join(' AND ')
+           @prepared = client.prepare delete
+         end
+
+         # Execute the delete for a given set of keys
+         def process(results)
+           # Delete each row from the index
+           results.each do |result|
+             values = delete_values result
+             @client.execute(@prepared, arguments: values)
+           end
+         end
+
+         private
+
+         # Get the values used in the WHERE clause for a CQL DELETE
+         def delete_values(result)
+           @index_keys.map do |key|
+             cur_field = @index.all_fields.find { |field| field.id == key.id }
+
+             if cur_field.is_a?(Fields::IDField)
+               Cassandra::Uuid.new(result[key.id].to_i)
+             else
+               result[key.id]
+             end
+           end
+         end
+       end
+
+       # A query step to look up data from a particular column family
+       class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
+         # rubocop:disable Metrics/ParameterLists
+         def initialize(client, select, conditions, step, next_step, prev_step)
+           super
+
+           @logger = Logging.logger['nose::backend::cassandra::indexlookupstep']
+
+           # TODO: Check if we can apply the next filter via ALLOW FILTERING
+           @prepared = client.prepare select_cql(select, conditions)
+         end
+         # rubocop:enable Metrics/ParameterLists
+
+         # Perform a column family lookup in Cassandra
+         def process(conditions, results)
+           results = initial_results(conditions) if results.nil?
+           condition_list = result_conditions conditions, results
+           new_result = fetch_all_queries condition_list, results
+
+           # Limit the size of the results in case we fetched multiple keys
+           new_result[0..(@step.limit.nil? ? -1 : @step.limit)]
+         end
+
+         private
+
+         # Produce the select CQL statement for a provided set of fields
+         # @return [String]
+         def select_cql(select, conditions)
+           select = expand_selected_fields select
+           cql = "SELECT #{select.map { |f| "\"#{f.id}\"" }.join ', '} FROM " \
+                 "\"#{@step.index.key}\" WHERE #{cql_where_clause conditions}"
+           cql += cql_order_by
+
+           # Add an optional limit
+           cql << " LIMIT #{@step.limit}" unless @step.limit.nil?
+
+           cql
+         end
+
+         # Produce a CQL where clause using the given conditions
+         # @return [String]
+         def cql_where_clause(conditions)
+           where = @eq_fields.map do |field|
+             "\"#{field.id}\" = ?"
+           end.join ' AND '
+           unless @range_field.nil?
+             condition = conditions.each_value.find(&:range?)
+             where << " AND \"#{condition.field.id}\" #{condition.operator} ?"
+           end
+
+           where
+         end
+
+         # Produce the CQL ORDER BY clause for this step
+         # @return [String]
+         def cql_order_by
+           # TODO: CQL3 requires all clustered columns before the one actually
+           # ordered on also be specified
+           #
+           # Example:
+           #
+           # SELECT * FROM cf WHERE id=? AND col1=? ORDER by col1, col2
+           return '' if @step.order_by.empty?
+           ' ORDER BY ' + @step.order_by.map { |f| "\"#{f.id}\"" }.join(', ')
+         end
+
+         # Lookup values from an index selecting the given
+         # fields and filtering on the given conditions
+         def fetch_all_queries(condition_list, results)
+           new_result = []
+           @logger.debug { " #{@prepared.cql} * #{condition_list.size}" }
+
+           # TODO: Chain enumerables of results instead
+           # Limit the total number of queries as well as the query limit
+           condition_list.zip(results).each do |condition_set, result|
+             # Loop over all pages to fetch results
+             values = lookup_values condition_set
+             fetch_query_pages values, new_result, result
+
+             # Don't continue with further queries
+             break if !@step.limit.nil? && new_result.length >= @step.limit
+           end
+           @logger.debug "Total result size = #{new_result.size}"
+
+           new_result
+         end
+
+         # Get the necessary pages of results for a given list of values
+         def fetch_query_pages(values, new_result, result)
+           new_results = @client.execute(@prepared, arguments: values)
+           loop do
+             # Add the previous results to each row
+             rows = new_results.map { |row| result.merge row }
+
+             # XXX Ignore null values in results for now
+             # fail if rows.any? { |row| row.values.any?(&:nil?) }
+
+             new_result.concat rows
+             break if new_results.last_page? ||
+                      (!@step.limit.nil? && result.length >= @step.limit)
+             new_results = new_results.next_page
+             @logger.debug "Fetched #{result.length} results"
+           end
+         end
+
+         # Produce the values used for lookup on a given set of conditions
+         def lookup_values(condition_set)
+           condition_set.map do |condition|
+             value = condition.value ||
+                     conditions[condition.field.id].value
+             fail if value.nil?
+
+             if condition.field.is_a?(Fields::IDField)
+               Cassandra::Uuid.new(value.to_i)
+             else
+               value
+             end
+           end
+         end
+       end
+     end
+   end
+ end
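
The backend above reads :hosts, :port and :keyspace from its configuration hash, connects lazily in #client, and indexes_ddl yields one CQL CREATE COLUMNFAMILY statement per index. A minimal usage sketch follows; the connection values are placeholders, and the model, indexes, plans and update_plans objects are assumed to come from the rest of NoSE (workload definition and schema search, in other files of this gem).

    require 'nose'

    # Hypothetical connection settings matching the keys read by
    # CassandraBackend#initialize (:hosts, :port, :keyspace).
    config = { hosts: ['127.0.0.1'], port: 9042, keyspace: 'nose' }

    # model, indexes, plans and update_plans are placeholders assumed to
    # be produced elsewhere by NoSE; they are not defined in this sketch.
    backend = NoSE::Backend::CassandraBackend.new(model, indexes, plans,
                                                  update_plans, config)

    # Print the CQL for each column family without executing anything...
    backend.indexes_ddl.each { |ddl| puts ddl }

    # ...or create the column families, skipping any that already exist.
    backend.indexes_ddl(true, true).to_a
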
data/lib/nose/backend/file.rb ADDED
@@ -0,0 +1,185 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   module Backend
+     # Simple backend which persists data to a file
+     class FileBackend < BackendBase
+       def initialize(model, indexes, plans, update_plans, config)
+         super
+
+         # Try to load data from file or start fresh
+         @index_data = if !config[:file].nil? && File.file?(config[:file])
+                         Marshal.load File.open(config[:file])
+                       else
+                         {}
+                       end
+
+         # Ensure the data is saved when we exit
+         ObjectSpace.define_finalizer self, self.class.finalize(@index_data,
+                                                                config[:file])
+       end
+
+       # Save data when the object is destroyed
+       def self.finalize(index_data, file)
+         proc do
+           Marshal.dump(index_data, File.open(file, 'w'))
+         end
+       end
+
+       # Check for an empty array for the data
+       def index_empty?(index)
+         !index_exists?(index) || @index_data[index.key].empty?
+       end
+
+       # Check if we have prepared space for this index
+       def index_exists?(index)
+         @index_data.key? index.key
+       end
+
+       # @abstract Subclasses implement to allow inserting
+       def index_insert_chunk(index, chunk)
+         @index_data[index.key].concat chunk
+       end
+
+       # Generate a simple UUID
+       def generate_id
+         SecureRandom.uuid
+       end
+
+       # Allocate space for data on the new indexes
+       def indexes_ddl(execute = false, skip_existing = false,
+                       drop_existing = false)
+         @indexes.each do |index|
+           # Do the appropriate behaviour based on the flags passed in
+           if index_exists?(index)
+             next if skip_existing
+             fail unless drop_existing
+           end
+
+           @index_data[index.key] = []
+         end if execute
+
+         # We just use the original index definition as DDL
+         @indexes.map(&:inspect)
+       end
+
+       # Sample a number of values from the given index
+       def index_sample(index, count)
+         data = @index_data[index.key]
+         data.nil? ? [] : data.sample(count)
+       end
+
+       # We just produce the data here which can be manipulated as needed
+       # @return [Hash]
+       def client
+         @index_data
+       end
+
+       # Provide some helper functions which allow the matching of rows
+       # based on a set of list of conditions
+       module RowMatcher
+         # Check if a row matches the given condition
+         # @return [Boolean]
+         def row_matches?(row, conditions)
+           row_matches_eq?(row, conditions) &&
+             row_matches_range?(row, conditions)
+         end
+
+         # Check if a row matches the given condition on equality predicates
+         # @return [Boolean]
+         def row_matches_eq?(row, conditions)
+           @eq_fields.all? do |field|
+             row[field.id] == conditions.find { |c| c.field == field }.value
+           end
+         end
+
+         # Check if a row matches the given condition on the range predicate
+         # @return [Boolean]
+         def row_matches_range?(row, conditions)
+           return true if @range_field.nil?
+
+           range_cond = conditions.find { |c| c.field == @range_field }
+           row[@range_field.id].send range_cond.operator, range_cond.value
+         end
+       end
+
+       # Look up data on an index in the backend
+       class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
+         include RowMatcher
+
+         # Filter all the rows in the specified index to those requested
+         def process(conditions, results)
+           # Get the set of conditions we need to process
+           results = initial_results(conditions) if results.nil?
+           condition_list = result_conditions conditions, results
+
+           # Loop through all rows to find the matching ones
+           rows = @client[@index.key] || []
+           selected = condition_list.flat_map do |condition|
+             rows.select { |row| row_matches? row, condition }
+           end.compact
+
+           # Apply the limit and only return selected fields
+           field_ids = Set.new @step.fields.map(&:id).to_set
+           selected[0..(@step.limit.nil? ? -1 : @step.limit)].map do |row|
+             row.select { |k, _| field_ids.include? k }
+           end
+         end
+       end
+
+       # Insert data into an index on the backend
+       class InsertStatementStep < BackendBase::InsertStatementStep
+         # Add new rows to the index
+         def process(results)
+           key_ids = (@index.hash_fields + @index.order_fields).map(&:id).to_set
+
+           results.each do |row|
+             # Pick out primary key fields we can use to match
+             conditions = row.select do |field_id|
+               key_ids.include? field_id
+             end
+
+             # If we have all the primary keys, check for a match
+             if conditions.length == key_ids.length
+               # Try to find a row with this ID and update it
+               matching_row = @client[index.key].find do |index_row|
+                 index_row.merge(conditions) == index_row
+               end
+
+               unless matching_row.nil?
+                 matching_row.merge! row
+                 next
+               end
+             end
+
+             # Populate IDs as needed
+             key_ids.each do |key_id|
+               row[key_id] = SecureRandom.uuid if row[key_id].nil?
+             end
+
+             @client[index.key] << row
+           end
+         end
+       end
+
+       # Delete data from an index on the backend
+       class DeleteStatementStep < BackendBase::DeleteStatementStep
+         include RowMatcher
+
+         # Remove rows matching the results from the dataset
+         def process(results)
+           # Loop over all rows
+           @client[index.key].reject! do |row|
+             # Check against all results
+             results.any? do |result|
+               # If all fields match, drop the row
+               result.all? do |field, value|
+                 row[field] == value
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
+ end
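
For comparison, the file backend above keeps every index in an in-memory Hash that is Marshal-dumped to :file when the object is finalized. A short sketch of driving it, again with placeholder NoSE objects and data:

    require 'nose'

    # :file is the only key read by FileBackend#initialize; the path is a
    # placeholder.
    config = { file: '/tmp/nose_index_data.bin' }

    # model, indexes, plans and update_plans are placeholders assumed to
    # come from the rest of NoSE; rows stands in for an Array of Hashes
    # keyed by field ID.
    backend = NoSE::Backend::FileBackend.new(model, indexes, plans,
                                             update_plans, config)

    backend.indexes_ddl(true)                       # allocate an Array per index
    backend.index_insert_chunk(indexes.first, rows) # append a chunk of rows
    backend.index_sample(indexes.first, 10)         # draw random rows back out
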