nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0cbc98973cefc286e5457924f9f314d79f8e00e2
4
+ data.tar.gz: f3182e0f7583fb068ddd3a679f1ad4ed34af4548
5
+ SHA512:
6
+ metadata.gz: c0e4526807ab3d17ca001361b6bbee74afeb94df64d54886f15eaac9045a460d2cd7109e56d6d567d2636267e992f9e4195f4d240bff4975545239d3843cf8fb
7
+ data.tar.gz: 9445a603f50c33bfcf2d055a48cac3c0ded0763a6631100a33f16c8e3c92b01beba512725a5f73b0a8671a63bd4a45dea44eb60fb42a82cc454915fecbd046a9
@@ -0,0 +1,390 @@
1
# frozen_string_literal: true

require 'cassandra'
require 'zlib'

module NoSE
  module Backend
    # A backend which communicates with Cassandra via CQL
    class CassandraBackend < BackendBase
      def initialize(model, indexes, plans, update_plans, config)
        super

        @hosts = config[:hosts]
        @port = config[:port]
        @keyspace = config[:keyspace]
        @generator = Cassandra::Uuid::Generator.new
      end

      # Generate a random UUID
      # @return [Cassandra::Uuid]
      def generate_id
        @generator.uuid
      end

      # Produce the DDL necessary for column families for the given indexes
      # and optionally execute them against the server
      # @return [Enumerator<String>] lazily yields one CQL statement per index
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        Enumerator.new do |enum|
          # each, not map -- we only care about the yielded DDL and the
          # side effects; the mapped array was never used
          @indexes.each do |index|
            ddl = index_cql index
            enum.yield ddl

            begin
              drop_index(index) if drop_existing && index_exists?(index)
              client.execute(ddl) if execute
            rescue Cassandra::Errors::AlreadyExistsError => exc
              next if skip_existing

              # Re-raise as our own exception type, preserving the backtrace
              new_exc = IndexAlreadyExists.new exc.message
              new_exc.set_backtrace exc.backtrace
              raise new_exc
            end
          end
        end
      end

      # Insert a chunk of rows into an index using a single batch
      # @return [Array<Array<Cassandra::Uuid>>] the primary key values
      #   of each inserted row
      def index_insert_chunk(index, chunk)
        fields = index.all_fields.to_a
        prepared = "INSERT INTO \"#{index.key}\" (" \
                   "#{field_names fields}" \
                   ") VALUES (#{(['?'] * fields.length).join ', '})"
        prepared = client.prepare prepared

        ids = []
        client.execute(client.batch do |batch|
          chunk.each do |row|
            index_row = index_row(row, fields)
            ids << (index.hash_fields.to_a + index.order_fields).map do |field|
              index_row[fields.index field]
            end
            batch.add prepared, arguments: index_row
          end
        end)

        ids
      end

      # Check if the given index has no rows
      # @return [Boolean]
      def index_empty?(index)
        query = "SELECT COUNT(*) FROM \"#{index.key}\" LIMIT 1"
        client.execute(query).first.values.first.zero?
      end

      # Check if a given index exists in the target database
      # @return [Boolean]
      def index_exists?(index)
        # Calling client ensures we are connected so @cluster is populated
        client
        @cluster.keyspace(@keyspace).has_table? index.key
      end

      # Drop the column family backing the given index
      def drop_index(index)
        client.execute "DROP TABLE \"#{index.key}\""
      end

      # Sample a number of values from the given index
      # @return [Array<Hash>]
      def index_sample(index, count)
        field_list = index.all_fields.map { |f| "\"#{f.id}\"" }
        query = "SELECT #{field_list.join ', '} " \
                "FROM \"#{index.key}\" LIMIT #{count}"
        rows = client.execute(query).rows

        # XXX Ignore null values for now
        # fail if rows.any? { |row| row.values.any?(&:nil?) }

        rows
      end

      private

      # Produce an array of fields in the correct order for a CQL insert,
      # converting ID values to UUIDs (generating one when missing)
      # @return [Array]
      def index_row(row, fields)
        fields.map do |field|
          value = row[field.id]
          if field.is_a?(Fields::IDField)
            value = case value
                    when Numeric
                      Cassandra::Uuid.new value.to_i
                    when String
                      Cassandra::Uuid.new value
                    when nil
                      Cassandra::Uuid::Generator.new.uuid
                    else
                      value
                    end
          end

          value
        end
      end

      # Produce the CQL to create the definition for a given index
      # @return [String]
      def index_cql(index)
        ddl = "CREATE COLUMNFAMILY \"#{index.key}\" (" \
              "#{field_names index.all_fields, true}, " \
              "PRIMARY KEY((#{field_names index.hash_fields})"

        cluster_key = index.order_fields
        ddl += ", #{field_names cluster_key}" unless cluster_key.empty?
        ddl += '));'

        ddl
      end

      # Get a comma-separated list of field names with optional types
      # @return [String]
      def field_names(fields, types = false)
        fields.map do |field|
          name = "\"#{field.id}\""
          name += ' ' + cassandra_type(field.class).to_s if types
          name
        end.join ', '
      end

      # Get a Cassandra client, connecting if not done already
      def client
        return @client unless @client.nil?
        @cluster = Cassandra.cluster hosts: @hosts, port: @port,
                                     timeout: nil
        @client = @cluster.connect @keyspace
      end

      # Return the datatype to use in Cassandra for a given field
      # @return [Symbol] or nil for an unknown field class
      def cassandra_type(field_class)
        # Compare classes by equality; a bare case/when would invoke
        # Class#=== (an is_a? test) which is never true when the subject
        # is itself a Class
        if field_class == Fields::IntegerField
          :int
        elsif field_class == Fields::FloatField
          :float
        elsif field_class == Fields::StringField
          :text
        elsif field_class == Fields::DateField
          :timestamp
        elsif field_class == Fields::IDField ||
              field_class == Fields::ForeignKeyField
          :uuid
        end
      end

      # Insert data into an index on the backend
      class InsertStatementStep < BackendBase::InsertStatementStep
        def initialize(client, index, fields)
          super

          @fields = fields.map(&:id) & index.all_fields.map(&:id)
          @prepared = client.prepare insert_cql
          @generator = Cassandra::Uuid::Generator.new
        end

        # Insert each row into the index
        def process(results)
          results.each do |result|
            fields = @index.all_fields.select { |field| result.key? field.id }
            values = fields.map do |field|
              value = result[field.id]

              # If this is an ID, generate or construct a UUID object
              if field.is_a?(Fields::IDField)
                value = if value.nil?
                          @generator.uuid
                        else
                          Cassandra::Uuid.new(value.to_i)
                        end
              end

              # XXX Useful to test that we never insert null values
              # fail if value.nil?

              value
            end

            begin
              @client.execute(@prepared, arguments: values)
            rescue Cassandra::Errors::InvalidError
              # We hit a value which does not actually need to be
              # inserted based on the data since some foreign
              # key in the graph corresponding to this column
              # family does not exist
              nil
            end
          end
        end

        private

        # The CQL used to insert the fields into the index
        # @return [String]
        def insert_cql
          insert = "INSERT INTO #{@index.key} ("
          insert += @fields.map { |f| "\"#{f}\"" }.join(', ')
          insert += ') VALUES (' + (['?'] * @fields.length).join(', ') + ')'

          insert
        end
      end

      # Delete data from an index on the backend
      class DeleteStatementStep < BackendBase::DeleteStatementStep
        def initialize(client, index)
          super

          @index_keys = @index.hash_fields + @index.order_fields.to_set

          # Prepare the statement required to perform the deletion
          delete = "DELETE FROM #{index.key} WHERE "
          delete += @index_keys.map { |key| "\"#{key.id}\" = ?" }.join(' AND ')
          @prepared = client.prepare delete
        end

        # Execute the delete for a given set of keys
        def process(results)
          # Delete each row from the index
          results.each do |result|
            values = delete_values result
            @client.execute(@prepared, arguments: values)
          end
        end

        private

        # Get the values used in the WHERE clause for a CQL DELETE
        # @return [Array]
        def delete_values(result)
          @index_keys.map do |key|
            cur_field = @index.all_fields.find { |field| field.id == key.id }

            if cur_field.is_a?(Fields::IDField)
              Cassandra::Uuid.new(result[key.id].to_i)
            else
              result[key.id]
            end
          end
        end
      end

      # A query step to look up data from a particular column family
      class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
        # rubocop:disable Metrics/ParameterLists
        def initialize(client, select, conditions, step, next_step, prev_step)
          super

          @logger = Logging.logger['nose::backend::cassandra::indexlookupstep']

          # TODO: Check if we can apply the next filter via ALLOW FILTERING
          @prepared = client.prepare select_cql(select, conditions)
        end
        # rubocop:enable Metrics/ParameterLists

        # Perform a column family lookup in Cassandra
        def process(conditions, results)
          results = initial_results(conditions) if results.nil?
          condition_list = result_conditions conditions, results
          new_result = fetch_all_queries condition_list, results

          # Limit the size of the results in case we fetched multiple keys
          # (take exactly limit rows; the previous 0..limit slice was
          # off by one and returned limit + 1 rows)
          @step.limit.nil? ? new_result : new_result.first(@step.limit)
        end

        private

        # Produce the select CQL statement for a provided set of fields
        # @return [String]
        def select_cql(select, conditions)
          select = expand_selected_fields select
          cql = "SELECT #{select.map { |f| "\"#{f.id}\"" }.join ', '} FROM " \
                "\"#{@step.index.key}\" WHERE #{cql_where_clause conditions}"
          cql += cql_order_by

          # Add an optional limit
          cql << " LIMIT #{@step.limit}" unless @step.limit.nil?

          cql
        end

        # Produce a CQL where clause using the given conditions
        # @return [String]
        def cql_where_clause(conditions)
          where = @eq_fields.map do |field|
            "\"#{field.id}\" = ?"
          end.join ' AND '
          unless @range_field.nil?
            condition = conditions.each_value.find(&:range?)
            where << " AND \"#{condition.field.id}\" #{condition.operator} ?"
          end

          where
        end

        # Produce the CQL ORDER BY clause for this step
        # @return [String]
        def cql_order_by
          # TODO: CQL3 requires all clustered columns before the one actually
          # ordered on also be specified
          #
          # Example:
          #
          # SELECT * FROM cf WHERE id=? AND col1=? ORDER by col1, col2
          return '' if @step.order_by.empty?
          ' ORDER BY ' + @step.order_by.map { |f| "\"#{f.id}\"" }.join(', ')
        end

        # Lookup values from an index selecting the given
        # fields and filtering on the given conditions
        def fetch_all_queries(condition_list, results)
          new_result = []
          @logger.debug { "  #{@prepared.cql} * #{condition_list.size}" }

          # TODO: Chain enumerables of results instead
          # Limit the total number of queries as well as the query limit
          condition_list.zip(results).each do |condition_set, result|
            # Loop over all pages to fetch results
            values = lookup_values condition_set
            fetch_query_pages values, new_result, result

            # Don't continue with further queries
            break if !@step.limit.nil? && new_result.length >= @step.limit
          end
          @logger.debug "Total result size = #{new_result.size}"

          new_result
        end

        # Get the necessary pages of results for a given list of values
        def fetch_query_pages(values, new_result, result)
          new_results = @client.execute(@prepared, arguments: values)
          loop do
            # Add the previous results to each row
            rows = new_results.map { |row| result.merge row }

            # XXX Ignore null values in results for now
            # fail if rows.any? { |row| row.values.any?(&:nil?) }

            new_result.concat rows
            # Track the accumulated row count (new_result), not the field
            # count of the previous result hash, when applying the limit
            break if new_results.last_page? ||
                     (!@step.limit.nil? && new_result.length >= @step.limit)
            new_results = new_results.next_page
            @logger.debug "Fetched #{new_result.length} results"
          end
        end

        # Produce the values used for lookup on a given set of conditions
        def lookup_values(condition_set)
          condition_set.map do |condition|
            value = condition.value ||
                    conditions[condition.field.id].value
            fail if value.nil?

            if condition.field.is_a?(Fields::IDField)
              Cassandra::Uuid.new(value.to_i)
            else
              value
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,185 @@
1
# frozen_string_literal: true

require 'securerandom'
require 'set'

module NoSE
  module Backend
    # Simple backend which persists data to a file
    class FileBackend < BackendBase
      def initialize(model, indexes, plans, update_plans, config)
        super

        # Try to load data from file or start fresh
        # NOTE: Marshal.load is only safe here because the file is one we
        #       wrote ourselves in the finalizer below
        @index_data = if !config[:file].nil? && File.file?(config[:file])
                        File.open(config[:file], 'rb') do |file|
                          Marshal.load file
                        end
                      else
                        {}
                      end

        # Ensure the data is saved when we exit
        ObjectSpace.define_finalizer self, self.class.finalize(@index_data,
                                                               config[:file])
      end

      # Save data when the object is destroyed
      # @return [Proc]
      def self.finalize(index_data, file)
        proc do
          # Block form ensures the handle is flushed and closed
          File.open(file, 'wb') { |f| Marshal.dump(index_data, f) }
        end
      end

      # Check for an empty array for the data
      # @return [Boolean]
      def index_empty?(index)
        !index_exists?(index) || @index_data[index.key].empty?
      end

      # Check if we have prepared space for this index
      # @return [Boolean]
      def index_exists?(index)
        @index_data.key? index.key
      end

      # Add a chunk of rows to the in-memory store for this index
      def index_insert_chunk(index, chunk)
        @index_data[index.key].concat chunk
      end

      # Generate a simple UUID
      # @return [String]
      def generate_id
        SecureRandom.uuid
      end

      # Allocate space for data on the new indexes
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        if execute
          @indexes.each do |index|
            # Do the appropriate behaviour based on the flags passed in
            if index_exists?(index)
              next if skip_existing
              fail unless drop_existing
            end

            @index_data[index.key] = []
          end
        end

        # We just use the original index definition as DDL
        @indexes.map(&:inspect)
      end

      # Sample a number of values from the given index
      # @return [Array<Hash>]
      def index_sample(index, count)
        data = @index_data[index.key]
        data.nil? ? [] : data.sample(count)
      end

      # We just produce the data here which can be manipulated as needed
      # @return [Hash]
      def client
        @index_data
      end

      # Provide some helper functions which allow the matching of rows
      # based on a set of list of conditions
      module RowMatcher
        # Check if a row matches the given condition
        # @return [Boolean]
        def row_matches?(row, conditions)
          row_matches_eq?(row, conditions) &&
            row_matches_range?(row, conditions)
        end

        # Check if a row matches the given condition on equality predicates
        # @return [Boolean]
        def row_matches_eq?(row, conditions)
          @eq_fields.all? do |field|
            row[field.id] == conditions.find { |c| c.field == field }.value
          end
        end

        # Check if a row matches the given condition on the range predicate
        # @return [Boolean]
        def row_matches_range?(row, conditions)
          return true if @range_field.nil?

          range_cond = conditions.find { |c| c.field == @range_field }
          row[@range_field.id].send range_cond.operator, range_cond.value
        end
      end

      # Look up data on an index in the backend
      class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
        include RowMatcher

        # Filter all the rows in the specified index to those requested
        def process(conditions, results)
          # Get the set of conditions we need to process
          results = initial_results(conditions) if results.nil?
          condition_list = result_conditions conditions, results

          # Loop through all rows to find the matching ones
          rows = @client[@index.key] || []
          selected = condition_list.flat_map do |condition|
            rows.select { |row| row_matches? row, condition }
          end

          # Apply the limit (take exactly limit rows; a 0..limit slice
          # would be off by one) and only return selected fields
          field_ids = @step.fields.map(&:id).to_set
          limited = @step.limit.nil? ? selected : selected.first(@step.limit)
          limited.map do |row|
            row.select { |k, _| field_ids.include? k }
          end
        end
      end

      # Insert data into an index on the backend
      class InsertStatementStep < BackendBase::InsertStatementStep
        # Add new rows to the index
        def process(results)
          key_ids = (@index.hash_fields + @index.order_fields).map(&:id).to_set

          results.each do |row|
            # Pick out primary key fields we can use to match
            conditions = row.select do |field_id|
              key_ids.include? field_id
            end

            # If we have all the primary keys, check for a match
            if conditions.length == key_ids.length
              # Try to find a row with this ID and update it
              matching_row = @client[@index.key].find do |index_row|
                index_row.merge(conditions) == index_row
              end

              unless matching_row.nil?
                matching_row.merge! row
                next
              end
            end

            # Populate IDs as needed
            key_ids.each do |key_id|
              row[key_id] = SecureRandom.uuid if row[key_id].nil?
            end

            @client[@index.key] << row
          end
        end
      end

      # Delete data from an index on the backend
      class DeleteStatementStep < BackendBase::DeleteStatementStep
        include RowMatcher

        # Remove rows matching the results from the dataset
        def process(results)
          # Loop over all rows
          @client[@index.key].reject! do |row|
            # Check against all results
            results.any? do |result|
              # If all fields match, drop the row
              result.all? do |field, value|
                row[field] == value
              end
            end
          end
        end
      end
    end
  end
end