nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mongo'
4
+
5
+ module NoSE
6
+ module Backend
7
+ # A backend which communicates with MongoDB
8
+ class MongoBackend < BackendBase
9
# Set up a MongoDB backend from the saved configuration
def initialize(model, indexes, plans, update_plans, config)
  super

  # Connection parameters for the lazily created client (see #client)
  @uri, @database = config.values_at(:uri, :database)

  # Silence the driver so query traffic is not echoed to the log
  Mongo::Logger.logger.level = ::Logger::FATAL
end
16
+
17
# Column families in MongoDB are always materialized as ID graphs
# @return [Boolean]
def by_id_graph
  true
end
22
+
23
# Mint a fresh ObjectId to serve as a generated row identifier
# @return [BSON::ObjectId]
def generate_id
  BSON::ObjectId.new
end
28
+
29
# Create new MongoDB collections for each index and add any
# secondary indexes needed on the backing ID graphs
# @param execute [Boolean] actually run the DDL against MongoDB
# @param skip_existing [Boolean] do not create collections which exist
# @param drop_existing [Boolean] drop existing collections first
# @return [Array<String>] a description of each DDL action taken
def indexes_ddl(execute = false, skip_existing = false,
                drop_existing = false)
  ddl = []

  # Create a collection for each unique ID graph backing the indexes
  id_graphs = @indexes.map(&:to_id_graph).uniq
  id_graphs.each do |id_graph|
    ddl << "Create #{id_graph.key}"
    next unless execute

    collection = client.collections.find { |c| c.name == id_graph.key }
    collection.drop if drop_existing && !collection.nil?
    client[id_graph.key].create unless skip_existing
  end

  # Create any necessary indexes on the ID graphs, longest key list
  # first so shorter indexes can be skipped when already covered
  index_keys = []
  @indexes.sort_by do |index|
    -(index.hash_fields.to_a + index.order_fields).length
  end.each do |index|
    # Skip this index if a previously created index has it as a prefix
    # (fixed: compare the prefix *slice*; the old code compared the
    # single element i[keys.length - 1] against the whole key array,
    # which was always false and so never deduplicated)
    keys = index.hash_fields.to_a + index.order_fields
    next if index_keys.any? { |i| i[0...keys.length] == keys }
    index_keys << keys

    # ID graphs themselves need no extra index; their key is _id
    id_graph = index.to_id_graph
    next if id_graph == index

    # Combine the key paths for all fields to create a compound index
    index_spec = Hash[keys.map do |key|
      [self.class.field_path(index, key).join('.'), 1]
    end]

    ddl << "Add index #{index_spec} to #{id_graph.key} (#{index.key})"
    next unless execute

    client[id_graph.key].indexes.create_one index_spec
  end

  ddl
end
71
+
72
# Insert a chunk of rows into an index, building nested documents
# from the field paths of each field in the index
# @return [Array<BSON::ObjectId>] the IDs of the inserted documents
def index_insert_chunk(index, chunk)
  # We only need to insert into indexes which are ID graphs
  fail unless index == index.to_id_graph

  chunk.map! do |row|
    # Autovivifying hash so nested paths can be assigned directly
    doc = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }

    index.all_fields.each do |field|
      path = self.class.field_path(index, field)
      nested = path[0..-2].reduce(doc) { |hash, key| hash[key] }

      # Primary keys become fresh ObjectIds; everything else is copied
      nested[path.last] =
        path.last == '_id' ? BSON::ObjectId.new : row[field.id]
    end

    # Drop the default proc so the document can be serialized
    doc.default_proc = nil
    doc
  end

  client[index.key].insert_many(chunk, ordered: false).inserted_ids
end
97
+
98
# Sample a number of values from the given index via $sample
# @return [Array<Hash>]
def index_sample(index, count)
  pipeline = [{ '$sample' => { 'size' => count } }]
  docs = client[index.to_id_graph.key].aggregate(pipeline).to_a

  MongoBackend.rows_from_mongo docs, index
end
108
+
109
# Convert documents returned from MongoDB into the flat row format
# used by the rest of NoSE (note: mutates the given array in place)
# @return [Array<Hash>]
def self.rows_from_mongo(rows, index, fields = nil)
  fields = index.all_fields if fields.nil?

  rows.map! do |row|
    fields.each_with_object({}) do |field, new_row|
      # Walk the nested document along the field's path
      path = MongoBackend.field_path(index, field)
      new_row[field.id] = path.reduce(row) { |doc, key| doc[key] }
    end
  end
end
121
+
122
# Find the path of document keys leading to a given field
# @return [Array<String>]
def self.field_path(index, field)
  # Trace the query graph from the hash entity to the field's entity
  graph_path = index.graph.path_between index.hash_fields.first.parent,
                                        field.parent
  path = graph_path.path_for_field(field)

  # MongoDB stores primary keys under the reserved _id key
  path[-1] = '_id' if field.is_a? Fields::IDField

  path
end
135
+
136
+ # Insert data into an index on the backend
137
+ class InsertStatementStep < BackendBase::InsertStatementStep
138
# Record which of the given fields actually belong to the index
def initialize(client, index, fields)
  super

  # Intersect by ID so only insertable fields are retained
  @fields = fields.map(&:id) & index.all_fields.map(&:id)
end
143
+
144
# Upsert each row into the backing collection for the index
def process(results)
  results.each do |result|
    values = Hash[@index.all_fields.map do |field|
      next unless result.key? field.id
      value = result[field.id]

      # IDs are stored as ObjectIds: reuse the given value
      # when present, otherwise mint a new identifier
      if field.is_a?(Fields::IDField)
        value = value.nil? ? BSON::ObjectId.new \
                           : BSON::ObjectId.from_string(value)
      end

      [MongoBackend.field_path(@index, field).join('.'), value]
    end.compact]

    # Upsert so repeated inserts update the existing document
    @client[@index.to_id_graph.key].update_one(
      { '_id' => values['_id'] },
      { '$set' => values },
      upsert: true
    )
  end
end
169
+ end
170
+
171
+ # A query step to look up data from a particular collection
172
+ class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
173
# rubocop:disable Metrics/ParameterLists
def initialize(client, select, conditions, step, next_step, prev_step)
  super

  @logger = Logging.logger['nose::backend::mongo::indexlookupstep']

  # Precompute the sort specification used for ordered lookups
  @order = @step.order_by.map do |field|
    { MongoBackend.field_path(@index, field).join('.') => 1 }
  end
end
# rubocop:enable Metrics/ParameterLists
183
+
184
# Perform a column family lookup in MongoDB
# @param conditions [Hash] conditions provided with the query
# @param results [Array<Hash>, nil] results from the previous step
# @return [Array<Hash>] rows in the flat format used by NoSE
def process(conditions, results)
  results = initial_results(conditions) if results.nil?
  condition_list = result_conditions conditions, results

  # Issue one query per set of conditions and combine the results
  new_result = condition_list.flat_map do |result_conditions|
    query_doc = query_doc_for_conditions result_conditions
    cursor = @client[@index.to_id_graph.key].find(query_doc)
    cursor = cursor.sort(*@order) unless @order.empty?

    cursor.to_a
  end

  # Limit the size of the results in case we fetched multiple keys
  # (fixed: [0..limit] kept limit + 1 rows; take exactly the limit)
  new_result = new_result.take(@step.limit) unless @step.limit.nil?
  MongoBackend.rows_from_mongo new_result, @index, @step.fields
end
201
+
202
+ private
203
+
204
# Produce the document used to issue the query to MongoDB
# @param conditions [Array] conditions with field, operator, and value
# @return [Hash] a MongoDB query document matching all conditions
def query_doc_for_conditions(conditions)
  conditions.map do |c|
    match = c.value
    match = BSON::ObjectId(match) if c.field.is_a? Fields::IDField

    # For range operators, find the corresponding MongoDB operator
    # (fixed: `op` was an undefined local variable which raised
    # NameError for any non-equality condition; use c.operator)
    match = { mongo_operator(c.operator) => match } if c.operator != :'='

    { MongoBackend.field_path(@index, c.field).join('.') => match }
  end.reduce(&:merge)
end
217
+
218
# Map a NoSE comparison operator to its MongoDB query operator
# @return [String, nil] the MongoDB operator, or nil if unsupported
def mongo_operator(operator)
  {
    :>  => '$gt',
    :>= => '$gte',
    :<  => '$lt',
    :<= => '$lte'
  }[operator]
end
232
+ end
233
+
234
+ private
235
+
236
# Lazily create and memoize a Mongo client from the saved config
def client
  @client ||= Mongo::Client.new(@uri, database: @database)
end
240
+ end
241
+ end
242
+ end