nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
# frozen_string_literal: true

require 'mongo'

module NoSE
  module Backend
    # A backend which communicates with MongoDB
    class MongoBackend < BackendBase
      def initialize(model, indexes, plans, update_plans, config)
        super

        @uri = config[:uri]
        @database = config[:database]

        # Silence the driver's default logging; we only care about results
        Mongo::Logger.logger.level = ::Logger::FATAL
      end

      # MongoDB uses ID graphs for column families
      # @return [Boolean]
      def by_id_graph
        true
      end

      # Produce a new ObjectId
      # @return [BSON::ObjectId]
      def generate_id
        BSON::ObjectId.new
      end

      # Create new MongoDB collections for each index
      # @param execute [Boolean] whether to actually run the DDL
      # @param skip_existing [Boolean] skip creation of existing collections
      # @param drop_existing [Boolean] drop collections before creating
      # @return [Array<String>] a description of each DDL action taken
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        ddl = []

        # Create the ID graphs for all indexes
        id_graphs = @indexes.map(&:to_id_graph).uniq
        id_graphs.each do |id_graph|
          ddl << "Create #{id_graph.key}"
          next unless execute

          collection = client.collections.find { |c| c.name == id_graph.key }
          collection.drop if drop_existing && !collection.nil?
          client[id_graph.key].create unless skip_existing
        end

        # Create any necessary indexes on the ID graphs, widest first so
        # narrower indexes which are prefixes of wider ones can be skipped
        index_keys = []
        @indexes.sort_by do |index|
          -(index.hash_fields.to_a + index.order_fields).length
        end.each do |index|
          # Check if we already have a prefix of this index created
          keys = index.hash_fields.to_a + index.order_fields
          next if index_keys.any? { |i| i[keys.length - 1] == keys }
          index_keys << keys

          id_graph = index.to_id_graph
          next if id_graph == index

          # Combine the key paths for all fields to create a compound index
          index_spec = Hash[keys.map do |key|
            [self.class.field_path(index, key).join('.'), 1]
          end]

          ddl << "Add index #{index_spec} to #{id_graph.key} (#{index.key})"
          next unless execute

          client[id_graph.key].indexes.create_one index_spec
        end

        ddl
      end

      # Insert a chunk of rows into an index
      # @return [Array<BSON::ObjectId>] the IDs of the inserted documents
      def index_insert_chunk(index, chunk)
        # We only need to insert into indexes which are ID graphs
        fail unless index == index.to_id_graph

        chunk.map! do |row|
          # Build a nested hash which automatically vivifies intermediate
          # levels so we can assign along arbitrary field paths
          row_hash = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
          index.all_fields.each do |field|
            field_path = self.class.field_path(index, field)
            entity_hash = field_path[0..-2].reduce(row_hash) { |h, k| h[k] }

            if field_path.last == '_id'
              entity_hash[field_path.last] = BSON::ObjectId.new
            else
              entity_hash[field_path.last] = row[field.id]
            end
          end

          # Drop the default proc so the hash serializes cleanly
          row_hash.default_proc = nil
          row_hash
        end

        client[index.key].insert_many(chunk, ordered: false).inserted_ids
      end

      # Sample a number of values from the given index
      # @return [Array<Hash>] rows keyed by field ID
      def index_sample(index, count)
        rows = client[index.to_id_graph.key].aggregate(
          [
            { '$sample' => { 'size' => count } }
          ]
        ).to_a

        MongoBackend.rows_from_mongo rows, index
      end

      # Convert documents returned from MongoDB into the format we understand
      # @return [Array<Hash>]
      def self.rows_from_mongo(rows, index, fields = nil)
        fields = index.all_fields if fields.nil?

        rows.map! do |row|
          Hash[fields.map do |field|
            field_path = MongoBackend.field_path(index, field)
            [field.id, field_path.reduce(row) { |h, p| h[p] }]
          end]
        end
      end

      # Find the path to a given field
      # @return [Array<String>]
      def self.field_path(index, field)
        # Find the path from the hash entity to the given key
        field_path = index.graph.path_between index.hash_fields.first.parent,
                                              field.parent
        field_path = field_path.path_for_field(field)

        # Use _id for any primary keys
        field_path[-1] = '_id' if field.is_a? Fields::IDField

        field_path
      end

      # Insert data into an index on the backend
      class InsertStatementStep < BackendBase::InsertStatementStep
        def initialize(client, index, fields)
          super

          # Restrict to the fields which actually exist in the index
          @fields = fields.map(&:id) & index.all_fields.map(&:id)
        end

        # Insert each row into the index via an upsert keyed on _id
        def process(results)
          results.each do |result|
            values = Hash[@index.all_fields.map do |field|
              next unless result.key? field.id
              value = result[field.id]

              # If this is an ID, generate or construct an ObjectId
              if field.is_a?(Fields::IDField)
                value = if value.nil?
                          BSON::ObjectId.new
                        else
                          BSON::ObjectId.from_string(value)
                        end
              end
              [MongoBackend.field_path(@index, field).join('.'), value]
            end.compact]

            @client[@index.to_id_graph.key].update_one(
              { '_id' => values['_id'] },
              { '$set' => values },
              upsert: true
            )
          end
        end
      end

      # A query step to look up data from a particular collection
      class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
        # rubocop:disable Metrics/ParameterLists
        def initialize(client, select, conditions, step, next_step, prev_step)
          super

          @logger = Logging.logger['nose::backend::mongo::indexlookupstep']

          # Precompute the sort specification from the step's ordering
          @order = @step.order_by.map do |field|
            { MongoBackend.field_path(@index, field).join('.') => 1 }
          end
        end
        # rubocop:enable Metrics/ParameterLists

        # Perform a column family lookup in MongoDB
        def process(conditions, results)
          results = initial_results(conditions) if results.nil?
          condition_list = result_conditions conditions, results

          new_result = condition_list.flat_map do |result_conditions|
            query_doc = query_doc_for_conditions result_conditions
            result = @client[@index.to_id_graph.key].find(query_doc)
            result = result.sort(*@order) unless @order.empty?

            result.to_a
          end

          # Limit the size of the results in case we fetched multiple keys
          new_result = new_result[0..(@step.limit.nil? ? -1 : @step.limit)]
          MongoBackend.rows_from_mongo new_result, @index, @step.fields
        end

        private

        # Produce the document used to issue the query to MongoDB
        # @return [Hash]
        def query_doc_for_conditions(conditions)
          conditions.map do |c|
            match = c.value
            match = BSON::ObjectId(match) if c.field.is_a? Fields::IDField

            # For range operators, find the corresponding MongoDB operator
            # (fixed: was `mongo_operator(op)` with `op` undefined, which
            # raised NameError for any non-equality condition)
            match = { mongo_operator(c.operator) => match } if c.operator != :'='

            { MongoBackend.field_path(@index, c.field).join('.') => match }
          end.reduce(&:merge)
        end

        # Produce the comparison operator used in MongoDB
        # @return [String]
        def mongo_operator(operator)
          case operator
          when :>
            '$gt'
          when :>=
            '$gte'
          when :<
            '$lt'
          when :<=
            '$lte'
          end
        end
      end

      private

      # Lazily create a Mongo client from the saved config
      def client
        @client ||= Mongo::Client.new @uri, database: @database
      end
    end
  end
end