nose 0.1.0pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/nose/backend/cassandra.rb +390 -0
- data/lib/nose/backend/file.rb +185 -0
- data/lib/nose/backend/mongo.rb +242 -0
- data/lib/nose/backend.rb +557 -0
- data/lib/nose/cost/cassandra.rb +33 -0
- data/lib/nose/cost/entity_count.rb +27 -0
- data/lib/nose/cost/field_size.rb +31 -0
- data/lib/nose/cost/request_count.rb +32 -0
- data/lib/nose/cost.rb +68 -0
- data/lib/nose/debug.rb +45 -0
- data/lib/nose/enumerator.rb +199 -0
- data/lib/nose/indexes.rb +239 -0
- data/lib/nose/loader/csv.rb +99 -0
- data/lib/nose/loader/mysql.rb +199 -0
- data/lib/nose/loader/random.rb +48 -0
- data/lib/nose/loader/sql.rb +105 -0
- data/lib/nose/loader.rb +38 -0
- data/lib/nose/model/entity.rb +136 -0
- data/lib/nose/model/fields.rb +293 -0
- data/lib/nose/model.rb +113 -0
- data/lib/nose/parser.rb +202 -0
- data/lib/nose/plans/execution_plan.rb +282 -0
- data/lib/nose/plans/filter.rb +99 -0
- data/lib/nose/plans/index_lookup.rb +302 -0
- data/lib/nose/plans/limit.rb +42 -0
- data/lib/nose/plans/query_planner.rb +361 -0
- data/lib/nose/plans/sort.rb +49 -0
- data/lib/nose/plans/update.rb +60 -0
- data/lib/nose/plans/update_planner.rb +270 -0
- data/lib/nose/plans.rb +135 -0
- data/lib/nose/proxy/mysql.rb +275 -0
- data/lib/nose/proxy.rb +102 -0
- data/lib/nose/query_graph.rb +481 -0
- data/lib/nose/random/barbasi_albert.rb +48 -0
- data/lib/nose/random/watts_strogatz.rb +50 -0
- data/lib/nose/random.rb +391 -0
- data/lib/nose/schema.rb +89 -0
- data/lib/nose/search/constraints.rb +143 -0
- data/lib/nose/search/problem.rb +328 -0
- data/lib/nose/search/results.rb +200 -0
- data/lib/nose/search.rb +266 -0
- data/lib/nose/serialize.rb +747 -0
- data/lib/nose/statements/connection.rb +160 -0
- data/lib/nose/statements/delete.rb +83 -0
- data/lib/nose/statements/insert.rb +146 -0
- data/lib/nose/statements/query.rb +161 -0
- data/lib/nose/statements/update.rb +101 -0
- data/lib/nose/statements.rb +645 -0
- data/lib/nose/timing.rb +79 -0
- data/lib/nose/util.rb +305 -0
- data/lib/nose/workload.rb +244 -0
- data/lib/nose.rb +37 -0
- data/templates/workload.erb +42 -0
- metadata +700 -0
data/lib/nose/backend/mongo.rb
@@ -0,0 +1,242 @@
# frozen_string_literal: true

require 'mongo'

module NoSE
  module Backend
    # A backend which communicates with MongoDB
    class MongoBackend < BackendBase
      def initialize(model, indexes, plans, update_plans, config)
        super

        @uri = config[:uri]
        @database = config[:database]
        Mongo::Logger.logger.level = ::Logger::FATAL
      end
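
      # Illustrative only (values not taken from this diff): the reads above
      # expect a config hash such as
      #   { uri: 'mongodb://localhost:27017', database: 'nose' }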

      # MongoDB uses ID graphs for column families
      # @return [Boolean]
      def by_id_graph
        true
      end

      # Produce a new ObjectId
      # @return [BSON::ObjectId]
      def generate_id
        BSON::ObjectId.new
      end

      # Create new MongoDB collections for each index
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        ddl = []

        # Create the ID graphs for all indexes
        id_graphs = @indexes.map(&:to_id_graph).uniq
        id_graphs.each do |id_graph|
          ddl << "Create #{id_graph.key}"
          next unless execute

          collection = client.collections.find { |c| c.name == id_graph.key }
          collection.drop if drop_existing && !collection.nil?
          client[id_graph.key].create unless skip_existing
        end

        # Create any necessary indexes on the ID graphs
        index_keys = []
        @indexes.sort_by do |index|
          -(index.hash_fields.to_a + index.order_fields).length
        end.each do |index|
          # Check if we already have a prefix of this index created
          keys = index.hash_fields.to_a + index.order_fields
          next if index_keys.any? { |i| i[0..keys.length - 1] == keys }
          index_keys << keys

          id_graph = index.to_id_graph
          next if id_graph == index

          # Combine the key paths for all fields to create a compound index
          index_spec = Hash[keys.map do |key|
            [self.class.field_path(index, key).join('.'), 1]
          end]

          ddl << "Add index #{index_spec} to #{id_graph.key} (#{index.key})"
          next unless execute

          client[id_graph.key].indexes.create_one index_spec
        end

        ddl
      end
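
      # Hypothetical invocation of the method above: print the DDL without
      # touching MongoDB, then execute it, dropping any stale collections.
      #   backend.indexes_ddl.each { |stmt| puts stmt }
      #   backend.indexes_ddl(true, false, true)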

      # Insert a chunk of rows into an index
      # @return [Array<BSON::ObjectId>]
      def index_insert_chunk(index, chunk)
        # We only need to insert into indexes which are ID graphs
        fail unless index == index.to_id_graph

        chunk.map! do |row|
          row_hash = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
          index.all_fields.each do |field|
            field_path = self.class.field_path(index, field)
            entity_hash = field_path[0..-2].reduce(row_hash) { |h, k| h[k] }

            if field_path.last == '_id'
              entity_hash[field_path.last] = BSON::ObjectId.new
            else
              entity_hash[field_path.last] = row[field.id]
            end
          end

          row_hash.default_proc = nil
          row_hash
        end

        client[index.key].insert_many(chunk, ordered: false).inserted_ids
      end

      # Sample a number of values from the given index
      def index_sample(index, count)
        rows = client[index.to_id_graph.key].aggregate(
          [
            { '$sample' => { 'size' => count } }
          ]
        ).to_a

        MongoBackend.rows_from_mongo rows, index
      end

      # Convert documents returned from MongoDB into the format we understand
      # @return [Array<Hash>]
      def self.rows_from_mongo(rows, index, fields = nil)
        fields = index.all_fields if fields.nil?

        rows.map! do |row|
          Hash[fields.map do |field|
            field_path = MongoBackend.field_path(index, field)
            [field.id, field_path.reduce(row) { |h, p| h[p] }]
          end]
        end
      end

      # Find the path to a given field
      # @return [Array<String>]
      def self.field_path(index, field)
        # Find the path from the hash entity to the given key
        field_path = index.graph.path_between index.hash_fields.first.parent,
                                              field.parent
        field_path = field_path.path_for_field(field)

        # Use _id for any primary keys
        field_path[-1] = '_id' if field.is_a? Fields::IDField

        field_path
      end
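
      # Hypothetical example: for an index hashed on a "users" entity with a
      # field on a related "tweets" entity, this might return something like
      # ['tweets', 'body'] (joined elsewhere as 'tweets.body'), with primary
      # keys rewritten to MongoDB's '_id'.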

      # Insert data into an index on the backend
      class InsertStatementStep < BackendBase::InsertStatementStep
        def initialize(client, index, fields)
          super

          @fields = fields.map(&:id) & index.all_fields.map(&:id)
        end

        # Insert each row into the index
        def process(results)
          results.each do |result|
            values = Hash[@index.all_fields.map do |field|
              next unless result.key? field.id
              value = result[field.id]

              # If this is an ID, generate or construct an ObjectId
              if field.is_a?(Fields::IDField)
                value = if value.nil?
                          BSON::ObjectId.new
                        else
                          BSON::ObjectId.from_string(value)
                        end
              end
              [MongoBackend.field_path(@index, field).join('.'), value]
            end.compact]

            @client[@index.to_id_graph.key].update_one(
              { '_id' => values['_id'] },
              { '$set' => values },
              upsert: true
            )
          end
        end
      end

      # A query step to look up data from a particular collection
      class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
        # rubocop:disable Metrics/ParameterLists
        def initialize(client, select, conditions, step, next_step, prev_step)
          super

          @logger = Logging.logger['nose::backend::mongo::indexlookupstep']
          @order = @step.order_by.map do |field|
            { MongoBackend.field_path(@index, field).join('.') => 1 }
          end
        end
        # rubocop:enable Metrics/ParameterLists

        # Perform a column family lookup in MongoDB
        def process(conditions, results)
          results = initial_results(conditions) if results.nil?
          condition_list = result_conditions conditions, results

          new_result = condition_list.flat_map do |result_conditions|
            query_doc = query_doc_for_conditions result_conditions
            result = @client[@index.to_id_graph.key].find(query_doc)
            result = result.sort(*@order) unless @order.empty?

            result.to_a
          end

          # Limit the size of the results in case we fetched multiple keys
          new_result = new_result.take(@step.limit) unless @step.limit.nil?
          MongoBackend.rows_from_mongo new_result, @index, @step.fields
        end

        private

        # Produce the document used to issue the query to MongoDB
        # @return [Hash]
        def query_doc_for_conditions(conditions)
          conditions.map do |c|
            match = c.value
            match = BSON::ObjectId(match) if c.field.is_a? Fields::IDField

            # For range operators, find the corresponding MongoDB operator
            match = { mongo_operator(c.operator) => match } if c.operator != :'='

            { MongoBackend.field_path(@index, c.field).join('.') => match }
          end.reduce(&:merge)
        end

        # Produce the comparison operator used in MongoDB
        # @return [String]
        def mongo_operator(operator)
          case operator
          when :>
            '$gt'
          when :>=
            '$gte'
          when :<
            '$lt'
          when :<=
            '$lte'
          end
        end
      end
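
      # For illustration: a range condition such as "age >= 21" on a field of
      # the hash entity would yield a query document along the lines of
      #   { 'age' => { '$gte' => 21 } }
      # while equality conditions pass the (possibly ObjectId) value through
      # unwrapped.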

      private

      # Create a Mongo client from the saved config
      def client
        @client ||= Mongo::Client.new @uri, database: @database
      end
    end
  end
end
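
A rough usage sketch, not part of the diff: the backend is constructed with a
workload model, the indexes to implement, query and update plans, and the
config hash whose :uri and :database keys are read in #initialize above. The
model, indexes, and plans below are assumed to come from the rest of NoSE;
the connection details are illustrative.

  require 'nose'

  config = { uri: 'mongodb://localhost:27017', database: 'nose_example' }
  backend = NoSE::Backend::MongoBackend.new(model, indexes, plans,
                                            update_plans, config)

  # Print the DDL without executing it, then create the collections and
  # compound indexes for real, dropping any that already exist
  backend.indexes_ddl.each { |stmt| puts stmt }
  backend.indexes_ddl(true, false, true)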