tigerbeetle-node 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +302 -101
- package/dist/index.d.ts +70 -72
- package/dist/index.js +70 -72
- package/dist/index.js.map +1 -1
- package/package.json +6 -6
- package/scripts/download_node_headers.sh +14 -7
- package/src/index.ts +6 -10
- package/src/node.zig +6 -3
- package/src/tigerbeetle/scripts/benchmark.sh +4 -4
- package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
- package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
- package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
- package/src/tigerbeetle/scripts/install.sh +19 -4
- package/src/tigerbeetle/scripts/install_zig.bat +5 -1
- package/src/tigerbeetle/scripts/install_zig.sh +24 -14
- package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
- package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
- package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
- package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
- package/src/tigerbeetle/src/benchmark.zig +4 -2
- package/src/tigerbeetle/src/benchmark_array_search.zig +3 -3
- package/src/tigerbeetle/src/c/tb_client/thread.zig +8 -9
- package/src/tigerbeetle/src/c/tb_client.h +100 -80
- package/src/tigerbeetle/src/c/tb_client.zig +4 -1
- package/src/tigerbeetle/src/cli.zig +1 -1
- package/src/tigerbeetle/src/config.zig +48 -16
- package/src/tigerbeetle/src/demo.zig +3 -1
- package/src/tigerbeetle/src/eytzinger_benchmark.zig +3 -3
- package/src/tigerbeetle/src/io/linux.zig +1 -1
- package/src/tigerbeetle/src/lsm/README.md +214 -0
- package/src/tigerbeetle/src/lsm/binary_search.zig +137 -10
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +43 -0
- package/src/tigerbeetle/src/lsm/compaction.zig +352 -398
- package/src/tigerbeetle/src/lsm/composite_key.zig +2 -0
- package/src/tigerbeetle/src/lsm/eytzinger.zig +1 -1
- package/src/tigerbeetle/src/lsm/forest.zig +21 -447
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +412 -0
- package/src/tigerbeetle/src/lsm/grid.zig +145 -69
- package/src/tigerbeetle/src/lsm/groove.zig +196 -133
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +40 -18
- package/src/tigerbeetle/src/lsm/level_iterator.zig +28 -9
- package/src/tigerbeetle/src/lsm/manifest.zig +81 -181
- package/src/tigerbeetle/src/lsm/manifest_level.zig +210 -454
- package/src/tigerbeetle/src/lsm/manifest_log.zig +77 -28
- package/src/tigerbeetle/src/lsm/posted_groove.zig +64 -76
- package/src/tigerbeetle/src/lsm/segmented_array.zig +561 -241
- package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +62 -12
- package/src/tigerbeetle/src/lsm/table.zig +83 -48
- package/src/tigerbeetle/src/lsm/table_immutable.zig +30 -23
- package/src/tigerbeetle/src/lsm/table_iterator.zig +25 -14
- package/src/tigerbeetle/src/lsm/table_mutable.zig +63 -12
- package/src/tigerbeetle/src/lsm/test.zig +49 -55
- package/src/tigerbeetle/src/lsm/tree.zig +407 -402
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +457 -0
- package/src/tigerbeetle/src/main.zig +28 -6
- package/src/tigerbeetle/src/message_bus.zig +2 -2
- package/src/tigerbeetle/src/message_pool.zig +14 -17
- package/src/tigerbeetle/src/simulator.zig +145 -112
- package/src/tigerbeetle/src/state_machine.zig +338 -228
- package/src/tigerbeetle/src/static_allocator.zig +65 -0
- package/src/tigerbeetle/src/storage.zig +3 -7
- package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
- package/src/tigerbeetle/src/test/accounting/workload.zig +819 -0
- package/src/tigerbeetle/src/test/cluster.zig +18 -48
- package/src/tigerbeetle/src/test/conductor.zig +365 -0
- package/src/tigerbeetle/src/test/fuzz.zig +121 -0
- package/src/tigerbeetle/src/test/id.zig +89 -0
- package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
- package/src/tigerbeetle/src/test/state_checker.zig +93 -69
- package/src/tigerbeetle/src/test/state_machine.zig +11 -35
- package/src/tigerbeetle/src/test/storage.zig +29 -8
- package/src/tigerbeetle/src/tigerbeetle.zig +14 -16
- package/src/tigerbeetle/src/unit_tests.zig +7 -0
- package/src/tigerbeetle/src/vopr.zig +494 -0
- package/src/tigerbeetle/src/vopr_hub/README.md +58 -0
- package/src/tigerbeetle/src/vopr_hub/SETUP.md +199 -0
- package/src/tigerbeetle/src/vopr_hub/go.mod +3 -0
- package/src/tigerbeetle/src/vopr_hub/main.go +1022 -0
- package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +3 -0
- package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +403 -0
- package/src/tigerbeetle/src/vsr/client.zig +13 -0
- package/src/tigerbeetle/src/vsr/journal.zig +16 -13
- package/src/tigerbeetle/src/vsr/replica.zig +924 -491
- package/src/tigerbeetle/src/vsr/superblock.zig +55 -37
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -10
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +2 -2
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +18 -3
- package/src/tigerbeetle/src/vsr.zig +75 -55
- package/src/tigerbeetle/scripts/vopr.bat +0 -48
- package/src/tigerbeetle/scripts/vopr.sh +0 -33
|
@@ -13,6 +13,7 @@ const CompositeKey = @import("composite_key.zig").CompositeKey;
|
|
|
13
13
|
const NodePool = @import("node_pool.zig").NodePool(config.lsm_manifest_node_size, 16);
|
|
14
14
|
|
|
15
15
|
const snapshot_latest = @import("tree.zig").snapshot_latest;
|
|
16
|
+
const compaction_snapshot_for_op = @import("tree.zig").compaction_snapshot_for_op;
|
|
16
17
|
|
|
17
18
|
fn ObjectTreeHelpers(comptime Object: type) type {
|
|
18
19
|
assert(@hasField(Object, "id"));
|
|
@@ -49,6 +50,12 @@ const IdTreeValue = extern struct {
|
|
|
49
50
|
timestamp: u64,
|
|
50
51
|
padding: u64 = 0,
|
|
51
52
|
|
|
53
|
+
comptime {
|
|
54
|
+
// Assert that there is no implicit padding.
|
|
55
|
+
assert(@sizeOf(IdTreeValue) == 32);
|
|
56
|
+
assert(@bitSizeOf(IdTreeValue) == 32 * 8);
|
|
57
|
+
}
|
|
58
|
+
|
|
52
59
|
inline fn compare_keys(a: u128, b: u128) std.math.Order {
|
|
53
60
|
return std.math.order(a, b);
|
|
54
61
|
}
|
|
@@ -78,7 +85,7 @@ fn IndexCompositeKeyType(comptime Field: type) type {
|
|
|
78
85
|
.Enum => |e| {
|
|
79
86
|
return switch (@bitSizeOf(e.tag_type)) {
|
|
80
87
|
0...@bitSizeOf(u64) => u64,
|
|
81
|
-
@bitSizeOf(
|
|
88
|
+
@bitSizeOf(u65)...@bitSizeOf(u128) => u128,
|
|
82
89
|
else => @compileError("Unsupported enum tag for index: " ++ @typeName(e.tag_type)),
|
|
83
90
|
};
|
|
84
91
|
},
|
|
@@ -88,7 +95,7 @@ fn IndexCompositeKeyType(comptime Field: type) type {
|
|
|
88
95
|
}
|
|
89
96
|
return switch (@bitSizeOf(Field)) {
|
|
90
97
|
0...@bitSizeOf(u64) => u64,
|
|
91
|
-
@bitSizeOf(
|
|
98
|
+
@bitSizeOf(u65)...@bitSizeOf(u128) => u128,
|
|
92
99
|
else => @compileError("Unsupported int type for index: " ++ @typeName(Field)),
|
|
93
100
|
};
|
|
94
101
|
},
|
|
@@ -131,6 +138,9 @@ fn IndexTreeType(
|
|
|
131
138
|
|
|
132
139
|
/// A Groove is a collection of LSM trees auto generated for fields on a struct type
|
|
133
140
|
/// as well as custom derived fields from said struct type.
|
|
141
|
+
///
|
|
142
|
+
/// Invariants:
|
|
143
|
+
/// - Between beats, all of a groove's trees share the same lookup_snapshot_max.
|
|
134
144
|
pub fn GrooveType(
|
|
135
145
|
comptime Storage: type,
|
|
136
146
|
comptime Object: type,
|
|
@@ -142,7 +152,7 @@ pub fn GrooveType(
|
|
|
142
152
|
/// - derived: { .field = fn (*const Object) ?DerivedType }:
|
|
143
153
|
/// An anonymous struct which contain fields that don't exist on the Object
|
|
144
154
|
/// but can be derived from an Object instance using the field's corresponding function.
|
|
145
|
-
comptime
|
|
155
|
+
comptime groove_options: anytype,
|
|
146
156
|
) type {
|
|
147
157
|
@setEvalBranchQuota(64000);
|
|
148
158
|
|
|
@@ -154,13 +164,13 @@ pub fn GrooveType(
|
|
|
154
164
|
comptime var index_fields: []const std.builtin.TypeInfo.StructField = &.{};
|
|
155
165
|
|
|
156
166
|
// Generate index LSM trees from the struct fields.
|
|
157
|
-
|
|
167
|
+
for (std.meta.fields(Object)) |field| {
|
|
158
168
|
// See if we should ignore this field from the options.
|
|
159
169
|
//
|
|
160
170
|
// By default, we ignore the "timestamp" field since it's a special identifier.
|
|
161
|
-
// Since the "timestamp" is ignored by default, it shouldn't be provided in
|
|
171
|
+
// Since the "timestamp" is ignored by default, it shouldn't be provided in groove_options.ignored.
|
|
162
172
|
comptime var ignored = mem.eql(u8, field.name, "timestamp") or mem.eql(u8, field.name, "id");
|
|
163
|
-
|
|
173
|
+
for (groove_options.ignored) |ignored_field_name| {
|
|
164
174
|
comptime assert(!std.mem.eql(u8, ignored_field_name, "timestamp"));
|
|
165
175
|
comptime assert(!std.mem.eql(u8, ignored_field_name, "id"));
|
|
166
176
|
ignored = ignored or std.mem.eql(u8, field.name, ignored_field_name);
|
|
@@ -181,11 +191,11 @@ pub fn GrooveType(
|
|
|
181
191
|
}
|
|
182
192
|
}
|
|
183
193
|
|
|
184
|
-
//
|
|
185
|
-
const derived_fields = std.meta.fields(@TypeOf(
|
|
186
|
-
|
|
194
|
+
// Generate IndexTrees for fields derived from the Value in groove_options.
|
|
195
|
+
const derived_fields = std.meta.fields(@TypeOf(groove_options.derived));
|
|
196
|
+
for (derived_fields) |field| {
|
|
187
197
|
// Get the function info for the derived field.
|
|
188
|
-
const derive_func = @field(
|
|
198
|
+
const derive_func = @field(groove_options.derived, field.name);
|
|
189
199
|
const derive_func_info = @typeInfo(@TypeOf(derive_func)).Fn;
|
|
190
200
|
|
|
191
201
|
// Make sure it has only one argument.
|
|
@@ -221,6 +231,20 @@ pub fn GrooveType(
|
|
|
221
231
|
};
|
|
222
232
|
}
|
|
223
233
|
|
|
234
|
+
comptime var index_options_fields: []const std.builtin.TypeInfo.StructField = &.{};
|
|
235
|
+
for (index_fields) |index_field| {
|
|
236
|
+
const IndexTree = index_field.field_type;
|
|
237
|
+
index_options_fields = index_options_fields ++ [_]std.builtin.TypeInfo.StructField{
|
|
238
|
+
.{
|
|
239
|
+
.name = index_field.name,
|
|
240
|
+
.field_type = IndexTree.Options,
|
|
241
|
+
.default_value = null,
|
|
242
|
+
.is_comptime = false,
|
|
243
|
+
.alignment = @alignOf(IndexTree.Options),
|
|
244
|
+
},
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
|
|
224
248
|
const ObjectTree = blk: {
|
|
225
249
|
const Table = TableType(
|
|
226
250
|
u64, // key = timestamp
|
|
@@ -259,6 +283,14 @@ pub fn GrooveType(
|
|
|
259
283
|
.is_tuple = false,
|
|
260
284
|
},
|
|
261
285
|
});
|
|
286
|
+
const IndexTreeOptions = @Type(.{
|
|
287
|
+
.Struct = .{
|
|
288
|
+
.layout = .Auto,
|
|
289
|
+
.fields = index_options_fields,
|
|
290
|
+
.decls = &.{},
|
|
291
|
+
.is_tuple = false,
|
|
292
|
+
},
|
|
293
|
+
});
|
|
262
294
|
|
|
263
295
|
// Verify no hash collisions between all the trees:
|
|
264
296
|
comptime var hashes: []const u128 = &.{ObjectTree.hash};
|
|
@@ -267,19 +299,20 @@ pub fn GrooveType(
|
|
|
267
299
|
const IndexTree = @TypeOf(@field(@as(IndexTrees, undefined), field.name));
|
|
268
300
|
const hash: []const u128 = &.{IndexTree.hash};
|
|
269
301
|
|
|
270
|
-
assert(std.mem.
|
|
302
|
+
assert(std.mem.indexOf(u128, hashes, hash) == null);
|
|
271
303
|
hashes = hashes ++ hash;
|
|
272
304
|
}
|
|
273
305
|
|
|
274
306
|
// Verify groove index count:
|
|
275
307
|
const indexes_count_actual = std.meta.fields(IndexTrees).len;
|
|
276
308
|
const indexes_count_expect = std.meta.fields(Object).len -
|
|
277
|
-
|
|
309
|
+
groove_options.ignored.len -
|
|
278
310
|
// The id/timestamp field is implicitly ignored since it's the primary key for ObjectTree:
|
|
279
311
|
2 +
|
|
280
|
-
std.meta.fields(@TypeOf(
|
|
312
|
+
std.meta.fields(@TypeOf(groove_options.derived)).len;
|
|
281
313
|
|
|
282
314
|
assert(indexes_count_actual == indexes_count_expect);
|
|
315
|
+
assert(indexes_count_actual == std.meta.fields(IndexTreeOptions).len);
|
|
283
316
|
|
|
284
317
|
// Generate a helper function for interacting with an Index field type.
|
|
285
318
|
const IndexTreeFieldHelperType = struct {
|
|
@@ -298,7 +331,7 @@ pub fn GrooveType(
|
|
|
298
331
|
return @TypeOf(@field(@as(Object, undefined), field_name));
|
|
299
332
|
}
|
|
300
333
|
|
|
301
|
-
const derived_fn = @TypeOf(@field(
|
|
334
|
+
const derived_fn = @TypeOf(@field(groove_options.derived, field_name));
|
|
302
335
|
return @typeInfo(derived_fn).Fn.return_type.?.Optional.child;
|
|
303
336
|
}
|
|
304
337
|
|
|
@@ -309,7 +342,7 @@ pub fn GrooveType(
|
|
|
309
342
|
/// Try to extract an index from the object, deriving it when necessary.
|
|
310
343
|
pub fn derive_index(object: *const Object) ?Index {
|
|
311
344
|
if (comptime is_derived(field_name)) {
|
|
312
|
-
return @field(
|
|
345
|
+
return @field(groove_options.derived, field_name)(object);
|
|
313
346
|
} else {
|
|
314
347
|
return @field(object, field_name);
|
|
315
348
|
}
|
|
@@ -371,10 +404,10 @@ pub fn GrooveType(
|
|
|
371
404
|
join_pending: usize = 0,
|
|
372
405
|
join_callback: ?Callback = null,
|
|
373
406
|
|
|
374
|
-
objects_cache: *ObjectTree.
|
|
407
|
+
objects_cache: *ObjectTree.TableMutable.ValuesCache,
|
|
375
408
|
objects: ObjectTree,
|
|
376
409
|
|
|
377
|
-
ids_cache: *IdTree.
|
|
410
|
+
ids_cache: *IdTree.TableMutable.ValuesCache,
|
|
378
411
|
ids: IdTree,
|
|
379
412
|
|
|
380
413
|
indexes: IndexTrees,
|
|
@@ -391,41 +424,36 @@ pub fn GrooveType(
|
|
|
391
424
|
/// sufficient to query this hashmap alone to know the state of the LSM trees.
|
|
392
425
|
prefetch_objects: PrefetchObjects,
|
|
393
426
|
|
|
427
|
+
/// The snapshot to prefetch from.
|
|
428
|
+
prefetch_snapshot: ?u64,
|
|
429
|
+
|
|
430
|
+
pub const Options = struct {
|
|
431
|
+
/// TODO Improve unit in this name to make more clear what should be passed.
|
|
432
|
+
/// For example, is this a size in bytes or a count in objects? It's a count in objects,
|
|
433
|
+
/// but the name poorly reflects this.
|
|
434
|
+
cache_entries_max: u32,
|
|
435
|
+
/// The maximum number of objects that might be prefetched by a batch.
|
|
436
|
+
prefetch_entries_max: u32,
|
|
437
|
+
|
|
438
|
+
tree_options_object: ObjectTree.Options,
|
|
439
|
+
tree_options_id: IdTree.Options,
|
|
440
|
+
tree_options_index: IndexTreeOptions,
|
|
441
|
+
};
|
|
442
|
+
|
|
394
443
|
pub fn init(
|
|
395
444
|
allocator: mem.Allocator,
|
|
396
445
|
node_pool: *NodePool,
|
|
397
446
|
grid: *Grid,
|
|
398
|
-
|
|
399
|
-
// that tigerbeetle was given to allocate from CLI arguments.
|
|
400
|
-
// TODO Improve unit in this name to make more clear what should be passed.
|
|
401
|
-
// For example, is this a size in bytes or a count in objects? It's a count in objects,
|
|
402
|
-
// but the name poorly reflects this.
|
|
403
|
-
cache_size: u32,
|
|
404
|
-
// In general, the commit count max for a field, depends on the field's object,
|
|
405
|
-
// how many objects might be changed by a batch:
|
|
406
|
-
// (config.message_size_max - sizeOf(vsr.header))
|
|
407
|
-
// For example, there are at most 8191 transfers in a batch.
|
|
408
|
-
// So commit_count_max=8191 for transfer objects and indexes.
|
|
409
|
-
//
|
|
410
|
-
// However, if a transfer is ever mutated, then this will double commit_count_max
|
|
411
|
-
// since the old index might need to be removed, and the new index inserted.
|
|
412
|
-
//
|
|
413
|
-
// A way to see this is by looking at the state machine. If a transfer is inserted,
|
|
414
|
-
// how many accounts and transfer put/removes will be generated?
|
|
415
|
-
//
|
|
416
|
-
// This also means looking at the state machine operation that will generate the
|
|
417
|
-
// most put/removes in the worst case.
|
|
418
|
-
// For example, create_accounts will put at most 8191 accounts.
|
|
419
|
-
// However, create_transfers will put 2 accounts (8191 * 2) for every transfer, and
|
|
420
|
-
// some of these accounts may exist, requiring a remove/put to update the index.
|
|
421
|
-
commit_count_max: u32,
|
|
447
|
+
options: Options,
|
|
422
448
|
) !Groove {
|
|
423
|
-
// Cache is
|
|
424
|
-
const objects_cache = try allocator.create(ObjectTree.
|
|
449
|
+
// Cache is heap-allocated to pass a pointer into the Object tree.
|
|
450
|
+
const objects_cache = try allocator.create(ObjectTree.TableMutable.ValuesCache);
|
|
425
451
|
errdefer allocator.destroy(objects_cache);
|
|
426
452
|
|
|
427
|
-
objects_cache.* = .
|
|
428
|
-
|
|
453
|
+
objects_cache.* = try ObjectTree.TableMutable.ValuesCache.init(
|
|
454
|
+
allocator,
|
|
455
|
+
options.cache_entries_max,
|
|
456
|
+
);
|
|
429
457
|
errdefer objects_cache.deinit(allocator);
|
|
430
458
|
|
|
431
459
|
// Initialize the object LSM tree.
|
|
@@ -434,18 +462,15 @@ pub fn GrooveType(
|
|
|
434
462
|
node_pool,
|
|
435
463
|
grid,
|
|
436
464
|
objects_cache,
|
|
437
|
-
.
|
|
438
|
-
.commit_count_max = commit_count_max,
|
|
439
|
-
},
|
|
465
|
+
options.tree_options_object,
|
|
440
466
|
);
|
|
441
467
|
errdefer object_tree.deinit(allocator);
|
|
442
468
|
|
|
443
|
-
// Cache is
|
|
444
|
-
const ids_cache = try allocator.create(IdTree.
|
|
469
|
+
// Cache is heap-allocated to pass a pointer into the ID tree.
|
|
470
|
+
const ids_cache = try allocator.create(IdTree.TableMutable.ValuesCache);
|
|
445
471
|
errdefer allocator.destroy(ids_cache);
|
|
446
472
|
|
|
447
|
-
ids_cache.* = .
|
|
448
|
-
try ids_cache.ensureTotalCapacity(allocator, cache_size);
|
|
473
|
+
ids_cache.* = try IdTree.TableMutable.ValuesCache.init(allocator, options.cache_entries_max);
|
|
449
474
|
errdefer ids_cache.deinit(allocator);
|
|
450
475
|
|
|
451
476
|
var id_tree = try IdTree.init(
|
|
@@ -453,9 +478,7 @@ pub fn GrooveType(
|
|
|
453
478
|
node_pool,
|
|
454
479
|
grid,
|
|
455
480
|
ids_cache,
|
|
456
|
-
.
|
|
457
|
-
.commit_count_max = commit_count_max,
|
|
458
|
-
},
|
|
481
|
+
options.tree_options_id,
|
|
459
482
|
);
|
|
460
483
|
errdefer id_tree.deinit(allocator);
|
|
461
484
|
|
|
@@ -475,23 +498,18 @@ pub fn GrooveType(
|
|
|
475
498
|
allocator,
|
|
476
499
|
node_pool,
|
|
477
500
|
grid,
|
|
478
|
-
null, // No value cache for index trees.
|
|
479
|
-
.
|
|
480
|
-
.commit_count_max = commit_count_max,
|
|
481
|
-
},
|
|
501
|
+
null, // No value cache for index trees, since they only do range queries.
|
|
502
|
+
@field(options.tree_options_index, field.name),
|
|
482
503
|
);
|
|
483
504
|
index_trees_initialized += 1;
|
|
484
505
|
}
|
|
485
506
|
|
|
486
|
-
// TODO: document why this is twice the commit count max.
|
|
487
|
-
const prefetch_count_max = commit_count_max * 2;
|
|
488
|
-
|
|
489
507
|
var prefetch_ids = PrefetchIDs{};
|
|
490
|
-
try prefetch_ids.ensureTotalCapacity(allocator,
|
|
508
|
+
try prefetch_ids.ensureTotalCapacity(allocator, options.prefetch_entries_max);
|
|
491
509
|
errdefer prefetch_ids.deinit(allocator);
|
|
492
510
|
|
|
493
511
|
var prefetch_objects = PrefetchObjects{};
|
|
494
|
-
try prefetch_objects.ensureTotalCapacity(allocator,
|
|
512
|
+
try prefetch_objects.ensureTotalCapacity(allocator, options.prefetch_entries_max);
|
|
495
513
|
errdefer prefetch_objects.deinit(allocator);
|
|
496
514
|
|
|
497
515
|
return Groove{
|
|
@@ -505,14 +523,11 @@ pub fn GrooveType(
|
|
|
505
523
|
|
|
506
524
|
.prefetch_ids = prefetch_ids,
|
|
507
525
|
.prefetch_objects = prefetch_objects,
|
|
526
|
+
.prefetch_snapshot = null,
|
|
508
527
|
};
|
|
509
528
|
}
|
|
510
529
|
|
|
511
530
|
pub fn deinit(groove: *Groove, allocator: mem.Allocator) void {
|
|
512
|
-
assert(groove.join_op == null);
|
|
513
|
-
assert(groove.join_pending == 0);
|
|
514
|
-
assert(groove.join_callback == null);
|
|
515
|
-
|
|
516
531
|
inline for (std.meta.fields(IndexTrees)) |field| {
|
|
517
532
|
@field(groove.indexes, field.name).deinit(allocator);
|
|
518
533
|
}
|
|
@@ -535,10 +550,26 @@ pub fn GrooveType(
|
|
|
535
550
|
return groove.prefetch_objects.getKeyPtrAdapted(id, PrefetchObjectsAdapter{});
|
|
536
551
|
}
|
|
537
552
|
|
|
538
|
-
/// Must be called directly
|
|
539
|
-
///
|
|
540
|
-
pub fn
|
|
541
|
-
|
|
553
|
+
/// Must be called directly before the state machine begins queuing ids for prefetch.
|
|
554
|
+
/// When `snapshot` is null, prefetch from the current snapshot.
|
|
555
|
+
pub fn prefetch_setup(groove: *Groove, snapshot: ?u64) void {
|
|
556
|
+
// We may query the input tables of an ongoing compaction, but must not query the
|
|
557
|
+
// output tables until the compaction is complete. (Until then, the output tables may
|
|
558
|
+
// be in the manifest but not yet on disk).
|
|
559
|
+
const snapshot_max = groove.objects.lookup_snapshot_max;
|
|
560
|
+
assert(snapshot_max == groove.ids.lookup_snapshot_max);
|
|
561
|
+
|
|
562
|
+
const snapshot_target = snapshot orelse snapshot_max;
|
|
563
|
+
assert(snapshot_target <= snapshot_max);
|
|
564
|
+
|
|
565
|
+
if (groove.prefetch_snapshot == null) {
|
|
566
|
+
groove.prefetch_objects.clearRetainingCapacity();
|
|
567
|
+
} else {
|
|
568
|
+
// If there is a snapshot already set from the previous prefetch_setup(), then its
|
|
569
|
+
// prefetch() was never called, so there must already be no queued objects or ids.
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
groove.prefetch_snapshot = snapshot_target;
|
|
542
573
|
assert(groove.prefetch_objects.count() == 0);
|
|
543
574
|
assert(groove.prefetch_ids.count() == 0);
|
|
544
575
|
}
|
|
@@ -547,14 +578,21 @@ pub fn GrooveType(
|
|
|
547
578
|
/// We tolerate duplicate IDs enqueued by the state machine.
|
|
548
579
|
/// For example, if all unique operations require the same two dependencies.
|
|
549
580
|
pub fn prefetch_enqueue(groove: *Groove, id: u128) void {
|
|
550
|
-
if (groove.ids.
|
|
551
|
-
if (
|
|
552
|
-
|
|
553
|
-
assert(!ObjectTreeHelpers(Object).tombstone(object));
|
|
554
|
-
groove.prefetch_objects.putAssumeCapacity(object.*, {});
|
|
581
|
+
if (groove.ids.lookup_from_memory(groove.prefetch_snapshot.?, id)) |id_tree_value| {
|
|
582
|
+
if (id_tree_value.tombstone()) {
|
|
583
|
+
// Do nothing; an explicit ID tombstone indicates that the object was deleted.
|
|
555
584
|
} else {
|
|
556
|
-
|
|
557
|
-
|
|
585
|
+
if (groove.objects.lookup_from_memory(
|
|
586
|
+
groove.prefetch_snapshot.?,
|
|
587
|
+
id_tree_value.timestamp,
|
|
588
|
+
)) |object| {
|
|
589
|
+
assert(!ObjectTreeHelpers(Object).tombstone(object));
|
|
590
|
+
groove.prefetch_objects.putAssumeCapacity(object.*, {});
|
|
591
|
+
} else {
|
|
592
|
+
// The id was in the IdTree's value cache, but not in the ObjectTree's
|
|
593
|
+
// value cache.
|
|
594
|
+
groove.prefetch_ids.putAssumeCapacity(id, {});
|
|
595
|
+
}
|
|
558
596
|
}
|
|
559
597
|
} else {
|
|
560
598
|
groove.prefetch_ids.putAssumeCapacity(id, {});
|
|
@@ -562,8 +600,7 @@ pub fn GrooveType(
|
|
|
562
600
|
}
|
|
563
601
|
|
|
564
602
|
/// Ensure the objects corresponding to all ids enqueued with prefetch_enqueue() are
|
|
565
|
-
///
|
|
566
|
-
/// backup hash map.
|
|
603
|
+
/// available in `prefetch_objects`.
|
|
567
604
|
pub fn prefetch(
|
|
568
605
|
groove: *Groove,
|
|
569
606
|
callback: fn (*PrefetchContext) void,
|
|
@@ -572,14 +609,17 @@ pub fn GrooveType(
|
|
|
572
609
|
context.* = .{
|
|
573
610
|
.groove = groove,
|
|
574
611
|
.callback = callback,
|
|
612
|
+
.snapshot = groove.prefetch_snapshot.?,
|
|
575
613
|
.id_iterator = groove.prefetch_ids.keyIterator(),
|
|
576
614
|
};
|
|
615
|
+
groove.prefetch_snapshot = null;
|
|
577
616
|
context.start_workers();
|
|
578
617
|
}
|
|
579
618
|
|
|
580
619
|
pub const PrefetchContext = struct {
|
|
581
620
|
groove: *Groove,
|
|
582
621
|
callback: fn (*PrefetchContext) void,
|
|
622
|
+
snapshot: u64,
|
|
583
623
|
|
|
584
624
|
id_iterator: PrefetchIDs.KeyIterator,
|
|
585
625
|
|
|
@@ -595,17 +635,16 @@ pub fn GrooveType(
|
|
|
595
635
|
|
|
596
636
|
// Track an extra "worker" that will finish after the loop.
|
|
597
637
|
//
|
|
598
|
-
// This prevents `context.finish()` from being called within the loop body when
|
|
599
|
-
// worker finishes synchronously. `context.finish()`
|
|
600
|
-
//
|
|
638
|
+
// This prevents `context.finish()` from being called within the loop body when
|
|
639
|
+
// every worker finishes synchronously. `context.finish()` calls the user-provided
|
|
640
|
+
// callback which may re-use the memory of this `PrefetchContext`. However, we
|
|
641
|
+
// rely on `context` being well-defined for the loop condition.
|
|
601
642
|
context.workers_busy += 1;
|
|
602
643
|
|
|
603
|
-
|
|
604
|
-
while (context.workers_busy - 1 < context.workers.len) {
|
|
605
|
-
const worker = &context.workers[context.workers_busy - 1];
|
|
644
|
+
for (context.workers) |*worker| {
|
|
606
645
|
worker.* = .{ .context = context };
|
|
607
646
|
context.workers_busy += 1;
|
|
608
|
-
|
|
647
|
+
worker.lookup_start_next();
|
|
609
648
|
}
|
|
610
649
|
|
|
611
650
|
assert(context.workers_busy >= 1);
|
|
@@ -619,12 +658,12 @@ pub fn GrooveType(
|
|
|
619
658
|
|
|
620
659
|
fn finish(context: *PrefetchContext) void {
|
|
621
660
|
assert(context.workers_busy == 0);
|
|
622
|
-
|
|
661
|
+
|
|
623
662
|
assert(context.id_iterator.next() == null);
|
|
663
|
+
context.groove.prefetch_ids.clearRetainingCapacity();
|
|
664
|
+
assert(context.groove.prefetch_ids.count() == 0);
|
|
624
665
|
|
|
625
|
-
|
|
626
|
-
context.* = undefined;
|
|
627
|
-
callback(context);
|
|
666
|
+
context.callback(context);
|
|
628
667
|
}
|
|
629
668
|
};
|
|
630
669
|
|
|
@@ -636,34 +675,37 @@ pub fn GrooveType(
|
|
|
636
675
|
lookup_id: IdTree.LookupContext = undefined,
|
|
637
676
|
lookup_object: ObjectTree.LookupContext = undefined,
|
|
638
677
|
|
|
639
|
-
|
|
640
|
-
/// Returns false if there are no more IDs to prefetch.
|
|
641
|
-
fn lookup_start(worker: *PrefetchWorker) bool {
|
|
642
|
-
const groove = worker.context.groove;
|
|
643
|
-
|
|
678
|
+
fn lookup_start_next(worker: *PrefetchWorker) void {
|
|
644
679
|
const id = worker.context.id_iterator.next() orelse {
|
|
645
|
-
groove.prefetch_ids.clearRetainingCapacity();
|
|
646
|
-
assert(groove.prefetch_ids.count() == 0);
|
|
647
680
|
worker.context.worker_finished();
|
|
648
|
-
return
|
|
681
|
+
return;
|
|
649
682
|
};
|
|
650
683
|
|
|
651
|
-
if (
|
|
652
|
-
|
|
653
|
-
assert(groove.ids.get_cached(id.*) == null);
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
// If not in the LSM tree's cache, the object must be read from disk and added
|
|
657
|
-
// to the auxiliary prefetch_objects hash map.
|
|
658
|
-
// TODO: this LSM tree function needlessly checks the LSM tree's cache a
|
|
659
|
-
// second time. Adding API to the LSM tree to avoid this may be worthwhile.
|
|
660
|
-
groove.ids.lookup(
|
|
661
|
-
lookup_id_callback,
|
|
662
|
-
&worker.lookup_id,
|
|
663
|
-
snapshot_latest,
|
|
684
|
+
if (worker.context.groove.ids.lookup_from_memory(
|
|
685
|
+
worker.context.snapshot,
|
|
664
686
|
id.*,
|
|
665
|
-
)
|
|
666
|
-
|
|
687
|
+
)) |id_tree_value| {
|
|
688
|
+
assert(!id_tree_value.tombstone());
|
|
689
|
+
lookup_id_callback(&worker.lookup_id, id_tree_value);
|
|
690
|
+
|
|
691
|
+
if (config.verify) {
|
|
692
|
+
// If the id is cached, then we must be prefetching it because the object
|
|
693
|
+
// was not also cached.
|
|
694
|
+
assert(worker.context.groove.objects.lookup_from_memory(
|
|
695
|
+
worker.context.snapshot,
|
|
696
|
+
id_tree_value.timestamp,
|
|
697
|
+
) == null);
|
|
698
|
+
}
|
|
699
|
+
} else {
|
|
700
|
+
// If not in the LSM tree's cache, the object must be read from disk and added
|
|
701
|
+
// to the auxiliary prefetch_objects hash map.
|
|
702
|
+
worker.context.groove.ids.lookup_from_levels(
|
|
703
|
+
lookup_id_callback,
|
|
704
|
+
&worker.lookup_id,
|
|
705
|
+
worker.context.snapshot,
|
|
706
|
+
id.*,
|
|
707
|
+
);
|
|
708
|
+
}
|
|
667
709
|
}
|
|
668
710
|
|
|
669
711
|
fn lookup_id_callback(
|
|
@@ -673,18 +715,45 @@ pub fn GrooveType(
|
|
|
673
715
|
const worker = @fieldParentPtr(PrefetchWorker, "lookup_id", completion);
|
|
674
716
|
|
|
675
717
|
if (result) |id_tree_value| {
|
|
676
|
-
if (
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
718
|
+
if (config.verify) {
|
|
719
|
+
// This was checked in prefetch_enqueue().
|
|
720
|
+
assert(
|
|
721
|
+
worker.context.groove.ids.lookup_from_memory(
|
|
722
|
+
worker.context.snapshot,
|
|
723
|
+
worker.lookup_id.key,
|
|
724
|
+
) == null or
|
|
725
|
+
worker.context.groove.objects.lookup_from_memory(
|
|
726
|
+
worker.context.snapshot,
|
|
727
|
+
id_tree_value.timestamp,
|
|
728
|
+
) == null,
|
|
682
729
|
);
|
|
683
|
-
} else {
|
|
684
|
-
worker.lookup_finish();
|
|
685
730
|
}
|
|
731
|
+
|
|
732
|
+
if (id_tree_value.tombstone()) {
|
|
733
|
+
worker.lookup_start_next();
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
if (worker.context.groove.objects.lookup_from_memory(
|
|
738
|
+
worker.context.snapshot,
|
|
739
|
+
id_tree_value.timestamp,
|
|
740
|
+
)) |object| {
|
|
741
|
+
// The object is not a tombstone; the ID and Object trees are in sync.
|
|
742
|
+
assert(!ObjectTreeHelpers(Object).tombstone(object));
|
|
743
|
+
|
|
744
|
+
worker.context.groove.prefetch_objects.putAssumeCapacityNoClobber(object.*, {});
|
|
745
|
+
worker.lookup_start_next();
|
|
746
|
+
return;
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
worker.context.groove.objects.lookup_from_levels(
|
|
750
|
+
lookup_object_callback,
|
|
751
|
+
&worker.lookup_object,
|
|
752
|
+
worker.context.snapshot,
|
|
753
|
+
id_tree_value.timestamp,
|
|
754
|
+
);
|
|
686
755
|
} else {
|
|
687
|
-
worker.
|
|
756
|
+
worker.lookup_start_next();
|
|
688
757
|
}
|
|
689
758
|
}
|
|
690
759
|
|
|
@@ -699,13 +768,7 @@ pub fn GrooveType(
|
|
|
699
768
|
assert(!ObjectTreeHelpers(Object).tombstone(object));
|
|
700
769
|
|
|
701
770
|
worker.context.groove.prefetch_objects.putAssumeCapacityNoClobber(object.*, {});
|
|
702
|
-
worker.
|
|
703
|
-
}
|
|
704
|
-
|
|
705
|
-
fn lookup_finish(worker: *PrefetchWorker) void {
|
|
706
|
-
if (!worker.lookup_start()) {
|
|
707
|
-
worker.* = undefined;
|
|
708
|
-
}
|
|
771
|
+
worker.lookup_start_next();
|
|
709
772
|
}
|
|
710
773
|
};
|
|
711
774
|
|
|
@@ -794,7 +857,7 @@ pub fn GrooveType(
|
|
|
794
857
|
assert(groove.prefetch_objects.removeAdapted(object.id, PrefetchObjectsAdapter{}));
|
|
795
858
|
}
|
|
796
859
|
|
|
797
|
-
/// Maximum number of pending sync callbacks (
|
|
860
|
+
/// Maximum number of pending sync callbacks (ObjectTree + IdTree + IndexTrees).
|
|
798
861
|
const join_pending_max = 2 + std.meta.fields(IndexTrees).len;
|
|
799
862
|
|
|
800
863
|
fn JoinType(comptime join_op: JoinOp) type {
|
|
@@ -932,7 +995,7 @@ test "Groove" {
|
|
|
932
995
|
|
|
933
996
|
_ = Groove.prefetch_enqueue;
|
|
934
997
|
_ = Groove.prefetch;
|
|
935
|
-
_ = Groove.
|
|
998
|
+
_ = Groove.prefetch_setup;
|
|
936
999
|
|
|
937
1000
|
std.testing.refAllDecls(Groove.PrefetchWorker);
|
|
938
1001
|
std.testing.refAllDecls(Groove.PrefetchContext);
|