tigerbeetle-node 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +302 -101
  2. package/dist/index.d.ts +70 -72
  3. package/dist/index.js +70 -72
  4. package/dist/index.js.map +1 -1
  5. package/package.json +6 -6
  6. package/scripts/download_node_headers.sh +14 -7
  7. package/src/index.ts +6 -10
  8. package/src/node.zig +6 -3
  9. package/src/tigerbeetle/scripts/benchmark.sh +4 -4
  10. package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
  11. package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
  12. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
  13. package/src/tigerbeetle/scripts/install.sh +19 -4
  14. package/src/tigerbeetle/scripts/install_zig.bat +5 -1
  15. package/src/tigerbeetle/scripts/install_zig.sh +24 -14
  16. package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
  17. package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
  18. package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
  19. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
  20. package/src/tigerbeetle/src/benchmark.zig +4 -2
  21. package/src/tigerbeetle/src/benchmark_array_search.zig +3 -3
  22. package/src/tigerbeetle/src/c/tb_client/thread.zig +8 -9
  23. package/src/tigerbeetle/src/c/tb_client.h +100 -80
  24. package/src/tigerbeetle/src/c/tb_client.zig +4 -1
  25. package/src/tigerbeetle/src/cli.zig +1 -1
  26. package/src/tigerbeetle/src/config.zig +48 -16
  27. package/src/tigerbeetle/src/demo.zig +3 -1
  28. package/src/tigerbeetle/src/eytzinger_benchmark.zig +3 -3
  29. package/src/tigerbeetle/src/io/linux.zig +1 -1
  30. package/src/tigerbeetle/src/lsm/README.md +214 -0
  31. package/src/tigerbeetle/src/lsm/binary_search.zig +137 -10
  32. package/src/tigerbeetle/src/lsm/bloom_filter.zig +43 -0
  33. package/src/tigerbeetle/src/lsm/compaction.zig +352 -398
  34. package/src/tigerbeetle/src/lsm/composite_key.zig +2 -0
  35. package/src/tigerbeetle/src/lsm/eytzinger.zig +1 -1
  36. package/src/tigerbeetle/src/lsm/forest.zig +21 -447
  37. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +412 -0
  38. package/src/tigerbeetle/src/lsm/grid.zig +145 -69
  39. package/src/tigerbeetle/src/lsm/groove.zig +196 -133
  40. package/src/tigerbeetle/src/lsm/k_way_merge.zig +40 -18
  41. package/src/tigerbeetle/src/lsm/level_iterator.zig +28 -9
  42. package/src/tigerbeetle/src/lsm/manifest.zig +81 -181
  43. package/src/tigerbeetle/src/lsm/manifest_level.zig +210 -454
  44. package/src/tigerbeetle/src/lsm/manifest_log.zig +77 -28
  45. package/src/tigerbeetle/src/lsm/posted_groove.zig +64 -76
  46. package/src/tigerbeetle/src/lsm/segmented_array.zig +561 -241
  47. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
  48. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
  49. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +62 -12
  50. package/src/tigerbeetle/src/lsm/table.zig +83 -48
  51. package/src/tigerbeetle/src/lsm/table_immutable.zig +30 -23
  52. package/src/tigerbeetle/src/lsm/table_iterator.zig +25 -14
  53. package/src/tigerbeetle/src/lsm/table_mutable.zig +63 -12
  54. package/src/tigerbeetle/src/lsm/test.zig +49 -55
  55. package/src/tigerbeetle/src/lsm/tree.zig +407 -402
  56. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +457 -0
  57. package/src/tigerbeetle/src/main.zig +28 -6
  58. package/src/tigerbeetle/src/message_bus.zig +2 -2
  59. package/src/tigerbeetle/src/message_pool.zig +14 -17
  60. package/src/tigerbeetle/src/simulator.zig +145 -112
  61. package/src/tigerbeetle/src/state_machine.zig +338 -228
  62. package/src/tigerbeetle/src/static_allocator.zig +65 -0
  63. package/src/tigerbeetle/src/storage.zig +3 -7
  64. package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
  65. package/src/tigerbeetle/src/test/accounting/workload.zig +819 -0
  66. package/src/tigerbeetle/src/test/cluster.zig +18 -48
  67. package/src/tigerbeetle/src/test/conductor.zig +365 -0
  68. package/src/tigerbeetle/src/test/fuzz.zig +121 -0
  69. package/src/tigerbeetle/src/test/id.zig +89 -0
  70. package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
  71. package/src/tigerbeetle/src/test/state_checker.zig +93 -69
  72. package/src/tigerbeetle/src/test/state_machine.zig +11 -35
  73. package/src/tigerbeetle/src/test/storage.zig +29 -8
  74. package/src/tigerbeetle/src/tigerbeetle.zig +14 -16
  75. package/src/tigerbeetle/src/unit_tests.zig +7 -0
  76. package/src/tigerbeetle/src/vopr.zig +494 -0
  77. package/src/tigerbeetle/src/vopr_hub/README.md +58 -0
  78. package/src/tigerbeetle/src/vopr_hub/SETUP.md +199 -0
  79. package/src/tigerbeetle/src/vopr_hub/go.mod +3 -0
  80. package/src/tigerbeetle/src/vopr_hub/main.go +1022 -0
  81. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +3 -0
  82. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +403 -0
  83. package/src/tigerbeetle/src/vsr/client.zig +13 -0
  84. package/src/tigerbeetle/src/vsr/journal.zig +16 -13
  85. package/src/tigerbeetle/src/vsr/replica.zig +924 -491
  86. package/src/tigerbeetle/src/vsr/superblock.zig +55 -37
  87. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -10
  88. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +2 -2
  89. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +18 -3
  90. package/src/tigerbeetle/src/vsr.zig +75 -55
  91. package/src/tigerbeetle/scripts/vopr.bat +0 -48
  92. package/src/tigerbeetle/scripts/vopr.sh +0 -33
@@ -13,6 +13,7 @@ const CompositeKey = @import("composite_key.zig").CompositeKey;
13
13
  const NodePool = @import("node_pool.zig").NodePool(config.lsm_manifest_node_size, 16);
14
14
 
15
15
  const snapshot_latest = @import("tree.zig").snapshot_latest;
16
+ const compaction_snapshot_for_op = @import("tree.zig").compaction_snapshot_for_op;
16
17
 
17
18
  fn ObjectTreeHelpers(comptime Object: type) type {
18
19
  assert(@hasField(Object, "id"));
@@ -49,6 +50,12 @@ const IdTreeValue = extern struct {
49
50
  timestamp: u64,
50
51
  padding: u64 = 0,
51
52
 
53
+ comptime {
54
+ // Assert that there is no implicit padding.
55
+ assert(@sizeOf(IdTreeValue) == 32);
56
+ assert(@bitSizeOf(IdTreeValue) == 32 * 8);
57
+ }
58
+
52
59
  inline fn compare_keys(a: u128, b: u128) std.math.Order {
53
60
  return std.math.order(a, b);
54
61
  }
@@ -78,7 +85,7 @@ fn IndexCompositeKeyType(comptime Field: type) type {
78
85
  .Enum => |e| {
79
86
  return switch (@bitSizeOf(e.tag_type)) {
80
87
  0...@bitSizeOf(u64) => u64,
81
- @bitSizeOf(u64)...@bitSizeOf(u128) => u128,
88
+ @bitSizeOf(u65)...@bitSizeOf(u128) => u128,
82
89
  else => @compileError("Unsupported enum tag for index: " ++ @typeName(e.tag_type)),
83
90
  };
84
91
  },
@@ -88,7 +95,7 @@ fn IndexCompositeKeyType(comptime Field: type) type {
88
95
  }
89
96
  return switch (@bitSizeOf(Field)) {
90
97
  0...@bitSizeOf(u64) => u64,
91
- @bitSizeOf(u64)...@bitSizeOf(u128) => u128,
98
+ @bitSizeOf(u65)...@bitSizeOf(u128) => u128,
92
99
  else => @compileError("Unsupported int type for index: " ++ @typeName(Field)),
93
100
  };
94
101
  },
@@ -131,6 +138,9 @@ fn IndexTreeType(
131
138
 
132
139
  /// A Groove is a collection of LSM trees auto generated for fields on a struct type
133
140
  /// as well as custom derived fields from said struct type.
141
+ ///
142
+ /// Invariants:
143
+ /// - Between beats, all of a groove's trees share the same lookup_snapshot_max.
134
144
  pub fn GrooveType(
135
145
  comptime Storage: type,
136
146
  comptime Object: type,
@@ -142,7 +152,7 @@ pub fn GrooveType(
142
152
  /// - derived: { .field = fn (*const Object) ?DerivedType }:
143
153
  /// An anonymous struct which contain fields that don't exist on the Object
144
154
  /// but can be derived from an Object instance using the field's corresponding function.
145
- comptime options: anytype,
155
+ comptime groove_options: anytype,
146
156
  ) type {
147
157
  @setEvalBranchQuota(64000);
148
158
 
@@ -154,13 +164,13 @@ pub fn GrooveType(
154
164
  comptime var index_fields: []const std.builtin.TypeInfo.StructField = &.{};
155
165
 
156
166
  // Generate index LSM trees from the struct fields.
157
- inline for (std.meta.fields(Object)) |field| {
167
+ for (std.meta.fields(Object)) |field| {
158
168
  // See if we should ignore this field from the options.
159
169
  //
160
170
  // By default, we ignore the "timestamp" field since it's a special identifier.
161
- // Since the "timestamp" is ignored by default, it shouldn't be provided in options.ignored.
171
+ // Since the "timestamp" is ignored by default, it shouldn't be provided in groove_options.ignored.
162
172
  comptime var ignored = mem.eql(u8, field.name, "timestamp") or mem.eql(u8, field.name, "id");
163
- inline for (options.ignored) |ignored_field_name| {
173
+ for (groove_options.ignored) |ignored_field_name| {
164
174
  comptime assert(!std.mem.eql(u8, ignored_field_name, "timestamp"));
165
175
  comptime assert(!std.mem.eql(u8, ignored_field_name, "id"));
166
176
  ignored = ignored or std.mem.eql(u8, field.name, ignored_field_name);
@@ -181,11 +191,11 @@ pub fn GrooveType(
181
191
  }
182
192
  }
183
193
 
184
- // Generiate IndexTrees for fields derived from the Value in options.
185
- const derived_fields = std.meta.fields(@TypeOf(options.derived));
186
- inline for (derived_fields) |field| {
194
+ // Generate IndexTrees for fields derived from the Value in groove_options.
195
+ const derived_fields = std.meta.fields(@TypeOf(groove_options.derived));
196
+ for (derived_fields) |field| {
187
197
  // Get the function info for the derived field.
188
- const derive_func = @field(options.derived, field.name);
198
+ const derive_func = @field(groove_options.derived, field.name);
189
199
  const derive_func_info = @typeInfo(@TypeOf(derive_func)).Fn;
190
200
 
191
201
  // Make sure it has only one argument.
@@ -221,6 +231,20 @@ pub fn GrooveType(
221
231
  };
222
232
  }
223
233
 
234
+ comptime var index_options_fields: []const std.builtin.TypeInfo.StructField = &.{};
235
+ for (index_fields) |index_field| {
236
+ const IndexTree = index_field.field_type;
237
+ index_options_fields = index_options_fields ++ [_]std.builtin.TypeInfo.StructField{
238
+ .{
239
+ .name = index_field.name,
240
+ .field_type = IndexTree.Options,
241
+ .default_value = null,
242
+ .is_comptime = false,
243
+ .alignment = @alignOf(IndexTree.Options),
244
+ },
245
+ };
246
+ }
247
+
224
248
  const ObjectTree = blk: {
225
249
  const Table = TableType(
226
250
  u64, // key = timestamp
@@ -259,6 +283,14 @@ pub fn GrooveType(
259
283
  .is_tuple = false,
260
284
  },
261
285
  });
286
+ const IndexTreeOptions = @Type(.{
287
+ .Struct = .{
288
+ .layout = .Auto,
289
+ .fields = index_options_fields,
290
+ .decls = &.{},
291
+ .is_tuple = false,
292
+ },
293
+ });
262
294
 
263
295
  // Verify no hash collisions between all the trees:
264
296
  comptime var hashes: []const u128 = &.{ObjectTree.hash};
@@ -267,19 +299,20 @@ pub fn GrooveType(
267
299
  const IndexTree = @TypeOf(@field(@as(IndexTrees, undefined), field.name));
268
300
  const hash: []const u128 = &.{IndexTree.hash};
269
301
 
270
- assert(std.mem.containsAtLeast(u128, hashes, 0, hash));
302
+ assert(std.mem.indexOf(u128, hashes, hash) == null);
271
303
  hashes = hashes ++ hash;
272
304
  }
273
305
 
274
306
  // Verify groove index count:
275
307
  const indexes_count_actual = std.meta.fields(IndexTrees).len;
276
308
  const indexes_count_expect = std.meta.fields(Object).len -
277
- options.ignored.len -
309
+ groove_options.ignored.len -
278
310
  // The id/timestamp field is implicitly ignored since it's the primary key for ObjectTree:
279
311
  2 +
280
- std.meta.fields(@TypeOf(options.derived)).len;
312
+ std.meta.fields(@TypeOf(groove_options.derived)).len;
281
313
 
282
314
  assert(indexes_count_actual == indexes_count_expect);
315
+ assert(indexes_count_actual == std.meta.fields(IndexTreeOptions).len);
283
316
 
284
317
  // Generate a helper function for interacting with an Index field type.
285
318
  const IndexTreeFieldHelperType = struct {
@@ -298,7 +331,7 @@ pub fn GrooveType(
298
331
  return @TypeOf(@field(@as(Object, undefined), field_name));
299
332
  }
300
333
 
301
- const derived_fn = @TypeOf(@field(options.derived, field_name));
334
+ const derived_fn = @TypeOf(@field(groove_options.derived, field_name));
302
335
  return @typeInfo(derived_fn).Fn.return_type.?.Optional.child;
303
336
  }
304
337
 
@@ -309,7 +342,7 @@ pub fn GrooveType(
309
342
  /// Try to extract an index from the object, deriving it when necessary.
310
343
  pub fn derive_index(object: *const Object) ?Index {
311
344
  if (comptime is_derived(field_name)) {
312
- return @field(options.derived, field_name)(object);
345
+ return @field(groove_options.derived, field_name)(object);
313
346
  } else {
314
347
  return @field(object, field_name);
315
348
  }
@@ -371,10 +404,10 @@ pub fn GrooveType(
371
404
  join_pending: usize = 0,
372
405
  join_callback: ?Callback = null,
373
406
 
374
- objects_cache: *ObjectTree.ValueCache,
407
+ objects_cache: *ObjectTree.TableMutable.ValuesCache,
375
408
  objects: ObjectTree,
376
409
 
377
- ids_cache: *IdTree.ValueCache,
410
+ ids_cache: *IdTree.TableMutable.ValuesCache,
378
411
  ids: IdTree,
379
412
 
380
413
  indexes: IndexTrees,
@@ -391,41 +424,36 @@ pub fn GrooveType(
391
424
  /// sufficient to query this hashmap alone to know the state of the LSM trees.
392
425
  prefetch_objects: PrefetchObjects,
393
426
 
427
+ /// The snapshot to prefetch from.
428
+ prefetch_snapshot: ?u64,
429
+
430
+ pub const Options = struct {
431
+ /// TODO Improve unit in this name to make more clear what should be passed.
432
+ /// For example, is this a size in bytes or a count in objects? It's a count in objects,
433
+ /// but the name poorly reflects this.
434
+ cache_entries_max: u32,
435
+ /// The maximum number of objects that might be prefetched by a batch.
436
+ prefetch_entries_max: u32,
437
+
438
+ tree_options_object: ObjectTree.Options,
439
+ tree_options_id: IdTree.Options,
440
+ tree_options_index: IndexTreeOptions,
441
+ };
442
+
394
443
  pub fn init(
395
444
  allocator: mem.Allocator,
396
445
  node_pool: *NodePool,
397
446
  grid: *Grid,
398
- // The cache size is meant to be computed based on the left over available memory
399
- // that tigerbeetle was given to allocate from CLI arguments.
400
- // TODO Improve unit in this name to make more clear what should be passed.
401
- // For example, is this a size in bytes or a count in objects? It's a count in objects,
402
- // but the name poorly reflects this.
403
- cache_size: u32,
404
- // In general, the commit count max for a field, depends on the field's object,
405
- // how many objects might be changed by a batch:
406
- // (config.message_size_max - sizeOf(vsr.header))
407
- // For example, there are at most 8191 transfers in a batch.
408
- // So commit_count_max=8191 for transfer objects and indexes.
409
- //
410
- // However, if a transfer is ever mutated, then this will double commit_count_max
411
- // since the old index might need to be removed, and the new index inserted.
412
- //
413
- // A way to see this is by looking at the state machine. If a transfer is inserted,
414
- // how many accounts and transfer put/removes will be generated?
415
- //
416
- // This also means looking at the state machine operation that will generate the
417
- // most put/removes in the worst case.
418
- // For example, create_accounts will put at most 8191 accounts.
419
- // However, create_transfers will put 2 accounts (8191 * 2) for every transfer, and
420
- // some of these accounts may exist, requiring a remove/put to update the index.
421
- commit_count_max: u32,
447
+ options: Options,
422
448
  ) !Groove {
423
- // Cache is dynamically allocated to pass a pointer into the Object tree.
424
- const objects_cache = try allocator.create(ObjectTree.ValueCache);
449
+ // Cache is heap-allocated to pass a pointer into the Object tree.
450
+ const objects_cache = try allocator.create(ObjectTree.TableMutable.ValuesCache);
425
451
  errdefer allocator.destroy(objects_cache);
426
452
 
427
- objects_cache.* = .{};
428
- try objects_cache.ensureTotalCapacity(allocator, cache_size);
453
+ objects_cache.* = try ObjectTree.TableMutable.ValuesCache.init(
454
+ allocator,
455
+ options.cache_entries_max,
456
+ );
429
457
  errdefer objects_cache.deinit(allocator);
430
458
 
431
459
  // Intialize the object LSM tree.
@@ -434,18 +462,15 @@ pub fn GrooveType(
434
462
  node_pool,
435
463
  grid,
436
464
  objects_cache,
437
- .{
438
- .commit_count_max = commit_count_max,
439
- },
465
+ options.tree_options_object,
440
466
  );
441
467
  errdefer object_tree.deinit(allocator);
442
468
 
443
- // Cache is dynamically allocated to pass a pointer into the ID tree.
444
- const ids_cache = try allocator.create(IdTree.ValueCache);
469
+ // Cache is heap-allocated to pass a pointer into the ID tree.
470
+ const ids_cache = try allocator.create(IdTree.TableMutable.ValuesCache);
445
471
  errdefer allocator.destroy(ids_cache);
446
472
 
447
- ids_cache.* = .{};
448
- try ids_cache.ensureTotalCapacity(allocator, cache_size);
473
+ ids_cache.* = try IdTree.TableMutable.ValuesCache.init(allocator, options.cache_entries_max);
449
474
  errdefer ids_cache.deinit(allocator);
450
475
 
451
476
  var id_tree = try IdTree.init(
@@ -453,9 +478,7 @@ pub fn GrooveType(
453
478
  node_pool,
454
479
  grid,
455
480
  ids_cache,
456
- .{
457
- .commit_count_max = commit_count_max,
458
- },
481
+ options.tree_options_id,
459
482
  );
460
483
  errdefer id_tree.deinit(allocator);
461
484
 
@@ -475,23 +498,18 @@ pub fn GrooveType(
475
498
  allocator,
476
499
  node_pool,
477
500
  grid,
478
- null, // No value cache for index trees.
479
- .{
480
- .commit_count_max = commit_count_max,
481
- },
501
+ null, // No value cache for index trees, since they only do range queries.
502
+ @field(options.tree_options_index, field.name),
482
503
  );
483
504
  index_trees_initialized += 1;
484
505
  }
485
506
 
486
- // TODO: document why this is twice the commit count max.
487
- const prefetch_count_max = commit_count_max * 2;
488
-
489
507
  var prefetch_ids = PrefetchIDs{};
490
- try prefetch_ids.ensureTotalCapacity(allocator, prefetch_count_max);
508
+ try prefetch_ids.ensureTotalCapacity(allocator, options.prefetch_entries_max);
491
509
  errdefer prefetch_ids.deinit(allocator);
492
510
 
493
511
  var prefetch_objects = PrefetchObjects{};
494
- try prefetch_objects.ensureTotalCapacity(allocator, prefetch_count_max);
512
+ try prefetch_objects.ensureTotalCapacity(allocator, options.prefetch_entries_max);
495
513
  errdefer prefetch_objects.deinit(allocator);
496
514
 
497
515
  return Groove{
@@ -505,14 +523,11 @@ pub fn GrooveType(
505
523
 
506
524
  .prefetch_ids = prefetch_ids,
507
525
  .prefetch_objects = prefetch_objects,
526
+ .prefetch_snapshot = null,
508
527
  };
509
528
  }
510
529
 
511
530
  pub fn deinit(groove: *Groove, allocator: mem.Allocator) void {
512
- assert(groove.join_op == null);
513
- assert(groove.join_pending == 0);
514
- assert(groove.join_callback == null);
515
-
516
531
  inline for (std.meta.fields(IndexTrees)) |field| {
517
532
  @field(groove.indexes, field.name).deinit(allocator);
518
533
  }
@@ -535,10 +550,26 @@ pub fn GrooveType(
535
550
  return groove.prefetch_objects.getKeyPtrAdapted(id, PrefetchObjectsAdapter{});
536
551
  }
537
552
 
538
- /// Must be called directly after the state machine commit is finished and prefetch results
539
- /// are no longer needed.
540
- pub fn prefetch_clear(groove: *Groove) void {
541
- groove.prefetch_objects.clearRetainingCapacity();
553
+ /// Must be called directly before the state machine begins queuing ids for prefetch.
554
+ /// When `snapshot` is null, prefetch from the current snapshot.
555
+ pub fn prefetch_setup(groove: *Groove, snapshot: ?u64) void {
556
+ // We may query the input tables of an ongoing compaction, but must not query the
557
+ // output tables until the compaction is complete. (Until then, the output tables may
558
+ // be in the manifest but not yet on disk).
559
+ const snapshot_max = groove.objects.lookup_snapshot_max;
560
+ assert(snapshot_max == groove.ids.lookup_snapshot_max);
561
+
562
+ const snapshot_target = snapshot orelse snapshot_max;
563
+ assert(snapshot_target <= snapshot_max);
564
+
565
+ if (groove.prefetch_snapshot == null) {
566
+ groove.prefetch_objects.clearRetainingCapacity();
567
+ } else {
568
+ // If there is a snapshot already set from the previous prefetch_setup(), then its
569
+ // prefetch() was never called, so there must already be no queued objects or ids.
570
+ }
571
+
572
+ groove.prefetch_snapshot = snapshot_target;
542
573
  assert(groove.prefetch_objects.count() == 0);
543
574
  assert(groove.prefetch_ids.count() == 0);
544
575
  }
@@ -547,14 +578,21 @@ pub fn GrooveType(
547
578
  /// We tolerate duplicate IDs enqueued by the state machine.
548
579
  /// For example, if all unique operations require the same two dependencies.
549
580
  pub fn prefetch_enqueue(groove: *Groove, id: u128) void {
550
- if (groove.ids.get_cached(id)) |id_tree_value| {
551
- if (!id_tree_value.tombstone()) {
552
- const object = groove.objects.get_cached(id_tree_value.timestamp).?;
553
- assert(!ObjectTreeHelpers(Object).tombstone(object));
554
- groove.prefetch_objects.putAssumeCapacity(object.*, {});
581
+ if (groove.ids.lookup_from_memory(groove.prefetch_snapshot.?, id)) |id_tree_value| {
582
+ if (id_tree_value.tombstone()) {
583
+ // Do nothing; an explicit ID tombstone indicates that the object was deleted.
555
584
  } else {
556
- // Do nothing, a prefetched ID not present in prefetch_objects indicates
557
- // that the object has either been deleted or never existed.
585
+ if (groove.objects.lookup_from_memory(
586
+ groove.prefetch_snapshot.?,
587
+ id_tree_value.timestamp,
588
+ )) |object| {
589
+ assert(!ObjectTreeHelpers(Object).tombstone(object));
590
+ groove.prefetch_objects.putAssumeCapacity(object.*, {});
591
+ } else {
592
+ // The id was in the IdTree's value cache, but not in the ObjectTree's
593
+ // value cache.
594
+ groove.prefetch_ids.putAssumeCapacity(id, {});
595
+ }
558
596
  }
559
597
  } else {
560
598
  groove.prefetch_ids.putAssumeCapacity(id, {});
@@ -562,8 +600,7 @@ pub fn GrooveType(
562
600
  }
563
601
 
564
602
  /// Ensure the objects corresponding to all ids enqueued with prefetch_enqueue() are
565
- /// in memory, either in the value cache of the object tree or in the prefetch_objects
566
- /// backup hash map.
603
+ /// available in `prefetch_objects`.
567
604
  pub fn prefetch(
568
605
  groove: *Groove,
569
606
  callback: fn (*PrefetchContext) void,
@@ -572,14 +609,17 @@ pub fn GrooveType(
572
609
  context.* = .{
573
610
  .groove = groove,
574
611
  .callback = callback,
612
+ .snapshot = groove.prefetch_snapshot.?,
575
613
  .id_iterator = groove.prefetch_ids.keyIterator(),
576
614
  };
615
+ groove.prefetch_snapshot = null;
577
616
  context.start_workers();
578
617
  }
579
618
 
580
619
  pub const PrefetchContext = struct {
581
620
  groove: *Groove,
582
621
  callback: fn (*PrefetchContext) void,
622
+ snapshot: u64,
583
623
 
584
624
  id_iterator: PrefetchIDs.KeyIterator,
585
625
 
@@ -595,17 +635,16 @@ pub fn GrooveType(
595
635
 
596
636
  // Track an extra "worker" that will finish after the loop.
597
637
  //
598
- // This prevents `context.finish()` from being called within the loop body when every
599
- // worker finishes synchronously. `context.finish()` sets the `context` to undefined,
600
- // but `context` is required for the last loop condition check.
638
+ // This prevents `context.finish()` from being called within the loop body when
639
+ // every worker finishes synchronously. `context.finish()` calls the user-provided
640
+ // callback which may re-use the memory of this `PrefetchContext`. However, we
641
+ // rely on `context` being well-defined for the loop condition.
601
642
  context.workers_busy += 1;
602
643
 
603
- // -1 to ignore the extra worker.
604
- while (context.workers_busy - 1 < context.workers.len) {
605
- const worker = &context.workers[context.workers_busy - 1];
644
+ for (context.workers) |*worker| {
606
645
  worker.* = .{ .context = context };
607
646
  context.workers_busy += 1;
608
- if (!worker.lookup_start()) break;
647
+ worker.lookup_start_next();
609
648
  }
610
649
 
611
650
  assert(context.workers_busy >= 1);
@@ -619,12 +658,12 @@ pub fn GrooveType(
619
658
 
620
659
  fn finish(context: *PrefetchContext) void {
621
660
  assert(context.workers_busy == 0);
622
- assert(context.groove.prefetch_ids.count() == 0);
661
+
623
662
  assert(context.id_iterator.next() == null);
663
+ context.groove.prefetch_ids.clearRetainingCapacity();
664
+ assert(context.groove.prefetch_ids.count() == 0);
624
665
 
625
- const callback = context.callback;
626
- context.* = undefined;
627
- callback(context);
666
+ context.callback(context);
628
667
  }
629
668
  };
630
669
 
@@ -636,34 +675,37 @@ pub fn GrooveType(
636
675
  lookup_id: IdTree.LookupContext = undefined,
637
676
  lookup_object: ObjectTree.LookupContext = undefined,
638
677
 
639
- /// Returns true if asynchronous I/O has been started.
640
- /// Returns false if there are no more IDs to prefetch.
641
- fn lookup_start(worker: *PrefetchWorker) bool {
642
- const groove = worker.context.groove;
643
-
678
+ fn lookup_start_next(worker: *PrefetchWorker) void {
644
679
  const id = worker.context.id_iterator.next() orelse {
645
- groove.prefetch_ids.clearRetainingCapacity();
646
- assert(groove.prefetch_ids.count() == 0);
647
680
  worker.context.worker_finished();
648
- return false;
681
+ return;
649
682
  };
650
683
 
651
- if (config.verify) {
652
- // This is checked in prefetch_enqueue()
653
- assert(groove.ids.get_cached(id.*) == null);
654
- }
655
-
656
- // If not in the LSM tree's cache, the object must be read from disk and added
657
- // to the auxiliary prefetch_objects hash map.
658
- // TODO: this LSM tree function needlessly checks the LSM tree's cache a
659
- // second time. Adding API to the LSM tree to avoid this may be worthwhile.
660
- groove.ids.lookup(
661
- lookup_id_callback,
662
- &worker.lookup_id,
663
- snapshot_latest,
684
+ if (worker.context.groove.ids.lookup_from_memory(
685
+ worker.context.snapshot,
664
686
  id.*,
665
- );
666
- return true;
687
+ )) |id_tree_value| {
688
+ assert(!id_tree_value.tombstone());
689
+ lookup_id_callback(&worker.lookup_id, id_tree_value);
690
+
691
+ if (config.verify) {
692
+ // If the id is cached, then we must be prefetching it because the object
693
+ // was not also cached.
694
+ assert(worker.context.groove.objects.lookup_from_memory(
695
+ worker.context.snapshot,
696
+ id_tree_value.timestamp,
697
+ ) == null);
698
+ }
699
+ } else {
700
+ // If not in the LSM tree's cache, the object must be read from disk and added
701
+ // to the auxiliary prefetch_objects hash map.
702
+ worker.context.groove.ids.lookup_from_levels(
703
+ lookup_id_callback,
704
+ &worker.lookup_id,
705
+ worker.context.snapshot,
706
+ id.*,
707
+ );
708
+ }
667
709
  }
668
710
 
669
711
  fn lookup_id_callback(
@@ -673,18 +715,45 @@ pub fn GrooveType(
673
715
  const worker = @fieldParentPtr(PrefetchWorker, "lookup_id", completion);
674
716
 
675
717
  if (result) |id_tree_value| {
676
- if (!id_tree_value.tombstone()) {
677
- worker.context.groove.objects.lookup(
678
- lookup_object_callback,
679
- &worker.lookup_object,
680
- snapshot_latest,
681
- id_tree_value.timestamp,
718
+ if (config.verify) {
719
+ // This was checked in prefetch_enqueue().
720
+ assert(
721
+ worker.context.groove.ids.lookup_from_memory(
722
+ worker.context.snapshot,
723
+ worker.lookup_id.key,
724
+ ) == null or
725
+ worker.context.groove.objects.lookup_from_memory(
726
+ worker.context.snapshot,
727
+ id_tree_value.timestamp,
728
+ ) == null,
682
729
  );
683
- } else {
684
- worker.lookup_finish();
685
730
  }
731
+
732
+ if (id_tree_value.tombstone()) {
733
+ worker.lookup_start_next();
734
+ return;
735
+ }
736
+
737
+ if (worker.context.groove.objects.lookup_from_memory(
738
+ worker.context.snapshot,
739
+ id_tree_value.timestamp,
740
+ )) |object| {
741
+ // The object is not a tombstone; the ID and Object trees are in sync.
742
+ assert(!ObjectTreeHelpers(Object).tombstone(object));
743
+
744
+ worker.context.groove.prefetch_objects.putAssumeCapacityNoClobber(object.*, {});
745
+ worker.lookup_start_next();
746
+ return;
747
+ }
748
+
749
+ worker.context.groove.objects.lookup_from_levels(
750
+ lookup_object_callback,
751
+ &worker.lookup_object,
752
+ worker.context.snapshot,
753
+ id_tree_value.timestamp,
754
+ );
686
755
  } else {
687
- worker.lookup_finish();
756
+ worker.lookup_start_next();
688
757
  }
689
758
  }
690
759
 
@@ -699,13 +768,7 @@ pub fn GrooveType(
699
768
  assert(!ObjectTreeHelpers(Object).tombstone(object));
700
769
 
701
770
  worker.context.groove.prefetch_objects.putAssumeCapacityNoClobber(object.*, {});
702
- worker.lookup_finish();
703
- }
704
-
705
- fn lookup_finish(worker: *PrefetchWorker) void {
706
- if (!worker.lookup_start()) {
707
- worker.* = undefined;
708
- }
771
+ worker.lookup_start_next();
709
772
  }
710
773
  };
711
774
 
@@ -794,7 +857,7 @@ pub fn GrooveType(
794
857
  assert(groove.prefetch_objects.removeAdapted(object.id, PrefetchObjectsAdapter{}));
795
858
  }
796
859
 
797
- /// Maximum number of pending sync callbacks (ObjecTree + IdTree + IndexTrees).
860
+ /// Maximum number of pending sync callbacks (ObjectTree + IdTree + IndexTrees).
798
861
  const join_pending_max = 2 + std.meta.fields(IndexTrees).len;
799
862
 
800
863
  fn JoinType(comptime join_op: JoinOp) type {
@@ -932,7 +995,7 @@ test "Groove" {
932
995
 
933
996
  _ = Groove.prefetch_enqueue;
934
997
  _ = Groove.prefetch;
935
- _ = Groove.prefetch_clear;
998
+ _ = Groove.prefetch_setup;
936
999
 
937
1000
  std.testing.refAllDecls(Groove.PrefetchWorker);
938
1001
  std.testing.refAllDecls(Groove.PrefetchContext);