tigerbeetle-node 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/dist/.client.node.sha256 +1 -0
  2. package/package.json +5 -3
  3. package/src/tigerbeetle/scripts/fuzz_loop.sh +1 -1
  4. package/src/tigerbeetle/scripts/pre-commit.sh +2 -2
  5. package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
  6. package/src/tigerbeetle/src/benchmark.zig +25 -11
  7. package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
  8. package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
  9. package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
  10. package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
  11. package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -256
  12. package/src/tigerbeetle/src/c/tb_client.h +18 -4
  13. package/src/tigerbeetle/src/c/tb_client.zig +88 -26
  14. package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
  15. package/src/tigerbeetle/src/c/test.zig +371 -1
  16. package/src/tigerbeetle/src/cli.zig +90 -18
  17. package/src/tigerbeetle/src/config.zig +12 -4
  18. package/src/tigerbeetle/src/demo.zig +2 -1
  19. package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
  20. package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
  21. package/src/tigerbeetle/src/ewah.zig +11 -33
  22. package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
  23. package/src/tigerbeetle/src/lsm/README.md +97 -3
  24. package/src/tigerbeetle/src/lsm/compaction.zig +32 -7
  25. package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
  26. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +34 -32
  27. package/src/tigerbeetle/src/lsm/grid.zig +39 -21
  28. package/src/tigerbeetle/src/lsm/groove.zig +1 -0
  29. package/src/tigerbeetle/src/lsm/k_way_merge.zig +3 -3
  30. package/src/tigerbeetle/src/lsm/level_iterator.zig +1 -1
  31. package/src/tigerbeetle/src/lsm/manifest.zig +13 -0
  32. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -49
  33. package/src/tigerbeetle/src/lsm/manifest_log.zig +173 -335
  34. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
  35. package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
  36. package/src/tigerbeetle/src/lsm/posted_groove.zig +1 -0
  37. package/src/tigerbeetle/src/lsm/segmented_array.zig +24 -15
  38. package/src/tigerbeetle/src/lsm/table.zig +32 -20
  39. package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
  40. package/src/tigerbeetle/src/lsm/table_iterator.zig +4 -5
  41. package/src/tigerbeetle/src/lsm/test.zig +13 -2
  42. package/src/tigerbeetle/src/lsm/tree.zig +45 -7
  43. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +36 -32
  44. package/src/tigerbeetle/src/main.zig +69 -13
  45. package/src/tigerbeetle/src/message_bus.zig +18 -7
  46. package/src/tigerbeetle/src/message_pool.zig +8 -2
  47. package/src/tigerbeetle/src/ring_buffer.zig +7 -3
  48. package/src/tigerbeetle/src/simulator.zig +38 -11
  49. package/src/tigerbeetle/src/state_machine.zig +48 -23
  50. package/src/tigerbeetle/src/test/accounting/workload.zig +9 -5
  51. package/src/tigerbeetle/src/test/cluster.zig +15 -33
  52. package/src/tigerbeetle/src/test/conductor.zig +2 -1
  53. package/src/tigerbeetle/src/test/network.zig +45 -19
  54. package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
  55. package/src/tigerbeetle/src/test/state_checker.zig +5 -7
  56. package/src/tigerbeetle/src/test/storage.zig +453 -110
  57. package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
  58. package/src/tigerbeetle/src/tigerbeetle.zig +1 -0
  59. package/src/tigerbeetle/src/unit_tests.zig +7 -1
  60. package/src/tigerbeetle/src/util.zig +97 -11
  61. package/src/tigerbeetle/src/vopr.zig +2 -1
  62. package/src/tigerbeetle/src/vsr/client.zig +8 -3
  63. package/src/tigerbeetle/src/vsr/journal.zig +280 -202
  64. package/src/tigerbeetle/src/vsr/replica.zig +169 -31
  65. package/src/tigerbeetle/src/vsr/superblock.zig +356 -629
  66. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -6
  67. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +414 -151
  68. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
  69. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
  70. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +44 -9
  71. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
  72. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
  73. package/src/tigerbeetle/src/vsr.zig +19 -5
  74. package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
  75. package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299
  76. package/src/tigerbeetle/src/vopr_hub/README.md +0 -58
  77. package/src/tigerbeetle/src/vopr_hub/SETUP.md +0 -199
  78. package/src/tigerbeetle/src/vopr_hub/go.mod +0 -3
  79. package/src/tigerbeetle/src/vopr_hub/main.go +0 -1022
  80. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +0 -3
  81. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +0 -403
@@ -28,10 +28,12 @@ const log = std.log.scoped(.manifest_log);
28
28
 
29
29
  const config = @import("../config.zig");
30
30
  const vsr = @import("../vsr.zig");
31
+ const util = @import("../util.zig");
31
32
 
32
33
  const SuperBlockType = vsr.SuperBlockType;
33
34
  const GridType = @import("grid.zig").GridType;
34
35
  const BlockType = @import("grid.zig").BlockType;
36
+ const tree = @import("tree.zig");
35
37
  const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
36
38
 
37
39
  /// ManifestLog block schema:
@@ -47,8 +49,10 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
47
49
  const SuperBlock = SuperBlockType(Storage);
48
50
  const Grid = GridType(Storage);
49
51
 
52
+ pub const Block = ManifestLogBlockType(Storage, TableInfo);
50
53
  const BlockPtr = Grid.BlockPtr;
51
54
  const BlockPtrConst = Grid.BlockPtrConst;
55
+ const Label = Block.Label;
52
56
 
53
57
  pub const Callback = fn (manifest_log: *ManifestLog) void;
54
58
 
@@ -58,11 +62,6 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
58
62
  table: *const TableInfo,
59
63
  ) void;
60
64
 
61
- pub const Label = packed struct {
62
- level: u7,
63
- event: enum(u1) { insert, remove },
64
- };
65
-
66
65
  const alignment = 16;
67
66
 
68
67
  comptime {
@@ -78,29 +77,47 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
78
77
  // However, we still store Label ahead of TableInfo to save space on the network.
79
78
  // This means we store fewer entries per manifest block, to gain less padding,
80
79
  // since we must store entry_count_max of whichever array is first in the layout.
81
- // For a better understanding of this decision, see block_size() below.
80
+ // For a better understanding of this decision, see Block.size() below.
82
81
  assert(@sizeOf(TableInfo) % alignment == 0);
83
82
  }
84
83
 
85
- const block_body_size = config.block_size - @sizeOf(vsr.Header);
86
- const entry_size = @sizeOf(Label) + @sizeOf(TableInfo);
87
- const entry_count_max_unaligned = @divFloor(block_body_size, entry_size);
88
- const entry_count_max = @divFloor(entry_count_max_unaligned, alignment) * alignment;
84
+ /// The maximum number of table updates to the manifest by a half-measure of table
85
+ /// compaction (not including manifest log compaction).
86
+ ///
87
+ /// Input tables are updated in the manifest (snapshot_max is reduced).
88
+ /// Input tables are removed from the manifest (if not held by a persistent snapshot).
89
+ /// Output tables are inserted into the manifest.
90
+ // TODO If insert-then-remove can update in-memory, then we can only count input tables once.
91
+ pub const compaction_appends_max = tree.compactions_max *
92
+ (tree.compaction_tables_input_max + // Update snapshot_max.
93
+ tree.compaction_tables_input_max + // Remove.
94
+ tree.compaction_tables_output_max);
95
+
96
+ const blocks_count_appends = util.div_ceil(compaction_appends_max, Block.entry_count_max);
97
+
98
+ /// The upper-bound of manifest log blocks we must buffer.
99
+ ///
100
+ /// `blocks` must have sufficient capacity for:
101
+ /// - a manifest log compaction (+1 block in the worst case)
102
+ /// - a leftover open block from the previous ops (+1 block)
103
+ /// - table updates from a half bar of compactions
104
+ /// (This is typically +1 block, but may be more when the block size is small).
105
+ /// TODO(Beat compaction): blocks_count_appends only needs enough for 1 beat.
106
+ const blocks_count_max = 1 + 1 + blocks_count_appends;
89
107
 
90
108
  comptime {
91
- assert(entry_count_max > 0);
92
- assert((entry_count_max * @sizeOf(Label)) % alignment == 0);
93
- assert((entry_count_max * @sizeOf(TableInfo)) % alignment == 0);
109
+ assert(blocks_count_max >= 3);
110
+ assert(blocks_count_max == 3 or config.block_size < 64 * 1024);
94
111
  }
95
112
 
96
113
  superblock: *SuperBlock,
97
114
  grid: *Grid,
115
+ grid_reservation: ?Grid.Reservation = null,
98
116
  tree_hash: u128,
99
117
 
100
118
  /// The head block is used to accumulate a full block, to be written at the next flush.
101
119
  /// The remaining blocks must accommodate all further appends.
102
- // TODO Assert the relation between the number of blocks, and flush/compact/append.
103
- blocks: RingBuffer(BlockPtr, 3, .array),
120
+ blocks: RingBuffer(BlockPtr, blocks_count_max, .array),
104
121
 
105
122
  /// The number of blocks that have been appended to, filled up, and then closed.
106
123
  blocks_closed: u8 = 0,
@@ -131,26 +148,21 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
131
148
  pub fn init(allocator: mem.Allocator, grid: *Grid, tree_hash: u128) !ManifestLog {
132
149
  // TODO RingBuffer for .pointer should be extended to take care of alignment:
133
150
 
134
- const a = try allocator.alignedAlloc(u8, config.sector_size, config.block_size);
135
- errdefer allocator.free(a);
136
-
137
- const b = try allocator.alignedAlloc(u8, config.sector_size, config.block_size);
138
- errdefer allocator.free(b);
151
+ var blocks: [blocks_count_max]BlockPtr = undefined;
152
+ for (blocks) |*block, i| {
153
+ errdefer for (blocks[0..i]) |b| allocator.free(b);
139
154
 
140
- const c = try allocator.alignedAlloc(u8, config.sector_size, config.block_size);
141
- errdefer allocator.free(b);
155
+ const block_slice =
156
+ try allocator.alignedAlloc(u8, config.sector_size, config.block_size);
157
+ block.* = block_slice[0..config.block_size];
158
+ }
159
+ errdefer for (blocks) |b| allocator.free(b);
142
160
 
143
161
  return ManifestLog{
144
162
  .superblock = grid.superblock,
145
163
  .grid = grid,
146
164
  .tree_hash = tree_hash,
147
- .blocks = .{
148
- .buffer = .{
149
- a[0..config.block_size],
150
- b[0..config.block_size],
151
- c[0..config.block_size],
152
- },
153
- },
165
+ .blocks = .{ .buffer = blocks },
154
166
  };
155
167
  }
156
168
 
@@ -230,9 +242,9 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
230
242
  const block_reference = manifest_log.read_block_reference.?;
231
243
  verify_block(block, block_reference.checksum, block_reference.address);
232
244
 
233
- const entry_count = block_entry_count(block);
234
- const labels_used = labels_const(block)[0..entry_count];
235
- const tables_used = tables_const(block)[0..entry_count];
245
+ const entry_count = Block.entry_count(block);
246
+ const labels_used = Block.labels_const(block)[0..entry_count];
247
+ const tables_used = Block.tables_const(block)[0..entry_count];
236
248
 
237
249
  const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
238
250
 
@@ -253,7 +265,7 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
253
265
  }
254
266
  }
255
267
 
256
- if (block_entry_count(block) < entry_count_max) {
268
+ if (Block.entry_count(block) < Block.entry_count_max) {
257
269
  manifest.queue_for_compaction(block_reference.address);
258
270
  }
259
271
 
@@ -271,12 +283,14 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
271
283
  /// A move is only recorded as an insert, there is no remove from the previous level, since
272
284
  /// this is safer (no potential to get the event order wrong) and reduces fragmentation.
273
285
  pub fn insert(manifest_log: *ManifestLog, level: u7, table: *const TableInfo) void {
286
+ assert(!manifest_log.writing);
274
287
  manifest_log.append(.{ .level = level, .event = .insert }, table);
275
288
  }
276
289
 
277
290
  /// Appends the removal of a table from a level.
278
291
  /// The table must have previously been inserted to the manifest log.
279
292
  pub fn remove(manifest_log: *ManifestLog, level: u7, table: *const TableInfo) void {
293
+ assert(!manifest_log.writing);
280
294
  manifest_log.append(.{ .level = level, .event = .remove }, table);
281
295
  }
282
296
 
@@ -294,7 +308,7 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
294
308
  assert(manifest_log.blocks.count > 0);
295
309
  }
296
310
 
297
- assert(manifest_log.entry_count < entry_count_max);
311
+ assert(manifest_log.entry_count < Block.entry_count_max);
298
312
  assert(manifest_log.blocks.count - manifest_log.blocks_closed == 1);
299
313
 
300
314
  log.debug(
@@ -313,11 +327,11 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
313
327
 
314
328
  const block: BlockPtr = manifest_log.blocks.tail().?;
315
329
  const entry = manifest_log.entry_count;
316
- labels(block)[entry] = label;
317
- tables(block)[entry] = table.*;
330
+ Block.labels(block)[entry] = label;
331
+ Block.tables(block)[entry] = table.*;
318
332
 
319
333
  const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
320
- const address = block_address(block);
334
+ const address = Block.address(block);
321
335
  if (manifest.update_table_extent(table.address, address, entry)) |previous_block| {
322
336
  manifest.queue_for_compaction(previous_block);
323
337
  if (label.event == .remove) manifest.queue_for_compaction(address);
@@ -327,13 +341,12 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
327
341
  }
328
342
 
329
343
  manifest_log.entry_count += 1;
330
- if (manifest_log.entry_count == entry_count_max) {
344
+ if (manifest_log.entry_count == Block.entry_count_max) {
331
345
  manifest_log.close_block();
332
346
  assert(manifest_log.entry_count == 0);
333
347
  }
334
348
  }
335
349
 
336
- /// `flush` does not close a partial block; that is only necessary during `checkpoint`.
337
350
  fn flush(manifest_log: *ManifestLog, callback: Callback) void {
338
351
  assert(manifest_log.opened);
339
352
  assert(!manifest_log.reading);
@@ -347,6 +360,26 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
347
360
  manifest_log.tree_hash,
348
361
  manifest_log.blocks_closed,
349
362
  });
363
+
364
+ // The manifest is updated synchronously relative to the beginning of compact() and
365
+ // checkpoint() so that the SuperBlock.Manifest.append()s are deterministic relative
366
+ // to other trees' manifest logs.
367
+ const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
368
+ var i: usize = 0;
369
+ while (i < manifest_log.blocks_closed) : (i += 1) {
370
+ const block = manifest_log.blocks.get_ptr(i).?.*;
371
+ verify_block(block, null, null);
372
+
373
+ const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
374
+ const address = Block.address(block);
375
+ assert(address > 0);
376
+
377
+ manifest.append(manifest_log.tree_hash, header.checksum, address);
378
+ if (Block.entry_count(block) < Block.entry_count_max) {
379
+ manifest.queue_for_compaction(address);
380
+ }
381
+ }
382
+
350
383
  manifest_log.write_block();
351
384
  }
352
385
 
@@ -360,7 +393,7 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
360
393
  assert(manifest_log.entry_count == 0);
361
394
  } else {
362
395
  assert(manifest_log.blocks.count == 1);
363
- assert(manifest_log.entry_count < entry_count_max);
396
+ assert(manifest_log.entry_count < Block.entry_count_max);
364
397
  }
365
398
 
366
399
  const callback = manifest_log.write_callback.?;
@@ -375,16 +408,16 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
375
408
  verify_block(block, null, null);
376
409
 
377
410
  const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
378
- const address = block_address(block);
411
+ const address = Block.address(block);
379
412
  assert(address > 0);
380
413
 
381
- const entry_count = block_entry_count(block);
414
+ const entry_count = Block.entry_count(block);
382
415
 
383
416
  if (manifest_log.blocks_closed == 1 and manifest_log.blocks.count == 1) {
384
417
  // This might be the last block of a checkpoint, which can be a partial block.
385
418
  assert(entry_count > 0);
386
419
  } else {
387
- assert(entry_count == entry_count_max);
420
+ assert(entry_count == Block.entry_count_max);
388
421
  }
389
422
 
390
423
  log.debug("{}: write_block: checksum={} address={} entries={}", .{
@@ -407,18 +440,6 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
407
440
  assert(manifest_log.opened);
408
441
  assert(manifest_log.writing);
409
442
 
410
- const block = manifest_log.blocks.head().?;
411
- verify_block(block, null, null);
412
-
413
- const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
414
- const address = block_address(block);
415
- assert(address > 0);
416
-
417
- const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
418
-
419
- manifest.append(manifest_log.tree_hash, header.checksum, address);
420
- if (block_entry_count(block) < entry_count_max) manifest.queue_for_compaction(address);
421
-
422
443
  manifest_log.blocks_closed -= 1;
423
444
  manifest_log.blocks.advance_head();
424
445
  assert(manifest_log.blocks_closed <= manifest_log.blocks.count);
@@ -426,11 +447,38 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
426
447
  manifest_log.write_block();
427
448
  }
428
449
 
450
+ pub fn reserve(manifest_log: *ManifestLog) void {
451
+ assert(manifest_log.opened);
452
+ assert(!manifest_log.reading);
453
+ assert(!manifest_log.writing);
454
+ assert(manifest_log.read_callback == null);
455
+ assert(manifest_log.write_callback == null);
456
+ assert(manifest_log.grid_reservation == null);
457
+ // reserve() is called at the start of compaction, so we have:
458
+ // - at most 1 closed block, and
459
+ // - at most 1 open block
460
+ // due to the last log compaction plus a leftover partial block.
461
+ assert(manifest_log.blocks_closed <= 1);
462
+ assert(manifest_log.blocks.count <= manifest_log.blocks_closed + 1);
463
+
464
+ // TODO Make sure this cannot fail — before compaction begins verify that enough free
465
+ // blocks are available for all reservations.
466
+ // +1 for the manifest log block compaction, which acquires at most one block.
467
+ manifest_log.grid_reservation = manifest_log.grid.reserve(1 + blocks_count_appends).?;
468
+ }
469
+
470
+ /// `compact` does not close a partial block; that is only necessary during `checkpoint`.
429
471
  pub fn compact(manifest_log: *ManifestLog, callback: Callback) void {
430
472
  assert(manifest_log.opened);
431
473
  assert(!manifest_log.reading);
432
474
  assert(!manifest_log.writing);
433
475
  assert(manifest_log.read_callback == null);
476
+ assert(manifest_log.write_callback == null);
477
+ assert(manifest_log.grid_reservation != null);
478
+
479
+ const free_set = manifest_log.grid.superblock.free_set;
480
+ assert(free_set.count_free_reserved(manifest_log.grid_reservation.?) >= 1);
481
+
434
482
  manifest_log.read_callback = callback;
435
483
  manifest_log.flush(compact_flush_callback);
436
484
  }
@@ -442,12 +490,16 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
442
490
  assert(!manifest_log.reading);
443
491
  assert(!manifest_log.writing);
444
492
  assert(manifest_log.blocks_closed == 0);
493
+ assert(manifest_log.grid_reservation != null);
445
494
 
446
495
  const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
447
496
 
448
497
  // Compact a single manifest block — to minimize latency spikes, we want to do the bare
449
498
  // minimum of compaction work required.
450
499
  // TODO Compact more than 1 block if fragmentation is outstripping the compaction rate.
500
+ // (Make sure to update the grid block reservation to account for this).
501
+ // Or assert that compactions cannot update blocks fast enough to outpace manifest
502
+ // log compaction (relative to the number of updates that fit in a manifest log block).
451
503
  if (manifest.oldest_block_queued_for_compaction(manifest_log.tree_hash)) |block| {
452
504
  assert(block.tree == manifest_log.tree_hash);
453
505
  assert(block.address > 0);
@@ -464,6 +516,8 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
464
516
  );
465
517
  } else {
466
518
  manifest_log.read_callback = null;
519
+ manifest_log.grid.forfeit(manifest_log.grid_reservation.?);
520
+ manifest_log.grid_reservation = null;
467
521
  callback(manifest_log);
468
522
  }
469
523
  }
@@ -477,9 +531,9 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
477
531
  const block_reference = manifest_log.read_block_reference.?;
478
532
  verify_block(block, block_reference.checksum, block_reference.address);
479
533
 
480
- const entry_count = block_entry_count(block);
481
- const labels_used = labels_const(block)[0..entry_count];
482
- const tables_used = tables_const(block)[0..entry_count];
534
+ const entry_count = Block.entry_count(block);
535
+ const labels_used = Block.labels_const(block)[0..entry_count];
536
+ const tables_used = Block.tables_const(block)[0..entry_count];
483
537
 
484
538
  const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
485
539
  assert(manifest.tables.count() > 0);
@@ -518,7 +572,7 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
518
572
 
519
573
  // Blocks may be compacted if they contain frees, or are not completely full.
520
574
  // For example, a partial block may be flushed as part of a checkpoint.
521
- assert(frees > 0 or entry_count < entry_count_max);
575
+ assert(frees > 0 or entry_count < Block.entry_count_max);
522
576
 
523
577
  assert(manifest.queued_for_compaction(block_reference.address));
524
578
  manifest.remove(
@@ -528,7 +582,9 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
528
582
  );
529
583
  assert(!manifest.queued_for_compaction(block_reference.address));
530
584
 
531
- manifest_log.grid.release_at_checkpoint(block_reference.address);
585
+ manifest_log.grid.release(block_reference.address);
586
+ manifest_log.grid.forfeit(manifest_log.grid_reservation.?);
587
+ manifest_log.grid_reservation = null;
532
588
 
533
589
  const callback = manifest_log.read_callback.?;
534
590
  manifest_log.reading = false;
@@ -543,9 +599,7 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
543
599
  assert(!manifest_log.reading);
544
600
  assert(!manifest_log.writing);
545
601
  assert(manifest_log.write_callback == null);
546
-
547
- manifest_log.writing = true;
548
- manifest_log.write_callback = callback;
602
+ assert(manifest_log.grid_reservation == null);
549
603
 
550
604
  if (manifest_log.entry_count > 0) {
551
605
  manifest_log.close_block();
@@ -554,8 +608,7 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
554
608
  assert(manifest_log.blocks_closed == manifest_log.blocks.count);
555
609
  }
556
610
 
557
- log.debug("checkpoint: writing {} block(s)", .{manifest_log.blocks_closed});
558
- manifest_log.write_block();
611
+ manifest_log.flush(callback);
559
612
  }
560
613
 
561
614
  fn acquire_block(manifest_log: *ManifestLog) void {
@@ -571,9 +624,10 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
571
624
  const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
572
625
  header.* = .{
573
626
  .cluster = manifest_log.superblock.working.cluster,
574
- .op = manifest_log.grid.acquire(),
627
+ .op = manifest_log.grid.acquire(manifest_log.grid_reservation.?),
575
628
  .size = undefined,
576
629
  .command = .block,
630
+ .operation = BlockType.manifest.operation(),
577
631
  };
578
632
  }
579
633
 
@@ -583,31 +637,30 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
583
637
  const block: BlockPtr = manifest_log.blocks.tail().?;
584
638
  const entry_count = manifest_log.entry_count;
585
639
  assert(entry_count > 0);
586
- assert(entry_count <= entry_count_max);
640
+ assert(entry_count <= Block.entry_count_max);
587
641
 
588
642
  const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
589
643
  assert(header.cluster == manifest_log.superblock.working.cluster);
590
644
  assert(header.op > 0);
591
- header.size = block_size(entry_count);
592
645
  assert(header.command == .block);
646
+ header.size = Block.size(entry_count);
593
647
 
594
648
  // Zero unused labels:
595
- mem.set(u8, mem.sliceAsBytes(labels(block)[entry_count..]), 0);
649
+ mem.set(u8, mem.sliceAsBytes(Block.labels(block)[entry_count..]), 0);
596
650
 
597
651
  // Zero unused tables, and padding:
598
652
  mem.set(u8, block[header.size..], 0);
599
653
 
600
- header.operation = BlockType.manifest.operation();
601
654
  header.set_checksum_body(block[@sizeOf(vsr.Header)..header.size]);
602
655
  header.set_checksum();
603
656
 
604
657
  verify_block(block, null, null);
605
- assert(block_entry_count(block) == entry_count);
658
+ assert(Block.entry_count(block) == entry_count);
606
659
 
607
660
  log.debug("{}: close_block: checksum={} address={} entries={}", .{
608
661
  manifest_log.tree_hash,
609
662
  header.checksum,
610
- block_address(block),
663
+ Block.address(block),
611
664
  entry_count,
612
665
  });
613
666
 
@@ -627,65 +680,92 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
627
680
 
628
681
  assert(checksum == null or header.checksum == checksum.?);
629
682
 
630
- assert(block_address(block) > 0);
631
- assert(address == null or block_address(block) == address.?);
683
+ assert(Block.address(block) > 0);
684
+ assert(address == null or Block.address(block) == address.?);
632
685
 
633
- const entry_count = block_entry_count(block);
686
+ const entry_count = Block.entry_count(block);
634
687
  assert(entry_count > 0);
635
688
  }
689
+ };
690
+ }
691
+
692
+ fn ManifestLogBlockType(comptime Storage: type, comptime TableInfo: type) type {
693
+ return struct {
694
+ const Grid = GridType(Storage);
695
+ const BlockPtr = Grid.BlockPtr;
696
+ const BlockPtrConst = Grid.BlockPtrConst;
697
+
698
+ const block_body_size = config.block_size - @sizeOf(vsr.Header);
699
+ const entry_size = @sizeOf(Label) + @sizeOf(TableInfo);
700
+ const entry_count_max_unaligned = @divFloor(block_body_size, entry_size);
701
+ pub const entry_count_max = @divFloor(
702
+ entry_count_max_unaligned,
703
+ @alignOf(TableInfo),
704
+ ) * @alignOf(TableInfo);
705
+
706
+ comptime {
707
+ assert(entry_count_max > 0);
708
+ assert((entry_count_max * @sizeOf(Label)) % @alignOf(TableInfo) == 0);
709
+ assert((entry_count_max * @sizeOf(TableInfo)) % @alignOf(TableInfo) == 0);
710
+ }
636
711
 
637
- fn block_address(block: BlockPtrConst) u64 {
712
+ pub const Label = packed struct {
713
+ level: u7,
714
+ event: enum(u1) { insert, remove },
715
+ };
716
+
717
+ pub fn address(block: BlockPtrConst) u64 {
638
718
  const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
639
719
  assert(header.command == .block);
640
720
 
641
- const address = header.op;
642
- assert(address > 0);
643
- return address;
721
+ const block_address = header.op;
722
+ assert(block_address > 0);
723
+ return block_address;
644
724
  }
645
725
 
646
- fn block_checksum(block: BlockPtrConst) u128 {
726
+ pub fn checksum(block: BlockPtrConst) u128 {
647
727
  const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
648
728
  assert(header.command == .block);
649
729
 
650
730
  return header.checksum;
651
731
  }
652
732
 
653
- fn block_entry_count(block: BlockPtrConst) u32 {
733
+ pub fn entry_count(block: BlockPtrConst) u32 {
654
734
  const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
655
735
  assert(header.command == .block);
656
736
 
657
737
  const labels_size = entry_count_max * @sizeOf(Label);
658
738
  const tables_size = header.size - @sizeOf(vsr.Header) - labels_size;
659
739
 
660
- const entry_count = @intCast(u32, @divExact(tables_size, @sizeOf(TableInfo)));
661
- assert(entry_count > 0);
662
- assert(entry_count <= entry_count_max);
663
- return entry_count;
740
+ const entry_count_ = @intCast(u32, @divExact(tables_size, @sizeOf(TableInfo)));
741
+ assert(entry_count_ > 0);
742
+ assert(entry_count_ <= entry_count_max);
743
+ return entry_count_;
664
744
  }
665
745
 
666
- fn block_size(entry_count: u32) u32 {
667
- assert(entry_count > 0);
668
- assert(entry_count <= entry_count_max);
746
+ pub fn size(entry_count_: u32) u32 {
747
+ assert(entry_count_ > 0);
748
+ assert(entry_count_ <= entry_count_max);
669
749
 
670
750
  // Encode the smaller type first because this will be multiplied by entry_count_max.
671
751
  const labels_size = entry_count_max * @sizeOf(Label);
672
752
  assert(labels_size == labels_size_max);
673
753
  assert((@sizeOf(vsr.Header) + labels_size) % @alignOf(TableInfo) == 0);
674
- const tables_size = entry_count * @sizeOf(TableInfo);
754
+ const tables_size = entry_count_ * @sizeOf(TableInfo);
675
755
 
676
756
  return @sizeOf(vsr.Header) + labels_size + tables_size;
677
757
  }
678
758
 
679
759
  const labels_size_max = entry_count_max * @sizeOf(Label);
680
760
 
681
- fn labels(block: BlockPtr) *[entry_count_max]Label {
761
+ pub fn labels(block: BlockPtr) *[entry_count_max]Label {
682
762
  return mem.bytesAsSlice(
683
763
  Label,
684
764
  block[@sizeOf(vsr.Header)..][0..labels_size_max],
685
765
  )[0..entry_count_max];
686
766
  }
687
767
 
688
- fn labels_const(block: BlockPtrConst) *const [entry_count_max]Label {
768
+ pub fn labels_const(block: BlockPtrConst) *const [entry_count_max]Label {
689
769
  return mem.bytesAsSlice(
690
770
  Label,
691
771
  block[@sizeOf(vsr.Header)..][0..labels_size_max],
@@ -694,14 +774,14 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
694
774
 
695
775
  const tables_size_max = entry_count_max * @sizeOf(TableInfo);
696
776
 
697
- fn tables(block: BlockPtr) *[entry_count_max]TableInfo {
777
+ pub fn tables(block: BlockPtr) *[entry_count_max]TableInfo {
698
778
  return mem.bytesAsSlice(
699
779
  TableInfo,
700
780
  block[@sizeOf(vsr.Header) + labels_size_max ..][0..tables_size_max],
701
781
  )[0..entry_count_max];
702
782
  }
703
783
 
704
- fn tables_const(block: BlockPtrConst) *const [entry_count_max]TableInfo {
784
+ pub fn tables_const(block: BlockPtrConst) *const [entry_count_max]TableInfo {
705
785
  return mem.bytesAsSlice(
706
786
  TableInfo,
707
787
  block[@sizeOf(vsr.Header) + labels_size_max ..][0..tables_size_max],
@@ -709,245 +789,3 @@ pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
709
789
  }
710
790
  };
711
791
  }
712
-
713
- // TODO This is a manual runner to be replaced with a fuzz test.
714
- fn ManifestLogTestType(
715
- comptime Storage: type,
716
- comptime TableInfo: type,
717
- ) type {
718
- return struct {
719
- const ManifestLogTest = @This();
720
- const ManifestLog = ManifestLogType(Storage, TableInfo);
721
-
722
- const SuperBlock = SuperBlockType(Storage);
723
- const Grid = GridType(Storage);
724
-
725
- superblock: *SuperBlock,
726
- superblock_context: SuperBlock.Context = undefined,
727
- manifest_log: ManifestLog,
728
- pending: usize = 0,
729
-
730
- fn init(allocator: mem.Allocator, grid: *Grid) !ManifestLogTest {
731
- const tree_hash: u128 = std.math.maxInt(u128);
732
-
733
- var manifest_log = try ManifestLog.init(allocator, grid, tree_hash);
734
- errdefer manifest_log.deinit(allocator);
735
-
736
- return ManifestLogTest{
737
- .superblock = grid.superblock,
738
- .manifest_log = manifest_log,
739
- };
740
- }
741
-
742
- fn deinit(t: *ManifestLogTest, allocator: mem.Allocator) void {
743
- t.manifest_log.deinit(allocator);
744
- }
745
-
746
- fn format_superblock(t: *ManifestLogTest) void {
747
- t.pending += 1;
748
- t.superblock.format(format_superblock_callback, &t.superblock_context, .{
749
- .cluster = 10,
750
- .replica = 0,
751
- .size_max = 512 * 1024 * 1024,
752
- });
753
- }
754
-
755
- fn format_superblock_callback(context: *SuperBlock.Context) void {
756
- const t = @fieldParentPtr(ManifestLogTest, "superblock_context", context);
757
- t.pending -= 1;
758
- t.open_superblock();
759
- }
760
-
761
- fn open_superblock(t: *ManifestLogTest) void {
762
- t.pending += 1;
763
- t.superblock.open(open_superblock_callback, &t.superblock_context);
764
- }
765
-
766
- fn open_superblock_callback(context: *SuperBlock.Context) void {
767
- const t = @fieldParentPtr(ManifestLogTest, "superblock_context", context);
768
- t.pending -= 1;
769
-
770
- t.open();
771
- }
772
-
773
- fn open(t: *ManifestLogTest) void {
774
- t.pending += 1;
775
- t.manifest_log.open(open_event, open_callback);
776
- }
777
-
778
- fn open_event(manifest_log: *ManifestLog, level: u7, table: *const TableInfo) void {
779
- log.debug(
780
- "{}: open_event: level={} checksum={} address={} flags={} snapshot={}..{}",
781
- .{
782
- manifest_log.tree_hash,
783
- level,
784
- table.checksum,
785
- table.address,
786
- table.flags,
787
- table.snapshot_min,
788
- table.snapshot_max,
789
- },
790
- );
791
- }
792
-
793
- fn open_callback(manifest_log: *ManifestLog) void {
794
- const t = @fieldParentPtr(ManifestLogTest, "manifest_log", manifest_log);
795
- t.pending -= 1;
796
-
797
- t.manifest_log.insert(2, &TableInfo{
798
- .checksum = 123,
799
- .address = 7,
800
- .flags = 0,
801
- .snapshot_min = 42,
802
- .key_min = 50,
803
- .key_max = 100,
804
- });
805
-
806
- t.flush();
807
- }
808
-
809
- fn flush(t: *ManifestLogTest) void {
810
- t.pending += 1;
811
- t.manifest_log.flush(flush_callback);
812
- }
813
-
814
- fn flush_callback(manifest_log: *ManifestLog) void {
815
- const t = @fieldParentPtr(ManifestLogTest, "manifest_log", manifest_log);
816
- t.pending -= 1;
817
- t.checkpoint();
818
- }
819
-
820
- fn checkpoint(t: *ManifestLogTest) void {
821
- t.pending += 1;
822
- t.manifest_log.checkpoint(checkpoint_callback);
823
- }
824
-
825
- fn checkpoint_callback(manifest_log: *ManifestLog) void {
826
- const t = @fieldParentPtr(ManifestLogTest, "manifest_log", manifest_log);
827
- t.pending -= 1;
828
-
829
- t.manifest_log.insert(2, &TableInfo{
830
- .checksum = 123,
831
- .address = 7,
832
- .flags = 0,
833
- .snapshot_min = 42,
834
- .snapshot_max = 50,
835
- .key_min = 50,
836
- .key_max = 100,
837
- });
838
-
839
- t.manifest_log.remove(2, &TableInfo{
840
- .checksum = 123,
841
- .address = 7,
842
- .flags = 0,
843
- .snapshot_min = 42,
844
- .snapshot_max = 50,
845
- .key_min = 50,
846
- .key_max = 100,
847
- });
848
-
849
- t.checkpoint_again();
850
- }
851
-
852
- fn checkpoint_again(t: *ManifestLogTest) void {
853
- t.pending += 1;
854
- t.manifest_log.checkpoint(checkpoint_again_callback);
855
- }
856
-
857
- fn checkpoint_again_callback(manifest_log: *ManifestLog) void {
858
- const t = @fieldParentPtr(ManifestLogTest, "manifest_log", manifest_log);
859
- t.pending -= 1;
860
- t.compact();
861
- }
862
-
863
- fn compact(t: *ManifestLogTest) void {
864
- t.pending += 1;
865
- t.manifest_log.compact(compact_callback);
866
- }
867
-
868
- fn compact_callback(manifest_log: *ManifestLog) void {
869
- const t = @fieldParentPtr(ManifestLogTest, "manifest_log", manifest_log);
870
- t.pending -= 1;
871
-
872
- const tree = t.manifest_log.tree_hash;
873
- if (t.manifest_log.superblock.manifest.oldest_block_queued_for_compaction(tree)) |_| {
874
- t.compact();
875
- } else {
876
- t.checkpoint_superblock();
877
- }
878
- }
879
-
880
- fn checkpoint_superblock(t: *ManifestLogTest) void {
881
- t.pending += 1;
882
- t.superblock.checkpoint(checkpoint_superblock_callback, &t.superblock_context);
883
- }
884
-
885
- fn checkpoint_superblock_callback(context: *SuperBlock.Context) void {
886
- const t = @fieldParentPtr(ManifestLogTest, "superblock_context", context);
887
- t.pending -= 1;
888
- }
889
- };
890
- }
891
-
892
- pub fn main() !void {
893
- const testing = std.testing;
894
- const allocator = testing.allocator;
895
-
896
- testing.log_level = .debug;
897
-
898
- const os = std.os;
899
- const IO = @import("../io.zig").IO;
900
- const Storage = @import("../storage.zig").Storage;
901
- const SuperBlock = SuperBlockType(Storage);
902
- const Grid = @import("grid.zig").GridType(Storage);
903
-
904
- const dir_path = ".";
905
- const dir_fd = os.openZ(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0) catch |err| {
906
- std.debug.print("failed to open directory '{s}': {}", .{ dir_path, err });
907
- return;
908
- };
909
-
910
- const size_max = 2 * 1024 * 1024 * 1024;
911
- const storage_fd = try IO.open_file(dir_fd, "test_manifest_log", size_max, true);
912
- defer std.fs.cwd().deleteFile("test_manifest_log") catch {};
913
-
914
- var io = try IO.init(128, 0);
915
- defer io.deinit();
916
-
917
- var storage = try Storage.init(&io, storage_fd);
918
- defer storage.deinit();
919
-
920
- var superblock = try SuperBlock.init(allocator, &storage);
921
- defer superblock.deinit(allocator);
922
-
923
- var grid = try Grid.init(allocator, &superblock);
924
- defer grid.deinit(allocator);
925
-
926
- const TableInfo = extern struct {
927
- checksum: u128,
928
- address: u64,
929
- flags: u64 = 0,
930
-
931
- /// The minimum snapshot that can see this table (with exclusive bounds).
932
- /// This value is set to the current snapshot tick on table creation.
933
- snapshot_min: u64,
934
-
935
- /// The maximum snapshot that can see this table (with exclusive bounds).
936
- /// This value is set to the current snapshot tick on table deletion.
937
- snapshot_max: u64 = math.maxInt(u64),
938
-
939
- key_min: u128,
940
- key_max: u128,
941
- };
942
- assert(@sizeOf(TableInfo) == 48 + 16 * 2);
943
- assert(@alignOf(TableInfo) == 16);
944
- assert(@bitSizeOf(TableInfo) == @sizeOf(TableInfo) * 8);
945
-
946
- const ManifestLogTest = ManifestLogTestType(Storage, TableInfo);
947
-
948
- var t = try ManifestLogTest.init(allocator, &grid);
949
- defer t.deinit(allocator);
950
-
951
- t.format_superblock();
952
- while (t.pending > 0) try io.run_for_ns(100);
953
- }