tigerbeetle-node 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +305 -103
  2. package/dist/index.d.ts +70 -67
  3. package/dist/index.js +70 -67
  4. package/dist/index.js.map +1 -1
  5. package/package.json +6 -6
  6. package/scripts/download_node_headers.sh +14 -7
  7. package/src/index.ts +11 -10
  8. package/src/node.zig +22 -20
  9. package/src/tigerbeetle/scripts/benchmark.bat +4 -3
  10. package/src/tigerbeetle/scripts/benchmark.sh +25 -10
  11. package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
  12. package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
  13. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
  14. package/src/tigerbeetle/scripts/install.sh +20 -4
  15. package/src/tigerbeetle/scripts/install_zig.bat +5 -1
  16. package/src/tigerbeetle/scripts/install_zig.sh +32 -26
  17. package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
  18. package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
  19. package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
  20. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
  21. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +12 -3
  22. package/src/tigerbeetle/src/benchmark.zig +19 -9
  23. package/src/tigerbeetle/src/benchmark_array_search.zig +317 -0
  24. package/src/tigerbeetle/src/benchmarks/perf.zig +299 -0
  25. package/src/tigerbeetle/src/c/tb_client/context.zig +103 -0
  26. package/src/tigerbeetle/src/c/tb_client/packet.zig +80 -0
  27. package/src/tigerbeetle/src/c/tb_client/signal.zig +288 -0
  28. package/src/tigerbeetle/src/c/tb_client/thread.zig +328 -0
  29. package/src/tigerbeetle/src/c/tb_client.h +221 -0
  30. package/src/tigerbeetle/src/c/tb_client.zig +104 -0
  31. package/src/tigerbeetle/src/c/test.zig +1 -0
  32. package/src/tigerbeetle/src/cli.zig +143 -84
  33. package/src/tigerbeetle/src/config.zig +161 -20
  34. package/src/tigerbeetle/src/demo.zig +14 -8
  35. package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +2 -2
  36. package/src/tigerbeetle/src/ewah.zig +318 -0
  37. package/src/tigerbeetle/src/ewah_benchmark.zig +121 -0
  38. package/src/tigerbeetle/src/eytzinger_benchmark.zig +317 -0
  39. package/src/tigerbeetle/src/fifo.zig +17 -1
  40. package/src/tigerbeetle/src/io/darwin.zig +12 -10
  41. package/src/tigerbeetle/src/io/linux.zig +25 -9
  42. package/src/tigerbeetle/src/io/windows.zig +13 -9
  43. package/src/tigerbeetle/src/iops.zig +101 -0
  44. package/src/tigerbeetle/src/lsm/README.md +214 -0
  45. package/src/tigerbeetle/src/lsm/binary_search.zig +341 -0
  46. package/src/tigerbeetle/src/lsm/bloom_filter.zig +125 -0
  47. package/src/tigerbeetle/src/lsm/compaction.zig +557 -0
  48. package/src/tigerbeetle/src/lsm/composite_key.zig +77 -0
  49. package/src/tigerbeetle/src/lsm/direction.zig +11 -0
  50. package/src/tigerbeetle/src/lsm/eytzinger.zig +587 -0
  51. package/src/tigerbeetle/src/lsm/forest.zig +204 -0
  52. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +412 -0
  53. package/src/tigerbeetle/src/lsm/grid.zig +549 -0
  54. package/src/tigerbeetle/src/lsm/groove.zig +1002 -0
  55. package/src/tigerbeetle/src/lsm/k_way_merge.zig +474 -0
  56. package/src/tigerbeetle/src/lsm/level_iterator.zig +315 -0
  57. package/src/tigerbeetle/src/lsm/manifest.zig +580 -0
  58. package/src/tigerbeetle/src/lsm/manifest_level.zig +925 -0
  59. package/src/tigerbeetle/src/lsm/manifest_log.zig +953 -0
  60. package/src/tigerbeetle/src/lsm/node_pool.zig +231 -0
  61. package/src/tigerbeetle/src/lsm/posted_groove.zig +387 -0
  62. package/src/tigerbeetle/src/lsm/segmented_array.zig +1318 -0
  63. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
  64. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
  65. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +894 -0
  66. package/src/tigerbeetle/src/lsm/table.zig +967 -0
  67. package/src/tigerbeetle/src/lsm/table_immutable.zig +203 -0
  68. package/src/tigerbeetle/src/lsm/table_iterator.zig +306 -0
  69. package/src/tigerbeetle/src/lsm/table_mutable.zig +174 -0
  70. package/src/tigerbeetle/src/lsm/test.zig +423 -0
  71. package/src/tigerbeetle/src/lsm/tree.zig +1090 -0
  72. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +457 -0
  73. package/src/tigerbeetle/src/main.zig +141 -109
  74. package/src/tigerbeetle/src/message_bus.zig +49 -48
  75. package/src/tigerbeetle/src/message_pool.zig +22 -12
  76. package/src/tigerbeetle/src/ring_buffer.zig +126 -30
  77. package/src/tigerbeetle/src/simulator.zig +205 -140
  78. package/src/tigerbeetle/src/state_machine.zig +1268 -721
  79. package/src/tigerbeetle/src/static_allocator.zig +65 -0
  80. package/src/tigerbeetle/src/storage.zig +40 -14
  81. package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
  82. package/src/tigerbeetle/src/test/accounting/workload.zig +819 -0
  83. package/src/tigerbeetle/src/test/cluster.zig +104 -88
  84. package/src/tigerbeetle/src/test/conductor.zig +365 -0
  85. package/src/tigerbeetle/src/test/fuzz.zig +121 -0
  86. package/src/tigerbeetle/src/test/id.zig +89 -0
  87. package/src/tigerbeetle/src/test/message_bus.zig +15 -24
  88. package/src/tigerbeetle/src/test/network.zig +26 -17
  89. package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
  90. package/src/tigerbeetle/src/test/state_checker.zig +94 -68
  91. package/src/tigerbeetle/src/test/state_machine.zig +135 -69
  92. package/src/tigerbeetle/src/test/storage.zig +78 -28
  93. package/src/tigerbeetle/src/tigerbeetle.zig +19 -16
  94. package/src/tigerbeetle/src/unit_tests.zig +15 -0
  95. package/src/tigerbeetle/src/util.zig +51 -0
  96. package/src/tigerbeetle/src/vopr.zig +494 -0
  97. package/src/tigerbeetle/src/vopr_hub/README.md +58 -0
  98. package/src/tigerbeetle/src/vopr_hub/SETUP.md +199 -0
  99. package/src/tigerbeetle/src/vopr_hub/go.mod +3 -0
  100. package/src/tigerbeetle/src/vopr_hub/main.go +1022 -0
  101. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +3 -0
  102. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +403 -0
  103. package/src/tigerbeetle/src/vsr/client.zig +34 -7
  104. package/src/tigerbeetle/src/vsr/journal.zig +164 -174
  105. package/src/tigerbeetle/src/vsr/replica.zig +1602 -651
  106. package/src/tigerbeetle/src/vsr/superblock.zig +1761 -0
  107. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +255 -0
  108. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +644 -0
  109. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +561 -0
  110. package/src/tigerbeetle/src/vsr.zig +118 -170
  111. package/src/tigerbeetle/scripts/vopr.bat +0 -48
  112. package/src/tigerbeetle/scripts/vopr.sh +0 -33
package/src/tigerbeetle/src/vsr/journal.zig

@@ -9,10 +9,11 @@ const config = @import("../config.zig");
 const Message = @import("../message_pool.zig").MessagePool.Message;
 const vsr = @import("../vsr.zig");
 const Header = vsr.Header;
+const IOPS = @import("../iops.zig").IOPS;
 
 const log = std.log.scoped(.journal);
 
-/// There are two contiguous circular buffers on disk in the journal storage zone.
+/// There are two contiguous circular buffers on disk in the journal storage zone (`vsr.Zone.wal`).
 ///
 /// In both rings, the `op` for each reserved header is set to the slot index.
 /// This helps WAL recovery detect misdirected reads/writes.
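
That slot-index convention is what lets recovery tell a merely corrupt sector apart from a misdirected one. A minimal sketch of the idea (illustrative only; `check_reserved_slot` is not a function in this diff, though `Header.reserved(cluster, slot)` appears in `format_journal_sector` below):

    // Illustrative: Header.reserved(cluster, slot) stamps header.op = slot, so a
    // reserved header read back from slot `s` must agree with `s`. If the kernel
    // or disk wrote the sector to the wrong offset, the two disagree.
    fn check_reserved_slot(header: *const Header, slot: u64) bool {
        assert(header.command == .reserved);
        return header.op == slot; // false => misdirected read/write, not corruption.
    }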
@@ -215,7 +216,8 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
         recovering: bool = false,
 
         pub fn init(allocator: Allocator, storage: *Storage, replica: u8) !Self {
-            assert(write_ahead_log_zone_size <= storage.size);
+            // TODO Fix this assertion:
+            // assert(write_ahead_log_zone_size <= storage.size);
 
             var headers = try allocator.allocAdvanced(
                 Header,
@@ -324,11 +326,11 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
             assert(!self.recovering);
             assert(self.recovered);
             assert(self.writes.executing() == 0);
-            assert(self.headers[0].valid_checksum());
 
-            const replica = @fieldParentPtr(Replica, "journal", self);
+            if (!self.headers[0].valid_checksum()) return false;
             if (self.headers[0].operation != .root) return false;
 
+            const replica = @fieldParentPtr(Replica, "journal", self);
             assert(self.headers[0].checksum == Header.root_prepare(replica.cluster).checksum);
             assert(self.headers[0].checksum == self.prepare_checksums[0]);
             assert(self.prepare_inhabited[0]);
@@ -378,14 +380,15 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
             return self.slot_with_op(header.op);
         }
 
-        /// Returns any existing entry at the location indicated by header.op.
-        /// This existing entry may have an older or newer op number.
-        pub fn header_for_entry(self: *const Self, header: *const Header) ?*const Header {
+        /// Returns any existing header at the location indicated by header.op.
+        /// The existing header may have an older or newer op number.
+        pub fn header_for_prepare(self: *const Self, header: *const Header) ?*const Header {
             assert(header.command == .prepare);
             return self.header_for_op(header.op);
         }
 
         /// We use `op` directly to index into the headers array and locate ops without a scan.
+        /// The existing header may have an older or newer op number.
         pub fn header_for_op(self: *const Self, op: u64) ?*const Header {
            // TODO Snapshots
            const slot = self.slot_for_op(op);
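
The direct indexing that comment describes relies on the op-to-slot mapping being a simple modulo over the ring. A hedged sketch (the real `slot_for_op` is outside this hunk; the modulo is assumed from the circular-buffer description at the top of the file):

    // Illustrative: ops that differ by a multiple of slot_count share a slot,
    // which is why header_for_op(op) may return a header with an older or
    // newer op than the one asked for.
    fn slot_for_op(op: u64) Slot {
        return .{ .index = op % slot_count };
    }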
@@ -508,7 +511,8 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
 
         /// Copies latest headers between `op_min` and `op_max` (both inclusive) as fit in `dest`.
         /// Reverses the order when copying so that latest headers are copied first, which protects
-        /// against the callsite slicing the buffer the wrong way and incorrectly.
+        /// against the callsite slicing the buffer the wrong way and incorrectly, and which is
+        /// required by message handlers that use the hash chain for repairs.
         /// Skips .reserved headers (gaps between headers).
         /// Zeroes the `dest` buffer in case the copy would underflow and leave a buffer bleed.
         /// Returns the number of headers actually copied.
@@ -668,6 +672,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
             return range;
         }
 
+        /// Read a prepare from disk. There must be a matching in-memory header.
         pub fn read_prepare(
             self: *Self,
             callback: fn (replica: *Replica, prepare: ?*Message, destination_replica: ?u8) void,
@@ -685,40 +690,20 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 return;
             }
 
-            // Do not use this pointer beyond this function's scope, as the
-            // header memory may then change:
-            const exact = self.header_with_op_and_checksum(op, checksum) orelse {
+            const slot = self.slot_with_op_and_checksum(op, checksum) orelse {
                 self.read_prepare_log(op, checksum, "no entry exactly");
                 callback(replica, null, null);
                 return;
             };
 
-            const slot = self.slot_with_op_and_checksum(op, checksum).?;
-            if (self.faulty.bit(slot)) {
-                assert(self.dirty.bit(slot));
-
-                self.read_prepare_log(op, checksum, "faulty");
-                callback(replica, null, null);
-                return;
-            }
-
-            if (self.dirty.bit(slot)) {
-                self.read_prepare_log(op, checksum, "dirty");
+            if (self.prepare_inhabited[slot.index] and
+                self.prepare_checksums[slot.index] == checksum)
+            {
+                self.read_prepare_with_op_and_checksum(callback, op, checksum, destination_replica);
+            } else {
+                self.read_prepare_log(op, checksum, "no matching prepare");
                 callback(replica, null, null);
-                return;
             }
-
-            // Skip the disk read if the header is all we need:
-            if (exact.size == @sizeOf(Header)) {
-                const message = replica.message_bus.get_message();
-                defer replica.message_bus.unref(message);
-
-                message.header.* = exact.*;
-                callback(replica, message, destination_replica);
-                return;
-            }
-
-            self.read_prepare_with_op_and_checksum(callback, op, checksum, destination_replica);
         }
 
         /// Read a prepare from disk. There may or may not be an in-memory header.
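
Worth pausing on, since the old dirty/faulty checks disappear: `prepare_inhabited[slot]` and `prepare_checksums[slot]` describe what the prepares ring physically holds on disk, independently of the in-memory `headers` (hunk `@@ -324` above asserts the same pairing for slot 0). The rewrite reduces read_prepare to that single question. A sketch of the predicate being relied on (phrased as a helper; not code from this diff):

    // If this holds, the prepares ring's slot contains exactly the message we
    // want, so a disk read can service the request; otherwise the read fails
    // with "no matching prepare" and the caller must recover some other way.
    fn prepare_on_disk(self: *const Self, slot: Slot, checksum: u128) bool {
        return self.prepare_inhabited[slot.index] and
            self.prepare_checksums[slot.index] == checksum;
    }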
@@ -738,6 +723,18 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
             const message = replica.message_bus.get_message();
             defer replica.message_bus.unref(message);
 
+            // If the header is in-memory, we can skip the read from the disk.
+            if (self.header_with_op_and_checksum(op, checksum)) |exact| {
+                if (exact.size == @sizeOf(Header)) {
+                    message.header.* = exact.*;
+                    // Normally the message's padding would have been zeroed by the MessageBus,
+                    // but we are copying (only) a message header into a new buffer.
+                    std.mem.set(u8, message.buffer[@sizeOf(Header)..config.sector_size], 0);
+                    callback(replica, message, destination_replica);
+                    return;
+                }
+            }
+
             const read = self.reads.acquire() orelse {
                 self.read_prepare_log(op, checksum, "waiting for IOP");
                 callback(replica, null, null);
@@ -755,12 +752,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
             };
 
             const buffer: []u8 = message.buffer[0..config.message_size_max];
-            const offset = offset_physical(.prepares, slot);
-
-            log.debug(
-                "{}: read_sectors: offset={} len={}",
-                .{ replica.replica, offset, buffer.len },
-            );
+            const offset = offset_logical(.prepares, slot);
 
             // Memory must not be owned by `self.headers` as these may be modified concurrently:
             assert(@ptrToInt(buffer.ptr) < @ptrToInt(self.headers.ptr) or
@@ -771,6 +763,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 read_prepare_with_op_and_checksum_callback,
                 &read.completion,
                 buffer,
+                .wal,
                 offset,
             );
         }
@@ -818,6 +811,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 read.callback(replica, null, null);
                 return;
             }
+            assert(read.message.header.invalid() == null);
 
             if (read.message.header.cluster != replica.cluster) {
                 // This could be caused by a misdirected read or write.
@@ -940,7 +934,8 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 recover_headers_callback,
                 &read.completion,
                 buffer,
-                offset_physical_for_logical(.headers, offset),
+                .wal,
+                offset,
             );
         }
 
@@ -987,7 +982,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
             self.recover_headers(offset_next);
         }
 
-        fn recover_headers_buffer(message: *Message, offset: u64) []u8 {
+        fn recover_headers_buffer(message: *Message, offset: u64) []align(@alignOf(Header)) u8 {
             const max = std.math.min(message.buffer.len, headers_size - offset);
             assert(max % config.sector_size == 0);
             assert(max % @sizeOf(Header) == 0);
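
The only change to recover_headers_buffer is its return type: the slice is now guaranteed Header-aligned. A sketch of why a caller cares (illustrative caller, not from this diff): the recovered bytes can be reinterpreted as headers without an alignment cast.

    const buffer = recover_headers_buffer(message, offset);
    // []align(@alignOf(Header)) u8 -> a properly aligned []Header, no @alignCast:
    const headers = std.mem.bytesAsSlice(Header, buffer);
    assert(headers.len * @sizeOf(Header) == buffer.len);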
@@ -1034,7 +1029,8 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 // We load the entire message to verify that it isn't torn or corrupt.
                 // We don't know the message's size, so use the entire buffer.
                 message.buffer[0..config.message_size_max],
-                offset_physical(.prepares, slot),
+                .wal,
+                offset_logical(.prepares, slot),
             );
         }
 
@@ -1450,6 +1446,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 header.op,
                 header.checksum,
             });
+
             const slot = self.slot_for_header(header);
 
             if (self.has(header)) {
@@ -1513,7 +1510,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
 
             // Slice the message to the nearest sector, we don't want to write the whole buffer:
             const buffer = message.buffer[0..vsr.sector_ceil(message.header.size)];
-            const offset = offset_physical(.prepares, slot);
+            const offset = offset_logical(.prepares, slot);
 
             if (builtin.mode == .Debug) {
                 // Assert that any sector padding has already been zeroed:
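
`vsr.sector_ceil` rounds the message size up to a whole number of sectors, since the device is only ever written in sector-aligned, sector-sized units. A minimal sketch of that rounding, assuming the conventional round-up-to-multiple definition (the real implementation lives in vsr.zig, outside this hunk):

    fn sector_ceil(size: u64) u64 {
        // Round `size` up to the next multiple of the sector size:
        const sectors = std.math.divCeil(u64, size, config.sector_size) catch unreachable;
        return sectors * config.sector_size;
    }

The Debug-mode assertion that follows then guarantees that the bytes between `message.header.size` and `sector_ceil(message.header.size)` are zero, so the sector-aligned write cannot leak stale buffer contents to disk.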
@@ -1587,9 +1584,8 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 .index = @divFloor(slot_of_message.index, headers_per_sector) * headers_per_sector,
             };
 
-            const offset = offset_physical(.headers, slot_of_message);
+            const offset = offset_logical(.headers, slot_of_message);
             assert(offset % config.sector_size == 0);
-            assert(offset == slot_first.index * @sizeOf(Header));
 
             const buffer: []u8 = write.header_sector(self);
             const buffer_headers = std.mem.bytesAsSlice(Header, buffer);
@@ -1739,48 +1735,28 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 .prepares => {
                     const offset = config.message_size_max * slot.index;
                     assert(offset < prepares_size);
-                    return offset;
+                    return offset + config.journal_size_headers;
                 },
             }
         }
 
-        fn offset_physical(ring: Ring, slot: Slot) u64 {
-            return switch (ring) {
-                .headers => offset_logical(.headers, slot),
-                .prepares => headers_size + offset_logical(.prepares, slot),
-            };
-        }
-
         fn offset_logical_in_headers_for_message(self: *const Self, message: *Message) u64 {
             return offset_logical(.headers, self.slot_for_header(message.header));
         }
 
-        /// Where `offset` is a logical offset relative to the start of the respective ring.
-        fn offset_physical_for_logical(ring: Ring, offset: u64) u64 {
-            switch (ring) {
-                .headers => {
-                    assert(offset < headers_size);
-                    return offset;
-                },
-                .prepares => {
-                    assert(offset < prepares_size);
-                    return headers_size + offset;
-                },
-            }
-        }
-
+        // TODO Add a `Ring` argument, and make the offset relative to that.
         fn write_sectors(
             self: *Self,
             callback: fn (write: *Self.Write) void,
             write: *Self.Write,
             buffer: []const u8,
-            offset: u64,
+            offset_in_wal: u64,
         ) void {
             write.range = .{
                 .callback = callback,
                 .completion = undefined,
                 .buffer = buffer,
-                .offset = offset,
+                .offset = offset_in_wal,
                 .locked = false,
             };
             self.lock_sectors(write);
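
Taken together, these hunks delete the journal's private logical-to-physical translation: every offset the journal hands to Storage is now WAL-relative, paired with the zone tag `.wal` (the `vsr.Zone` enum is introduced in vsr.zig, elsewhere in this diff). The resulting layout, sketched from the arithmetic shown here (the `.headers` arm is inferred from the assertion deleted above, `offset == slot_first.index * @sizeOf(Header)`):

    // WAL-relative layout after this change:
    //   headers ring:  [0, config.journal_size_headers)
    //   prepares ring: [config.journal_size_headers, config.journal_size_headers + prepares_size)
    fn wal_offset(ring: Ring, slot: Slot) u64 {
        return switch (ring) {
            .headers => slot.index * @sizeOf(Header),
            .prepares => config.journal_size_headers + slot.index * config.message_size_max,
        };
    }

Storage translates the `(.wal, offset)` pair into a physical device offset, which is why `offset_physical` and `offset_physical_for_logical` can go away entirely.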
@@ -1816,6 +1792,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 write_sectors_on_write,
                 &write.range.completion,
                 write.range.buffer,
+                .wal,
                 write.range.offset,
             );
             // We rely on the Storage.write_sectors() implementation being always synchronous,
@@ -1856,10 +1833,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
                 self.lock_sectors(@fieldParentPtr(Self.Write, "range", waiting));
             }
 
-            // The callback may set range, so we can't set range to undefined after the callback.
-            const callback = range.callback;
-            range.* = undefined;
-            callback(write);
+            range.callback(write);
         }
 
         pub fn writing(self: *Self, op: u64, checksum: u128) bool {
@@ -1921,105 +1895,6 @@ pub const BitSet = struct {
     }
 };
 
-/// Take a u6 to limit to 64 items max (2^6 = 64)
-pub fn IOPS(comptime T: type, comptime size: u6) type {
-    const Map = std.StaticBitSet(size);
-    return struct {
-        const Self = @This();
-
-        items: [size]T = undefined,
-        /// 1 bits are free items.
-        free: Map = Map.initFull(),
-
-        pub fn acquire(self: *Self) ?*T {
-            const i = self.free.findFirstSet() orelse return null;
-            self.free.unset(i);
-            return &self.items[i];
-        }
-
-        pub fn release(self: *Self, item: *T) void {
-            item.* = undefined;
-            const i = (@ptrToInt(item) - @ptrToInt(&self.items)) / @sizeOf(T);
-            assert(!self.free.isSet(i));
-            self.free.set(i);
-        }
-
-        /// Returns the count of IOPs available.
-        pub fn available(self: *const Self) usize {
-            return self.free.count();
-        }
-
-        /// Returns the count of IOPs in use.
-        pub fn executing(self: *const Self) usize {
-            return size - self.available();
-        }
-
-        pub const Iterator = struct {
-            iops: *Self,
-            bitset_iterator: Map.Iterator(.{ .kind = .unset }),
-
-            pub fn next(iterator: *@This()) ?*T {
-                const i = iterator.bitset_iterator.next() orelse return null;
-                return &iterator.iops.items[i];
-            }
-        };
-
-        pub fn iterate(self: *Self) Iterator {
-            return .{
-                .iops = self,
-                .bitset_iterator = self.free.iterator(.{ .kind = .unset }),
-            };
-        }
-    };
-}
-
-test "IOPS" {
-    const testing = std.testing;
-    var iops = IOPS(u32, 4){};
-
-    try testing.expectEqual(@as(usize, 4), iops.available());
-    try testing.expectEqual(@as(usize, 0), iops.executing());
-
-    var one = iops.acquire().?;
-
-    try testing.expectEqual(@as(usize, 3), iops.available());
-    try testing.expectEqual(@as(usize, 1), iops.executing());
-
-    var two = iops.acquire().?;
-    var three = iops.acquire().?;
-
-    try testing.expectEqual(@as(usize, 1), iops.available());
-    try testing.expectEqual(@as(usize, 3), iops.executing());
-
-    var four = iops.acquire().?;
-    try testing.expectEqual(@as(?*u32, null), iops.acquire());
-
-    try testing.expectEqual(@as(usize, 0), iops.available());
-    try testing.expectEqual(@as(usize, 4), iops.executing());
-
-    iops.release(two);
-
-    try testing.expectEqual(@as(usize, 1), iops.available());
-    try testing.expectEqual(@as(usize, 3), iops.executing());
-
-    // there is only one slot free, so we will get the same pointer back.
-    try testing.expectEqual(@as(?*u32, two), iops.acquire());
-
-    iops.release(four);
-    iops.release(two);
-    iops.release(one);
-    iops.release(three);
-
-    try testing.expectEqual(@as(usize, 4), iops.available());
-    try testing.expectEqual(@as(usize, 0), iops.executing());
-
-    one = iops.acquire().?;
-    two = iops.acquire().?;
-    three = iops.acquire().?;
-    four = iops.acquire().?;
-    try testing.expectEqual(@as(?*u32, null), iops.acquire());
-}
-
 /// @B and @C:
 /// This prepare header is corrupt.
 /// We may have a valid redundant header, but need to recover the full message.
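
The IOPS free-list pool and its test are not gone: they move to src/iops.zig (file 43 in the list above) and are re-imported at the top of this file. A brief usage sketch, using only the API shown in the removed code (the `Read` struct here is hypothetical):

    const IOPS = @import("iops.zig").IOPS;

    const Read = struct { buffer: [512]u8 };

    var reads = IOPS(Read, 4){}; // At most four in flight; `size` is a u6, so 64 max.

    // acquire() hands out a free item, or null once all four are taken:
    const read = reads.acquire() orelse unreachable;
    // reads.executing() == 1 and reads.available() == 3 at this point.
    reads.release(read); // Frees the slot (and poisons the item with undefined).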
@@ -2291,3 +2166,118 @@ test "recovery_cases" {
         if (case_match == null) @panic("no matching case");
     }
 }
+
+/// Format part of a new WAL, writing to `target`.
+///
+/// `offset_logical` is relative to the beginning of the WAL.
+/// Returns the number of bytes written to `target`.
+pub fn format_journal(cluster: u32, offset_logical: u64, target: []u8) usize {
+    assert(offset_logical <= config.journal_size_max);
+    assert(offset_logical % config.sector_size == 0);
+    assert(target.len > 0);
+    assert(target.len % config.sector_size == 0);
+
+    const sector_max = @divExact(config.journal_size_max, config.sector_size);
+    var sectors = std.mem.bytesAsSlice([config.sector_size]u8, target);
+    for (sectors) |*sector_data, i| {
+        const sector = @divExact(offset_logical, config.sector_size) + i;
+        if (sector == sector_max) {
+            if (i == 0) {
+                assert(offset_logical == config.journal_size_max);
+            }
+            return i * config.sector_size;
+        } else {
+            format_journal_sector(cluster, sector, sector_data);
+        }
+    }
+    return target.len;
+}
+
+fn format_journal_sector(cluster: u32, sector: usize, sector_data: *[config.sector_size]u8) void {
+    assert(sector < @divExact(config.journal_size_max, config.sector_size));
+
+    var sector_headers = std.mem.bytesAsSlice(Header, sector_data);
+
+    if (sector * headers_per_sector < slot_count) {
+        for (sector_headers) |*header, i| {
+            const slot = sector * headers_per_sector + i;
+            if (sector == 0 and i == 0) {
+                header.* = Header.root_prepare(cluster);
+                assert(header.op == 0);
+                assert(header.command == .prepare);
+                assert(header.operation == .root);
+            } else {
+                header.* = Header.reserved(cluster, slot);
+            }
+        }
+        return;
+    }
+
+    const sectors_per_message = @divExact(config.message_size_max, config.sector_size);
+    const sector_in_prepares = sector - @divExact(slot_count, headers_per_sector);
+    const message_slot = @divFloor(sector_in_prepares, sectors_per_message);
+    assert(message_slot < slot_count);
+
+    std.mem.set(u8, sector_data, 0);
+    if (sector_in_prepares % sectors_per_message == 0) {
+        // The header goes in the first sector of the message.
+        if (message_slot == 0) {
+            sector_headers[0] = Header.root_prepare(cluster);
+        } else {
+            sector_headers[0] = Header.reserved(cluster, message_slot);
+        }
+    }
+}
+
+test "format_journal" {
+    const cluster = 123;
+    const write_sizes = [_]usize{
+        config.sector_size,
+        config.sector_size * 2,
+        config.sector_size * 3,
+        config.journal_size_max,
+    };
+
+    for (write_sizes) |write_size_max| {
+        const wal_data = try std.testing.allocator.alignedAlloc(u8, @alignOf(Header), config.journal_size_max);
+        defer std.testing.allocator.free(wal_data);
+
+        const write_data = try std.testing.allocator.alloc(u8, write_size_max);
+        defer std.testing.allocator.free(write_data);
+
+        const headers_ring = std.mem.bytesAsSlice(Header, wal_data[0..config.journal_size_headers]);
+        const prepare_ring = std.mem.bytesAsSlice([config.message_size_max]u8, wal_data[config.journal_size_headers..]);
+        try std.testing.expectEqual(@as(usize, config.journal_slot_count), headers_ring.len);
+        try std.testing.expectEqual(@as(usize, config.journal_slot_count), prepare_ring.len);
+
+        var offset: u64 = 0;
+        while (true) {
+            const write_size = format_journal(cluster, offset, write_data);
+            if (write_size == 0) break;
+            std.mem.copy(u8, wal_data[offset..][0..write_size], write_data[0..write_size]);
+            offset += write_size;
+        }
+
+        for (headers_ring) |*header, slot| {
+            try std.testing.expect(header.valid_checksum());
+            try std.testing.expect(header.valid_checksum_body(&[0]u8{}));
+            try std.testing.expectEqual(header.invalid(), null);
+            try std.testing.expectEqual(header.cluster, cluster);
+            try std.testing.expectEqual(header.op, slot);
+            try std.testing.expectEqual(header.size, @sizeOf(Header));
+            if (slot == 0) {
+                try std.testing.expectEqual(header.command, .prepare);
+                try std.testing.expectEqual(header.operation, .root);
+            } else {
+                try std.testing.expectEqual(header.command, .reserved);
+            }
+
+            const prepare_bytes = prepare_ring[slot];
+            const prepare_header = std.mem.bytesAsValue(Header, prepare_bytes[0..@sizeOf(Header)]);
+            const prepare_body = prepare_bytes[@sizeOf(Header)..];
+
+            try std.testing.expectEqual(header.*, prepare_header.*);
+            for (prepare_body) |byte| try std.testing.expectEqual(byte, 0);
+        }
+    }
+}
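
`format_journal` is designed to be driven incrementally, exactly as the test above does: call it repeatedly with a bounded scratch buffer until it returns 0. A caller formatting a new data file might loop like this (a sketch; `data_file.write_all` stands in for whatever sequential-write primitive the caller has, and is not part of this diff):

    var buffer: [config.sector_size * 64]u8 align(@alignOf(Header)) = undefined;
    var offset: u64 = 0;
    while (true) {
        const size = format_journal(cluster, offset, &buffer);
        if (size == 0) break; // All config.journal_size_max bytes are formatted.
        try data_file.write_all(offset, buffer[0..size]);
        offset += size;
    }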