tigerbeetle-node 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +302 -101
  2. package/dist/index.d.ts +70 -72
  3. package/dist/index.js +70 -72
  4. package/dist/index.js.map +1 -1
  5. package/package.json +6 -6
  6. package/scripts/download_node_headers.sh +14 -7
  7. package/src/index.ts +6 -10
  8. package/src/node.zig +6 -3
  9. package/src/tigerbeetle/scripts/benchmark.sh +4 -4
  10. package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
  11. package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
  12. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
  13. package/src/tigerbeetle/scripts/install.sh +19 -4
  14. package/src/tigerbeetle/scripts/install_zig.bat +5 -1
  15. package/src/tigerbeetle/scripts/install_zig.sh +24 -14
  16. package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
  17. package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
  18. package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
  19. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
  20. package/src/tigerbeetle/src/benchmark.zig +4 -2
  21. package/src/tigerbeetle/src/benchmark_array_search.zig +3 -3
  22. package/src/tigerbeetle/src/c/tb_client/thread.zig +8 -9
  23. package/src/tigerbeetle/src/c/tb_client.h +100 -80
  24. package/src/tigerbeetle/src/c/tb_client.zig +4 -1
  25. package/src/tigerbeetle/src/cli.zig +1 -1
  26. package/src/tigerbeetle/src/config.zig +48 -16
  27. package/src/tigerbeetle/src/demo.zig +3 -1
  28. package/src/tigerbeetle/src/eytzinger_benchmark.zig +3 -3
  29. package/src/tigerbeetle/src/io/linux.zig +1 -1
  30. package/src/tigerbeetle/src/lsm/README.md +214 -0
  31. package/src/tigerbeetle/src/lsm/binary_search.zig +137 -10
  32. package/src/tigerbeetle/src/lsm/bloom_filter.zig +43 -0
  33. package/src/tigerbeetle/src/lsm/compaction.zig +352 -398
  34. package/src/tigerbeetle/src/lsm/composite_key.zig +2 -0
  35. package/src/tigerbeetle/src/lsm/eytzinger.zig +1 -1
  36. package/src/tigerbeetle/src/lsm/forest.zig +21 -447
  37. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +412 -0
  38. package/src/tigerbeetle/src/lsm/grid.zig +145 -69
  39. package/src/tigerbeetle/src/lsm/groove.zig +196 -133
  40. package/src/tigerbeetle/src/lsm/k_way_merge.zig +40 -18
  41. package/src/tigerbeetle/src/lsm/level_iterator.zig +28 -9
  42. package/src/tigerbeetle/src/lsm/manifest.zig +81 -181
  43. package/src/tigerbeetle/src/lsm/manifest_level.zig +210 -454
  44. package/src/tigerbeetle/src/lsm/manifest_log.zig +77 -28
  45. package/src/tigerbeetle/src/lsm/posted_groove.zig +64 -76
  46. package/src/tigerbeetle/src/lsm/segmented_array.zig +561 -241
  47. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
  48. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
  49. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +62 -12
  50. package/src/tigerbeetle/src/lsm/table.zig +83 -48
  51. package/src/tigerbeetle/src/lsm/table_immutable.zig +30 -23
  52. package/src/tigerbeetle/src/lsm/table_iterator.zig +25 -14
  53. package/src/tigerbeetle/src/lsm/table_mutable.zig +63 -12
  54. package/src/tigerbeetle/src/lsm/test.zig +49 -55
  55. package/src/tigerbeetle/src/lsm/tree.zig +407 -402
  56. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +457 -0
  57. package/src/tigerbeetle/src/main.zig +28 -6
  58. package/src/tigerbeetle/src/message_bus.zig +2 -2
  59. package/src/tigerbeetle/src/message_pool.zig +14 -17
  60. package/src/tigerbeetle/src/simulator.zig +145 -112
  61. package/src/tigerbeetle/src/state_machine.zig +338 -228
  62. package/src/tigerbeetle/src/static_allocator.zig +65 -0
  63. package/src/tigerbeetle/src/storage.zig +3 -7
  64. package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
  65. package/src/tigerbeetle/src/test/accounting/workload.zig +819 -0
  66. package/src/tigerbeetle/src/test/cluster.zig +18 -48
  67. package/src/tigerbeetle/src/test/conductor.zig +365 -0
  68. package/src/tigerbeetle/src/test/fuzz.zig +121 -0
  69. package/src/tigerbeetle/src/test/id.zig +89 -0
  70. package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
  71. package/src/tigerbeetle/src/test/state_checker.zig +93 -69
  72. package/src/tigerbeetle/src/test/state_machine.zig +11 -35
  73. package/src/tigerbeetle/src/test/storage.zig +29 -8
  74. package/src/tigerbeetle/src/tigerbeetle.zig +14 -16
  75. package/src/tigerbeetle/src/unit_tests.zig +7 -0
  76. package/src/tigerbeetle/src/vopr.zig +494 -0
  77. package/src/tigerbeetle/src/vopr_hub/README.md +58 -0
  78. package/src/tigerbeetle/src/vopr_hub/SETUP.md +199 -0
  79. package/src/tigerbeetle/src/vopr_hub/go.mod +3 -0
  80. package/src/tigerbeetle/src/vopr_hub/main.go +1022 -0
  81. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +3 -0
  82. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +403 -0
  83. package/src/tigerbeetle/src/vsr/client.zig +13 -0
  84. package/src/tigerbeetle/src/vsr/journal.zig +16 -13
  85. package/src/tigerbeetle/src/vsr/replica.zig +924 -491
  86. package/src/tigerbeetle/src/vsr/superblock.zig +55 -37
  87. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -10
  88. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +2 -2
  89. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +18 -3
  90. package/src/tigerbeetle/src/vsr.zig +75 -55
  91. package/src/tigerbeetle/scripts/vopr.bat +0 -48
  92. package/src/tigerbeetle/scripts/vopr.sh +0 -33
@@ -0,0 +1,148 @@
1
+ const std = @import("std");
2
+ const assert = std.debug.assert;
3
+
4
+ const config = @import("../config.zig");
5
+ const NodePoolType = @import("node_pool.zig").NodePool;
6
+ const table_count_max_for_level = @import("tree.zig").table_count_max_for_level;
7
+ const table_count_max_for_tree = @import("tree.zig").table_count_max_for_tree;
8
+ const SortedSegmentedArray = @import("segmented_array.zig").SortedSegmentedArray;
9
+
10
+ const samples = 5_000_000;
11
+
12
+ const Options = struct {
13
+ Key: type,
14
+ value_size: u32,
15
+ value_count: u32,
16
+ node_size: u32,
17
+ };
18
+
19
+ // Benchmark 112B values to match `@sizeOf(TableInfo)`, which is either 112B or 80B depending on
20
+ // the Key type.
21
+ const configs = [_]Options{
22
+ Options{ .Key = u64, .value_size = 112, .value_count = 33, .node_size = 256 },
23
+ Options{ .Key = u64, .value_size = 112, .value_count = 34, .node_size = 256 },
24
+ Options{ .Key = u64, .value_size = 112, .value_count = 1024, .node_size = 256 },
25
+ Options{ .Key = u64, .value_size = 112, .value_count = 1024, .node_size = 512 },
26
+
27
+ Options{
28
+ .Key = u64,
29
+ .value_size = 112,
30
+ .value_count = table_count_max_for_level(config.lsm_growth_factor, 1),
31
+ .node_size = config.lsm_manifest_node_size,
32
+ },
33
+ Options{
34
+ .Key = u64,
35
+ .value_size = 112,
36
+ .value_count = table_count_max_for_level(config.lsm_growth_factor, 2),
37
+ .node_size = config.lsm_manifest_node_size,
38
+ },
39
+ Options{
40
+ .Key = u64,
41
+ .value_size = 112,
42
+ .value_count = table_count_max_for_level(config.lsm_growth_factor, 3),
43
+ .node_size = config.lsm_manifest_node_size,
44
+ },
45
+ Options{
46
+ .Key = u64,
47
+ .value_size = 112,
48
+ .value_count = table_count_max_for_level(config.lsm_growth_factor, 4),
49
+ .node_size = config.lsm_manifest_node_size,
50
+ },
51
+ Options{
52
+ .Key = u64,
53
+ .value_size = 112,
54
+ .value_count = table_count_max_for_level(config.lsm_growth_factor, 5),
55
+ .node_size = config.lsm_manifest_node_size,
56
+ },
57
+ Options{
58
+ .Key = u64,
59
+ .value_size = 112,
60
+ .value_count = table_count_max_for_level(config.lsm_growth_factor, 6),
61
+ .node_size = config.lsm_manifest_node_size,
62
+ },
63
+ };
64
+
65
+ pub fn main() !void {
66
+ const stdout = std.io.getStdOut().writer();
67
+ var prng = std.rand.DefaultPrng.init(42);
68
+
69
+ inline for (configs) |options| {
70
+ const Key = options.Key;
71
+ const Value = struct {
72
+ key: Key,
73
+ padding: [options.value_size - @sizeOf(Key)]u8,
74
+ };
75
+
76
+ const NodePool = NodePoolType(options.node_size, @alignOf(Value));
77
+ const SegmentedArray = SortedSegmentedArray(
78
+ Value,
79
+ NodePool,
80
+ // Must be max of both to avoid hitting SegmentedArray's assertion:
81
+ // assert(element_count_max > node_capacity);
82
+ comptime std.math.max(
83
+ options.value_count,
84
+ @divFloor(options.node_size, @sizeOf(Key)) + 1,
85
+ ),
86
+ Key,
87
+ struct {
88
+ inline fn key_from_value(value: *const Value) Key {
89
+ return value.key;
90
+ }
91
+ }.key_from_value,
92
+ struct {
93
+ inline fn compare_keys(a: Key, b: Key) std.math.Order {
94
+ return std.math.order(a, b);
95
+ }
96
+ }.compare_keys,
97
+ .{ .verify = false },
98
+ );
99
+
100
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
101
+ defer arena.deinit();
102
+ const allocator = arena.allocator();
103
+
104
+ var node_pool = try NodePool.init(allocator, SegmentedArray.node_count_max);
105
+ defer node_pool.deinit(allocator);
106
+
107
+ var array = try SegmentedArray.init(allocator);
108
+ defer array.deinit(allocator, &node_pool);
109
+
110
+ var i: usize = 0;
111
+ while (i < options.value_count) : (i += 1) {
112
+ _ = array.insert_element(&node_pool, .{
113
+ .key = prng.random().uintLessThanBiased(u64, options.value_count),
114
+ .padding = [_]u8{0} ** (options.value_size - @sizeOf(Key)),
115
+ });
116
+ }
117
+
118
+ const queries = try alloc_shuffled_index(allocator, options.value_count, prng.random());
119
+ defer allocator.free(queries);
120
+
121
+ const timer = try std.time.Timer.start();
122
+ const repetitions = std.math.max(1, @divFloor(samples, queries.len));
123
+ var j: usize = 0;
124
+ while (j < repetitions) : (j += 1) {
125
+ for (queries) |query| {
126
+ std.mem.doNotOptimizeAway(array.absolute_index_for_cursor(array.search(query)));
127
+ }
128
+ }
129
+ const time = timer.read() / repetitions / queries.len;
130
+
131
+ try stdout.print("KeyType={} ValueCount={:_>7} ValueSize={:_>2}B NodeSize={:_>6}B LookupTime={:_>6}ns\n", .{
132
+ options.Key,
133
+ options.value_count,
134
+ options.value_size,
135
+ options.node_size,
136
+ time,
137
+ });
138
+ }
139
+ }
140
+
141
+ // shuffle([0,1,…,n-1])
142
+ fn alloc_shuffled_index(allocator: std.mem.Allocator, n: usize, rand: std.rand.Random) ![]usize {
143
+ // Allocate on the heap; the array may be too large to fit on the stack.
144
+ var indices = try allocator.alloc(usize, n);
145
+ for (indices) |*i, j| i.* = j;
146
+ rand.shuffle(usize, indices[0..]);
147
+ return indices;
148
+ }
@@ -0,0 +1,9 @@
1
+ const std = @import("std");
2
+
3
+ const fuzz = @import("../test/fuzz.zig");
4
+ const segmented_array = @import("segmented_array.zig");
5
+
6
+ pub fn main() !void {
7
+ const fuzz_args = try fuzz.parse_fuzz_args(std.testing.allocator);
8
+ try segmented_array.run_tests(fuzz_args.seed, .{ .verify = true });
9
+ }
@@ -8,6 +8,7 @@ const meta = std.meta;
8
8
  const Vector = meta.Vector;
9
9
 
10
10
  const config = @import("../config.zig");
11
+ const div_ceil = @import("../util.zig").div_ceil;
11
12
  const verify = config.verify;
12
13
 
13
14
  pub const Layout = struct {
@@ -19,6 +20,7 @@ pub const Layout = struct {
19
20
  value_alignment: ?u29 = null,
20
21
  };
21
22
 
23
+ /// Each Key is associated with a set of n consecutive ways (or slots) that may contain the Value.
22
24
  pub fn SetAssociativeCache(
23
25
  comptime Key: type,
24
26
  comptime Value: type,
@@ -86,9 +88,37 @@ pub fn SetAssociativeCache(
86
88
  const Clock = meta.Int(.unsigned, clock_hand_bits);
87
89
 
88
90
  sets: u64,
91
+
92
+ /// A short, partial hash of a Key, corresponding to a Value.
93
+ /// Because the tag is small, collisions are possible:
94
+ /// `tag(v₁) = tag(v₂)` does not imply `v₁ = v₂`.
95
+ /// However, most of the time, where the tag differs, a full key comparison can be avoided.
96
+ /// Since tags are 16-32x smaller than keys, they can also be kept hot in cache.
89
97
  tags: []Tag,
98
+
99
+ /// When the corresponding Count is zero, the Value is absent.
90
100
  values: []align(value_alignment) Value,
101
+
102
+ /// Each value has a Count, which tracks the number of recent reads.
103
+ ///
104
+ /// * A Count is incremented when the value is accessed by `get`.
105
+ /// * A Count is decremented when a cache write to the value's Set misses.
106
+ /// * The value is evicted when its Count reaches zero.
107
+ ///
91
108
  counts: PackedUnsignedIntegerArray(Count),
109
+
110
+ /// Each set has a Clock: a counter that cycles between each of the set's ways (i.e. slots).
111
+ ///
112
+ /// On cache write, entries are checked for occupancy (or eviction) beginning from the
113
+ /// clock's position, wrapping around.
114
+ ///
115
+ /// The algorithm implemented is "CLOCK Nth-Chance" — each way has more than one bit,
116
+ /// to give ways more than one chance before eviction.
117
+ ///
118
+ /// * A similar algorithm called "RRIParoo" is described in
119
+ /// "Kangaroo: Caching Billions of Tiny Objects on Flash".
120
+ /// * For more general information on CLOCK algorithms, see:
121
+ /// https://en.wikipedia.org/wiki/Page_replacement_algorithm.
92
122
  clocks: PackedUnsignedIntegerArray(Clock),
93
123
 
94
124
  pub fn init(allocator: mem.Allocator, value_count_max: u64) !Self {
@@ -158,6 +188,13 @@ pub fn SetAssociativeCache(
158
188
  mem.set(u64, self.clocks.words, 0);
159
189
  }
160
190
 
191
+ /// Returns whether an entry with the given key is cached,
192
+ /// without modifying the entry's counter.
193
+ pub fn exists(self: *Self, key: Key) bool {
194
+ const set = self.associate(key);
195
+ return self.search(set, key) != null;
196
+ }
197
+
161
198
  pub fn get(self: *Self, key: Key) ?*align(value_alignment) Value {
162
199
  const set = self.associate(key);
163
200
  const way = self.search(set, key) orelse return null;
@@ -176,8 +213,17 @@ pub fn SetAssociativeCache(
176
213
  self.counts.set(set.offset + way, 0);
177
214
  }
178
215
 
216
+ /// Hint that the key is less likely to be accessed in the future, without actually removing
217
+ /// it from the cache.
218
+ pub fn demote(self: *Self, key: Key) void {
219
+ const set = self.associate(key);
220
+ const way = self.search(set, key) orelse return;
221
+
222
+ self.counts.set(set.offset + way, 1);
223
+ }
224
+
179
225
  /// If the key is present in the set, returns the way. Otherwise returns null.
180
- inline fn search(self: *Self, set: Set, key: Key) ?usize {
226
+ inline fn search(self: *const Self, set: Set, key: Key) ?usize {
181
227
  const ways = search_tags(set.tags, set.tag);
182
228
 
183
229
  var it = BitIterator(Ways){ .bits = ways };
@@ -194,7 +240,7 @@ pub fn SetAssociativeCache(
194
240
  /// Where each set bit represents the index of a way that has the same tag.
195
241
  const Ways = meta.Int(.unsigned, layout.ways);
196
242
 
197
- inline fn search_tags(tags: *[layout.ways]Tag, tag: Tag) Ways {
243
+ inline fn search_tags(tags: *const [layout.ways]Tag, tag: Tag) Ways {
198
244
  const x: Vector(layout.ways, Tag) = tags.*;
199
245
  const y: Vector(layout.ways, Tag) = @splat(layout.ways, tag);
200
246
 
@@ -202,8 +248,8 @@ pub fn SetAssociativeCache(
202
248
  return @ptrCast(*const Ways, &result).*;
203
249
  }
204
250
 
205
- pub fn put_no_clobber(self: *Self, key: Key) *align(value_alignment) Value {
206
- return self.put_no_clobber_preserve_locked(
251
+ pub fn insert(self: *Self, key: Key) *align(value_alignment) Value {
252
+ return self.insert_preserve_locked(
207
253
  void,
208
254
  struct {
209
255
  inline fn locked(_: void, _: *const Value) bool {
@@ -219,7 +265,7 @@ pub fn SetAssociativeCache(
219
265
  /// If the key is already in the cache, its old entry is removed before the new one is inserted.
220
266
  /// Never evicts keys for which locked() returns true.
221
267
  /// The caller must guarantee that locked() returns true for less than layout.ways keys.
222
- pub fn put_no_clobber_preserve_locked(
268
+ pub fn insert_preserve_locked(
223
269
  self: *Self,
224
270
  comptime Context: type,
225
271
  comptime locked: fn (
@@ -230,8 +276,12 @@ pub fn SetAssociativeCache(
230
276
  key: Key,
231
277
  ) *align(value_alignment) Value {
232
278
  const set = self.associate(key);
233
-
234
- if (verify) assert(self.search(set, key) == null);
279
+ if (self.search(set, key)) |way| {
280
+ // Remove the old entry for this key.
281
+ // It should be a different value, but since we are returning a value pointer we
282
+ // can't check against the new one.
283
+ self.counts.set(set.offset + way, 0);
284
+ }
235
285
 
236
286
  const clock_index = @divExact(set.offset, layout.ways);
237
287
 
@@ -380,7 +430,7 @@ fn set_associative_cache_test(
380
430
  try expectEqual(i, sac.clocks.get(0));
381
431
 
382
432
  const key = i * sac.sets;
383
- sac.put_no_clobber(key).* = key;
433
+ sac.insert(key).* = key;
384
434
  try expect(sac.counts.get(i) == 1);
385
435
  try expectEqual(key, sac.get(key).?.*);
386
436
  try expect(sac.counts.get(i) == 2);
@@ -393,7 +443,7 @@ fn set_associative_cache_test(
393
443
  // Insert another element into the first set, causing key 0 to be evicted.
394
444
  {
395
445
  const key = layout.ways * sac.sets;
396
- sac.put_no_clobber(key).* = key;
446
+ sac.insert(key).* = key;
397
447
  try expect(sac.counts.get(0) == 1);
398
448
  try expectEqual(key, sac.get(key).?.*);
399
449
  try expect(sac.counts.get(0) == 2);
@@ -422,7 +472,7 @@ fn set_associative_cache_test(
422
472
  const key = (layout.ways + 1) * sac.sets;
423
473
  const expect_evicted = layout.ways * sac.sets;
424
474
 
425
- sac.put_no_clobber_preserve_locked(
475
+ sac.insert_preserve_locked(
426
476
  u64,
427
477
  struct {
428
478
  inline fn locked(only_unlocked: u64, value: *const Value) bool {
@@ -462,7 +512,7 @@ fn set_associative_cache_test(
462
512
  try expectEqual(i, sac.clocks.get(0));
463
513
 
464
514
  const key = i * sac.sets;
465
- sac.put_no_clobber(key).* = key;
515
+ sac.insert(key).* = key;
466
516
  try expect(sac.counts.get(i) == 1);
467
517
  var j: usize = 2;
468
518
  while (j <= math.maxInt(SAC.Count)) : (j += 1) {
@@ -480,7 +530,7 @@ fn set_associative_cache_test(
480
530
  // Insert another element into the first set, causing key 0 to be evicted.
481
531
  {
482
532
  const key = layout.ways * sac.sets;
483
- sac.put_no_clobber(key).* = key;
533
+ sac.insert(key).* = key;
484
534
  try expect(sac.counts.get(0) == 1);
485
535
  try expectEqual(key, sac.get(key).?.*);
486
536
  try expect(sac.counts.get(0) == 2);
@@ -12,8 +12,48 @@ const div_ceil = @import("../util.zig").div_ceil;
12
12
  const eytzinger = @import("eytzinger.zig").eytzinger;
13
13
  const snapshot_latest = @import("tree.zig").snapshot_latest;
14
14
 
15
+ const BlockType = @import("grid.zig").BlockType;
15
16
  const TableInfoType = @import("manifest.zig").TableInfoType;
16
17
 
18
+ /// A table is a set of blocks:
19
+ ///
20
+ /// * Index block (exactly 1)
21
+ /// * Filter blocks (at least one, at most `filter_block_count_max`)
22
+ /// Each filter block summarizes the keys for several adjacent (in terms of key) data blocks.
23
+ /// * Data blocks (at least one, at most `data_block_count_max`)
24
+ /// Store the actual keys/values, along with a small index of the keys to optimize lookups.
25
+ ///
26
+ ///
27
+ /// Every block begins with a `vsr.Header` that includes:
28
+ ///
29
+ /// * `checksum`, `checksum_body` verify the data integrity.
30
+ /// * `cluster` is the cluster id.
31
+ /// * `command` is `.block`.
32
+ /// * `op` is the block address.
33
+ /// * `size` is the block size excluding padding.
34
+ ///
35
+ /// Index block schema:
36
+ /// │ vsr.Header │ operation=BlockType.index
37
+ /// │ vsr.Header │ commit=filter_block_count,
38
+ /// │ │ request=data_block_count,
39
+ /// │ │ timestamp=snapshot_min
40
+ /// │ [filter_block_count_max]u128 │ checksums of filter blocks
41
+ /// │ [data_block_count_max]u128 │ checksums of data blocks
42
+ /// │ [data_block_count_max]Key │ the maximum/last key in the respective data block
43
+ /// │ [filter_block_count_max]u64 │ addresses of filter blocks
44
+ /// │ [data_block_count_max]u64 │ addresses of data blocks
45
+ /// │ […]u8{0} │ padding (to end of block)
46
+ ///
47
+ /// Filter block schema:
48
+ /// │ vsr.Header │ operation=BlockType.filter
49
+ /// │ […]u8 │ A split-block Bloom filter, "containing" every key from as many as
50
+ /// │ │ `filter_data_block_count_max` data blocks.
51
+ ///
52
+ /// Data block schema:
53
+ /// │ vsr.Header │ operation=BlockType.data
54
+ /// │ [block_key_count + 1]Key │ Eytzinger-layout keys from a subset of the values.
55
+ /// │ [≤value_count_max]Value │ At least one value (no empty tables).
56
+ /// │ […]u8{0} │ padding (to end of block)
17
57
  pub fn TableType(
18
58
  comptime TableKey: type,
19
59
  comptime TableValue: type,
@@ -84,7 +124,7 @@ pub fn TableType(
84
124
  const table_block_count_max = @divExact(table_size_max, block_size);
85
125
  const block_body_size = block_size - @sizeOf(vsr.Header);
86
126
 
87
- pub const layout = blk: {
127
+ pub const layout = layout: {
88
128
  assert(block_size % config.sector_size == 0);
89
129
  assert(math.isPowerOfTwo(table_size_max));
90
130
  assert(math.isPowerOfTwo(block_size));
@@ -158,15 +198,18 @@ pub fn TableType(
158
198
  block_value_count_max * filter_bytes_per_key,
159
199
  );
160
200
 
201
+ // Compute the number of data and filter blocks by solving the constraints:
202
+ // * the filter and data blocks' metadata must fit in the index block
203
+ // * the filter blocks must index all data blocks
204
+ // * minimize the number of filter blocks
205
+ // * maximize the number of data blocks
161
206
  var data_blocks = table_block_count_max - index_block_count;
162
- var data_index_size = 0;
163
207
  var filter_blocks = 0;
164
- var filter_index_size = 0;
165
208
  while (true) : (data_blocks -= 1) {
166
- data_index_size = data_index_entry_size * data_blocks;
167
-
168
209
  filter_blocks = div_ceil(data_blocks, filter_data_block_count_max);
169
- filter_index_size = filter_index_entry_size * filter_blocks;
210
+
211
+ const data_index_size = data_index_entry_size * data_blocks;
212
+ const filter_index_size = filter_index_entry_size * filter_blocks;
170
213
 
171
214
  const index_size = @sizeOf(vsr.Header) + data_index_size + filter_index_size;
172
215
  const table_block_count = index_block_count + filter_blocks + data_blocks;
@@ -178,14 +221,18 @@ pub fn TableType(
178
221
  const table_block_count = index_block_count + filter_blocks + data_blocks;
179
222
  assert(table_block_count <= table_block_count_max);
180
223
 
181
- break :blk .{
224
+ break :layout .{
225
+ // The number of keys in the Eytzinger layout per data block.
182
226
  .block_key_count = block_key_count,
227
+ // The number of bytes used by the keys in the data block.
183
228
  .block_key_layout_size = block_key_layout_size,
229
+ // The maximum number of values in a data block.
184
230
  .block_value_count_max = block_value_count_max,
185
231
 
186
232
  .data_block_count_max = data_blocks,
187
233
  .filter_block_count_max = filter_blocks,
188
234
 
235
+ // The number of data blocks covered by a single filter block.
189
236
  .filter_data_block_count_max = filter_data_block_count_max,
190
237
  };
191
238
  };
@@ -397,21 +444,11 @@ pub fn TableType(
397
444
  }
398
445
  }
399
446
 
400
- fn blocks_used(table: *Table) u32 {
401
- assert(!table.free);
402
- return Table.index_blocks_used(&table.blocks[0]);
403
- }
404
-
405
- fn filter_blocks_used(table: *Table) u32 {
406
- assert(!table.free);
407
- return Table.index_filter_blocks_used(&table.blocks[0]);
408
- }
409
-
410
447
  pub const Builder = struct {
411
448
  const TableInfo = TableInfoType(Table);
412
449
 
413
- key_min: Key = undefined,
414
- key_max: Key = undefined,
450
+ key_min: Key = undefined, // Inclusive.
451
+ key_max: Key = undefined, // Inclusive.
415
452
 
416
453
  index_block: BlockPtr,
417
454
  filter_block: BlockPtr,
@@ -496,6 +533,7 @@ pub fn TableType(
496
533
  // For each block we write the sorted values, initialize the Eytzinger layout,
497
534
  // complete the block header, and add the block's max key to the table index.
498
535
 
536
+ assert(options.address > 0);
499
537
  assert(builder.value > 0);
500
538
 
501
539
  const block = builder.data_block;
@@ -503,6 +541,7 @@ pub fn TableType(
503
541
  assert(values_max.len == data.value_count_max);
504
542
 
505
543
  const values = values_max[0..builder.value];
544
+ const key_max = key_from_value(&values[values.len - 1]);
506
545
 
507
546
  if (config.verify) {
508
547
  var a = &values[0];
@@ -533,6 +572,7 @@ pub fn TableType(
533
572
  const block_padding = block[data.padding_offset..][0..data.padding_size];
534
573
  mem.set(u8, values_padding, 0);
535
574
  mem.set(u8, block_padding, 0);
575
+ assert(compare_keys(key_from_value(&values[values.len - 1]), key_max) == .eq);
536
576
 
537
577
  const header_bytes = block[0..@sizeOf(vsr.Header)];
538
578
  const header = mem.bytesAsValue(vsr.Header, header_bytes);
@@ -541,15 +581,14 @@ pub fn TableType(
541
581
  .cluster = options.cluster,
542
582
  .op = options.address,
543
583
  .request = @intCast(u32, values.len),
544
- .size = block_size - @intCast(u32, values_padding.len - block_padding.len),
584
+ .size = block_size - @intCast(u32, values_padding.len + block_padding.len),
545
585
  .command = .block,
586
+ .operation = BlockType.data.operation(),
546
587
  };
547
588
 
548
589
  header.set_checksum_body(block[@sizeOf(vsr.Header)..header.size]);
549
590
  header.set_checksum();
550
591
 
551
- const key_max = key_from_value(&values[values.len - 1]);
552
-
553
592
  const current = builder.data_block_count;
554
593
  index_data_keys(builder.index_block)[current] = key_max;
555
594
  index_data_addresses(builder.index_block)[current] = options.address;
@@ -559,7 +598,7 @@ pub fn TableType(
559
598
  builder.key_max = key_max;
560
599
 
561
600
  if (current == 0 and values.len == 1) {
562
- assert(compare_keys(builder.key_min, builder.key_max) != .gt);
601
+ assert(compare_keys(builder.key_min, builder.key_max) == .eq);
563
602
  } else {
564
603
  assert(compare_keys(builder.key_min, builder.key_max) == .lt);
565
604
  }
@@ -587,6 +626,7 @@ pub fn TableType(
587
626
 
588
627
  pub fn filter_block_finish(builder: *Builder, options: FilterFinishOptions) void {
589
628
  assert(!builder.filter_block_empty());
629
+ assert(options.address > 0);
590
630
 
591
631
  const header_bytes = builder.filter_block[0..@sizeOf(vsr.Header)];
592
632
  const header = mem.bytesAsValue(vsr.Header, header_bytes);
@@ -595,6 +635,7 @@ pub fn TableType(
595
635
  .op = options.address,
596
636
  .size = block_size - filter.padding_size,
597
637
  .command = .block,
638
+ .operation = BlockType.filter.operation(),
598
639
  };
599
640
 
600
641
  const body = builder.filter_block[@sizeOf(vsr.Header)..header.size];
@@ -626,6 +667,7 @@ pub fn TableType(
626
667
  };
627
668
 
628
669
  pub fn index_block_finish(builder: *Builder, options: IndexFinishOptions) TableInfo {
670
+ assert(options.address > 0);
629
671
  assert(builder.data_block_count > 0);
630
672
  assert(builder.value == 0);
631
673
  assert(builder.data_blocks_in_filter == 0);
@@ -658,6 +700,7 @@ pub fn TableType(
658
700
  .timestamp = options.snapshot_min,
659
701
  .size = index.size,
660
702
  .command = .block,
703
+ .operation = BlockType.index.operation(),
661
704
  };
662
705
  header.set_checksum_body(index_block[@sizeOf(vsr.Header)..header.size]);
663
706
  header.set_checksum();
@@ -764,14 +807,6 @@ pub fn TableType(
764
807
  return slice[0..index_filter_blocks_used(index_block)];
765
808
  }
766
809
 
767
- inline fn index_snapshot_min(index_block: BlockPtrConst) u32 {
768
- const header = mem.bytesAsValue(vsr.Header, index_block[0..@sizeOf(vsr.Header)]);
769
- const value = @intCast(u32, header.offset);
770
- assert(value > 0);
771
- assert(value < snapshot_latest);
772
- return value;
773
- }
774
-
775
810
  inline fn index_blocks_used(index_block: BlockPtrConst) u32 {
776
811
  return index_block_count + index_filter_blocks_used(index_block) +
777
812
  index_data_blocks_used(index_block);
@@ -794,7 +829,7 @@ pub fn TableType(
794
829
  }
795
830
 
796
831
  /// Returns the zero-based index of the data block that may contain the key.
797
- /// May be called on an index block only when the key is already in range of the table.
832
+ /// May be called on an index block only when the key is in range of the table.
798
833
  inline fn index_data_block_for_key(index_block: BlockPtrConst, key: Key) u32 {
799
834
  // Because we store key_max in the index block we can use the raw binary search
800
835
  // here and avoid the extra comparison. If the search finds an exact match, we
@@ -806,6 +841,7 @@ pub fn TableType(
806
841
  compare_keys,
807
842
  Table.index_data_keys_used(index_block),
808
843
  key,
844
+ .{},
809
845
  );
810
846
  assert(data_block_index < index_data_blocks_used(index_block));
811
847
  return data_block_index;
@@ -819,7 +855,7 @@ pub fn TableType(
819
855
  };
820
856
 
821
857
  /// Returns all data stored in the index block relating to a given key.
822
- /// May be called on an index block only when the key is already in range of the table.
858
+ /// May be called on an index block only when the key is in range of the table.
823
859
  pub inline fn index_blocks_for_key(index_block: BlockPtrConst, key: Key) IndexBlocks {
824
860
  const d = Table.index_data_block_for_key(index_block, key);
825
861
  const f = @divFloor(d, filter.data_block_count_max);
@@ -885,22 +921,21 @@ pub fn TableType(
885
921
  key,
886
922
  );
887
923
 
888
- if (values.len > 0) {
889
- const result = binary_search.binary_search_values(
890
- Key,
891
- Value,
892
- key_from_value,
893
- compare_keys,
894
- values,
895
- key,
896
- );
897
- if (result.exact) {
898
- const value = &values[result.index];
899
- if (config.verify) {
900
- assert(compare_keys(key, key_from_value(value)) == .eq);
901
- }
902
- return value;
924
+ const result = binary_search.binary_search_values(
925
+ Key,
926
+ Value,
927
+ key_from_value,
928
+ compare_keys,
929
+ values,
930
+ key,
931
+ .{},
932
+ );
933
+ if (result.exact) {
934
+ const value = &values[result.index];
935
+ if (config.verify) {
936
+ assert(compare_keys(key, key_from_value(value)) == .eq);
903
937
  }
938
+ return value;
904
939
  }
905
940
 
906
941
  if (config.verify) {