tigerbeetle-node 0.11.12 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/README.md +212 -196
  2. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  3. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  4. package/dist/bin/aarch64-macos/client.node +0 -0
  5. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  6. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  7. package/dist/bin/x86_64-macos/client.node +0 -0
  8. package/dist/index.js +33 -1
  9. package/dist/index.js.map +1 -1
  10. package/package-lock.json +66 -0
  11. package/package.json +8 -17
  12. package/src/index.ts +56 -1
  13. package/src/node.zig +10 -9
  14. package/dist/.client.node.sha256 +0 -1
  15. package/scripts/build_lib.sh +0 -61
  16. package/scripts/download_node_headers.sh +0 -32
  17. package/src/tigerbeetle/scripts/benchmark.bat +0 -48
  18. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  19. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  22. package/src/tigerbeetle/scripts/install.bat +0 -7
  23. package/src/tigerbeetle/scripts/install.sh +0 -21
  24. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  25. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  26. package/src/tigerbeetle/scripts/lint.zig +0 -199
  27. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  28. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -48
  29. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  30. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  31. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  32. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  33. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  34. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  35. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  36. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  37. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  38. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  39. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  40. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  41. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  42. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  43. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  44. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  45. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  46. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  47. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  48. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  49. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  50. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  51. package/src/tigerbeetle/src/benchmark.zig +0 -314
  52. package/src/tigerbeetle/src/config.zig +0 -234
  53. package/src/tigerbeetle/src/constants.zig +0 -436
  54. package/src/tigerbeetle/src/ewah.zig +0 -286
  55. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  56. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  57. package/src/tigerbeetle/src/fifo.zig +0 -120
  58. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  59. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  60. package/src/tigerbeetle/src/io/linux.zig +0 -1062
  61. package/src/tigerbeetle/src/io/test.zig +0 -643
  62. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  63. package/src/tigerbeetle/src/io.zig +0 -34
  64. package/src/tigerbeetle/src/iops.zig +0 -107
  65. package/src/tigerbeetle/src/lsm/README.md +0 -308
  66. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  67. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  68. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  69. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  70. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  71. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  72. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  73. package/src/tigerbeetle/src/lsm/forest.zig +0 -204
  74. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -401
  75. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  76. package/src/tigerbeetle/src/lsm/groove.zig +0 -972
  77. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  78. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  79. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  80. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -877
  81. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  82. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  83. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  84. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  85. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -378
  86. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1328
  87. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  88. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  89. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  90. package/src/tigerbeetle/src/lsm/table.zig +0 -1031
  91. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -203
  92. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  93. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -220
  94. package/src/tigerbeetle/src/lsm/test.zig +0 -438
  95. package/src/tigerbeetle/src/lsm/tree.zig +0 -1193
  96. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -474
  97. package/src/tigerbeetle/src/message_bus.zig +0 -1012
  98. package/src/tigerbeetle/src/message_pool.zig +0 -156
  99. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  100. package/src/tigerbeetle/src/simulator.zig +0 -569
  101. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -577
  102. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  103. package/src/tigerbeetle/src/state_machine.zig +0 -1881
  104. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  105. package/src/tigerbeetle/src/stdx.zig +0 -162
  106. package/src/tigerbeetle/src/storage.zig +0 -393
  107. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  108. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  109. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  110. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  111. package/src/tigerbeetle/src/testing/cluster.zig +0 -443
  112. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  113. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  114. package/src/tigerbeetle/src/testing/id.zig +0 -99
  115. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -364
  116. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  117. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  118. package/src/tigerbeetle/src/testing/state_machine.zig +0 -249
  119. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  120. package/src/tigerbeetle/src/testing/table.zig +0 -247
  121. package/src/tigerbeetle/src/testing/time.zig +0 -84
  122. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  123. package/src/tigerbeetle/src/time.zig +0 -112
  124. package/src/tigerbeetle/src/tracer.zig +0 -529
  125. package/src/tigerbeetle/src/unit_tests.zig +0 -42
  126. package/src/tigerbeetle/src/vopr.zig +0 -495
  127. package/src/tigerbeetle/src/vsr/README.md +0 -209
  128. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  129. package/src/tigerbeetle/src/vsr/clock.zig +0 -853
  130. package/src/tigerbeetle/src/vsr/journal.zig +0 -2413
  131. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  132. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  133. package/src/tigerbeetle/src/vsr/replica.zig +0 -6381
  134. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  135. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  136. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  137. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  138. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  139. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  140. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  141. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  142. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  143. package/src/tigerbeetle/src/vsr.zig +0 -1352
@@ -1,789 +0,0 @@
1
- //! Maintains an on-disk manifest log of the latest TableInfo's in an LSM tree's in-memory manifest.
2
- //!
3
- //! Invariants:
4
- //!
5
- //! * Checkpointing the manifest log must flush all buffered log blocks.
6
- //!
7
- //! * Opening the manifest log must emit only the latest TableInfo's to be inserted.
8
- //!
9
- //! * Opening the manifest log after a crash must result in exactly the same `compaction_set` in
10
- //! `SuperBlock.Manifest` as before the crash assuming that the crash was exactly at a checkpoint.
11
- //!
12
- //! * The latest version of a table must never be dropped from the log through a compaction, unless
13
- //! the table was removed.
14
- //!
15
- //! * Removes that are recorded in a log block must also queue that log block for compaction.
16
- //!
17
- //! * Compaction must compact partially full blocks, even where it must rewrite all entries to the
18
- //! tail end of the log.
19
- //!
20
- //! * If a remove is dropped from the log, then all prior inserts must already have been dropped.
21
-
22
- const std = @import("std");
23
- const assert = std.debug.assert;
24
- const math = std.math;
25
- const mem = std.mem;
26
-
27
- const log = std.log.scoped(.manifest_log);
28
-
29
- const constants = @import("../constants.zig");
30
- const vsr = @import("../vsr.zig");
31
- const stdx = @import("../stdx.zig");
32
-
33
- const SuperBlockType = vsr.SuperBlockType;
34
- const GridType = @import("grid.zig").GridType;
35
- const BlockType = @import("grid.zig").BlockType;
36
- const alloc_block = @import("grid.zig").alloc_block;
37
- const tree = @import("tree.zig");
38
- const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
39
-
40
- /// ManifestLog block schema:
41
- /// │ vsr.Header │ operation=BlockType.manifest
42
- /// │ [entry_count_max]Label │ level index, insert|remove
43
- /// │ [≤entry_count_max]TableInfo │
44
- /// │ […]u8{0} │ padding (to end of block)
45
- /// Label and TableInfo entries correspond.
46
- pub fn ManifestLogType(comptime Storage: type, comptime TableInfo: type) type {
47
- return struct {
48
- const ManifestLog = @This();
49
-
50
- const SuperBlock = SuperBlockType(Storage);
51
- const Grid = GridType(Storage);
52
-
53
- pub const Block = ManifestLogBlockType(Storage, TableInfo);
54
- const BlockPtr = Grid.BlockPtr;
55
- const BlockPtrConst = Grid.BlockPtrConst;
56
- const Label = Block.Label;
57
-
58
- pub const Callback = fn (manifest_log: *ManifestLog) void;
59
-
60
- pub const OpenEvent = fn (
61
- manifest_log: *ManifestLog,
62
- level: u7,
63
- table: *const TableInfo,
64
- ) void;
65
-
66
- const alignment = 16;
67
-
68
- comptime {
69
- // Bit 7 is reserved to indicate whether the event is an insert or remove.
70
- assert(constants.lsm_levels <= math.maxInt(u7) + 1);
71
-
72
- assert(@sizeOf(Label) == @sizeOf(u8));
73
-
74
- // All TableInfo's should already be 16-byte aligned because of the leading checksum.
75
- assert(@alignOf(TableInfo) == alignment);
76
-
77
- // For keys { 8, 16, 24, 32 } all TableInfo's should be a multiple of the alignment.
78
- // However, we still store Label ahead of TableInfo to save space on the network.
79
- // This means we store fewer entries per manifest block, to gain less padding,
80
- // since we must store entry_count_max of whichever array is first in the layout.
81
- // For a better understanding of this decision, see Block.size() below.
82
- assert(@sizeOf(TableInfo) % alignment == 0);
83
- }
84
-
85
- /// The maximum number of table updates to the manifest by a half-measure of table
86
- /// compaction (not including manifest log compaction).
87
- ///
88
- /// Input tables are updated in the manifest (snapshot_max is reduced).
89
- /// Input tables are removed from the manifest (if not held by a persistent snapshot).
90
- /// Output tables are inserted into the manifest.
91
- // TODO If insert-then-remove can update in-memory, then we only need to count input tables once.
92
- pub const compaction_appends_max = tree.compactions_max *
93
- (tree.compaction_tables_input_max + // Update snapshot_max.
94
- tree.compaction_tables_input_max + // Remove.
95
- tree.compaction_tables_output_max);
96
-
97
- const blocks_count_appends = stdx.div_ceil(compaction_appends_max, Block.entry_count_max);
98
-
99
- /// The upper-bound of manifest log blocks we must buffer.
100
- ///
101
- /// `blocks` must have sufficient capacity for:
102
- /// - a manifest log compaction (+1 block in the worst case)
103
- /// - a leftover open block from the previous ops (+1 block)
104
- /// - table updates from a half bar of compactions
105
- /// (This is typically +1 block, but may be more when the block size is small).
106
- /// TODO(Beat compaction): blocks_count_appends only needs enough for 1 beat.
107
- const blocks_count_max = 1 + 1 + blocks_count_appends;
108
-
109
- comptime {
110
- assert(blocks_count_max >= 3);
111
- assert(blocks_count_max == 3 or constants.block_size < 64 * 1024);
112
- }
113
-
114
- superblock: *SuperBlock,
115
- grid: *Grid,
116
- grid_reservation: ?Grid.Reservation = null,
117
- tree_hash: u128,
118
-
119
- /// The head block is used to accumulate a full block, to be written at the next flush.
120
- /// The remaining blocks must accommodate all further appends.
121
- blocks: RingBuffer(BlockPtr, blocks_count_max, .array),
122
-
123
- /// The number of blocks that have been appended to, filled up, and then closed.
124
- blocks_closed: u8 = 0,
125
-
126
- /// The number of entries in the open block.
127
- ///
128
- /// Invariants:
129
- /// - When `entry_count = 0`, there is no open block.
130
- /// - `entry_count < entry_count_max`. When `entry_count` reaches the maximum, the open
131
- /// block is closed, and `entry_count` resets to 0.
132
- entry_count: u32 = 0,
133
-
134
- opened: bool = false,
135
- open_event: OpenEvent = undefined,
136
- open_iterator: SuperBlock.Manifest.IteratorReverse = undefined,
137
-
138
- /// Set for the duration of `compact`.
139
- reading: bool = false,
140
- read: Grid.Read = undefined,
141
- read_callback: ?Callback = null,
142
- read_block_reference: ?SuperBlock.Manifest.BlockReference = null,
143
-
144
- /// Set for the duration of `flush` and `checkpoint`.
145
- writing: bool = false,
146
- write: Grid.Write = undefined,
147
- write_callback: ?Callback = null,
148
-
149
- pub fn init(allocator: mem.Allocator, grid: *Grid, tree_hash: u128) !ManifestLog {
150
- // TODO RingBuffer for .pointer should be extended to take care of alignment:
151
-
152
- var blocks: [blocks_count_max]BlockPtr = undefined;
153
- for (blocks) |*block, i| {
154
- errdefer for (blocks[0..i]) |b| allocator.free(b);
155
- block.* = try alloc_block(allocator);
156
- }
157
- errdefer for (blocks) |b| allocator.free(b);
158
-
159
- return ManifestLog{
160
- .superblock = grid.superblock,
161
- .grid = grid,
162
- .tree_hash = tree_hash,
163
- .blocks = .{ .buffer = blocks },
164
- };
165
- }
166
-
167
- pub fn deinit(manifest_log: *ManifestLog, allocator: mem.Allocator) void {
168
- for (manifest_log.blocks.buffer) |block| allocator.free(block);
169
- }
170
-
171
- /// Opens the manifest log.
172
- /// Reads the manifest blocks in reverse order and passes extent table inserts to event().
173
- /// Therefore, only the latest version of a table will be emitted by event() for insertion
174
- /// into the in-memory manifest. Older versions of a table in older manifest blocks will not
175
- /// be emitted, as an optimization to not replay all table mutations.
176
- /// SuperBlock.Manifest.tables is used to track the latest version of a table.
177
- pub fn open(manifest_log: *ManifestLog, event: OpenEvent, callback: Callback) void {
178
- assert(!manifest_log.opened);
179
- assert(!manifest_log.reading);
180
- assert(!manifest_log.writing);
181
- assert(manifest_log.read_callback == null);
182
-
183
- assert(manifest_log.blocks.count == 0);
184
- assert(manifest_log.blocks_closed == 0);
185
- assert(manifest_log.entry_count == 0);
186
-
187
- manifest_log.open_event = event;
188
- manifest_log.open_iterator = manifest_log.superblock.manifest.iterator_reverse(
189
- manifest_log.tree_hash,
190
- );
191
-
192
- manifest_log.reading = true;
193
- manifest_log.read_callback = callback;
194
-
195
- manifest_log.open_read_block();
196
- }
197
-
198
- fn open_read_block(manifest_log: *ManifestLog) void {
199
- assert(!manifest_log.opened);
200
- assert(manifest_log.reading);
201
- assert(!manifest_log.writing);
202
-
203
- assert(manifest_log.blocks.count == 0);
204
- assert(manifest_log.blocks_closed == 0);
205
- assert(manifest_log.entry_count == 0);
206
-
207
- manifest_log.read_block_reference = manifest_log.open_iterator.next();
208
-
209
- if (manifest_log.read_block_reference) |block| {
210
- assert(block.tree == manifest_log.tree_hash);
211
- assert(block.address > 0);
212
-
213
- manifest_log.grid.read_block(
214
- open_read_block_callback,
215
- &manifest_log.read,
216
- block.address,
217
- block.checksum,
218
- .manifest,
219
- );
220
- } else {
221
- manifest_log.opened = true;
222
- manifest_log.open_event = undefined;
223
- manifest_log.open_iterator = undefined;
224
-
225
- const callback = manifest_log.read_callback.?;
226
- manifest_log.reading = false;
227
- manifest_log.read_callback = null;
228
- assert(manifest_log.read_block_reference == null);
229
-
230
- callback(manifest_log);
231
- }
232
- }
233
-
234
- fn open_read_block_callback(read: *Grid.Read, block: Grid.BlockPtrConst) void {
235
- const manifest_log = @fieldParentPtr(ManifestLog, "read", read);
236
- assert(!manifest_log.opened);
237
- assert(manifest_log.reading);
238
- assert(!manifest_log.writing);
239
-
240
- const block_reference = manifest_log.read_block_reference.?;
241
- verify_block(block, block_reference.checksum, block_reference.address);
242
-
243
- const entry_count = Block.entry_count(block);
244
- const labels_used = Block.labels_const(block)[0..entry_count];
245
- const tables_used = Block.tables_const(block)[0..entry_count];
246
-
247
- const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
248
-
249
- var entry = entry_count;
250
- while (entry > 0) {
251
- entry -= 1;
252
-
253
- const label = labels_used[entry];
254
- const table = &tables_used[entry];
255
-
256
- if (manifest.insert_table_extent(manifest_log.tree_hash, table.address, block_reference.address, entry)) {
257
- switch (label.event) {
258
- .insert => manifest_log.open_event(manifest_log, label.level, table),
259
- .remove => manifest.queue_for_compaction(block_reference.address),
260
- }
261
- } else {
262
- manifest.queue_for_compaction(block_reference.address);
263
- }
264
- }
265
-
266
- if (Block.entry_count(block) < Block.entry_count_max) {
267
- manifest.queue_for_compaction(block_reference.address);
268
- }
269
-
270
- log.debug("{}: opened: checksum={} address={} entries={}", .{
271
- manifest_log.tree_hash,
272
- block_reference.checksum,
273
- block_reference.address,
274
- entry_count,
275
- });
276
-
277
- manifest_log.open_read_block();
278
- }
279
-
280
- /// Appends an insert, an update, or a direct move of a table to a level.
281
- /// A move is only recorded as an insert, there is no remove from the previous level, since
282
- /// this is safer (no potential to get the event order wrong) and reduces fragmentation.
283
- pub fn insert(manifest_log: *ManifestLog, level: u7, table: *const TableInfo) void {
284
- assert(!manifest_log.writing);
285
- manifest_log.append(.{ .level = level, .event = .insert }, table);
286
- }
287
-
288
- /// Appends the removal of a table from a level.
289
- /// The table must have previously been inserted to the manifest log.
290
- pub fn remove(manifest_log: *ManifestLog, level: u7, table: *const TableInfo) void {
291
- assert(!manifest_log.writing);
292
- manifest_log.append(.{ .level = level, .event = .remove }, table);
293
- }
294
-
295
- fn append(manifest_log: *ManifestLog, label: Label, table: *const TableInfo) void {
296
- assert(manifest_log.opened);
297
- assert(label.level < constants.lsm_levels);
298
- assert(table.address > 0);
299
- assert(table.snapshot_min > 0);
300
- assert(table.snapshot_max > table.snapshot_min);
301
-
302
- if (manifest_log.entry_count == 0) {
303
- assert(manifest_log.blocks.count == manifest_log.blocks_closed);
304
- manifest_log.acquire_block();
305
- } else if (manifest_log.entry_count > 0) {
306
- assert(manifest_log.blocks.count > 0);
307
- }
308
-
309
- assert(manifest_log.entry_count < Block.entry_count_max);
310
- assert(manifest_log.blocks.count - manifest_log.blocks_closed == 1);
311
-
312
- log.debug(
313
- "{}: {s}: level={} checksum={} address={} flags={} snapshot={}..{}",
314
- .{
315
- manifest_log.tree_hash,
316
- @tagName(label.event),
317
- label.level,
318
- table.checksum,
319
- table.address,
320
- table.flags,
321
- table.snapshot_min,
322
- table.snapshot_max,
323
- },
324
- );
325
-
326
- const block: BlockPtr = manifest_log.blocks.tail().?;
327
- const entry = manifest_log.entry_count;
328
- Block.labels(block)[entry] = label;
329
- Block.tables(block)[entry] = table.*;
330
-
331
- const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
332
- const address = Block.address(block);
333
- if (manifest.update_table_extent(manifest_log.tree_hash, table.address, address, entry)) |previous_block| {
334
- manifest.queue_for_compaction(previous_block);
335
- if (label.event == .remove) manifest.queue_for_compaction(address);
336
- } else {
337
- // A remove must remove an insert, which implies that it must update the extent.
338
- assert(label.event != .remove);
339
- }
340
-
341
- manifest_log.entry_count += 1;
342
- if (manifest_log.entry_count == Block.entry_count_max) {
343
- manifest_log.close_block();
344
- assert(manifest_log.entry_count == 0);
345
- }
346
- }
347
-
348
- fn flush(manifest_log: *ManifestLog, callback: Callback) void {
349
- assert(manifest_log.opened);
350
- assert(!manifest_log.reading);
351
- assert(!manifest_log.writing);
352
- assert(manifest_log.write_callback == null);
353
-
354
- manifest_log.writing = true;
355
- manifest_log.write_callback = callback;
356
-
357
- log.debug("{}: flush: writing {} block(s)", .{
358
- manifest_log.tree_hash,
359
- manifest_log.blocks_closed,
360
- });
361
-
362
- // The manifest is updated synchronously relative to the beginning of compact() and
363
- // checkpoint() so that the SuperBlock.Manifest.append()s are deterministic relative
364
- // to other trees' manifest logs.
365
- const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
366
- var i: usize = 0;
367
- while (i < manifest_log.blocks_closed) : (i += 1) {
368
- const block = manifest_log.blocks.get_ptr(i).?.*;
369
- verify_block(block, null, null);
370
-
371
- const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
372
- const address = Block.address(block);
373
- assert(address > 0);
374
-
375
- manifest.append(manifest_log.tree_hash, header.checksum, address);
376
- if (Block.entry_count(block) < Block.entry_count_max) {
377
- manifest.queue_for_compaction(address);
378
- }
379
- }
380
-
381
- manifest_log.write_block();
382
- }
383
-
384
- fn write_block(manifest_log: *ManifestLog) void {
385
- assert(manifest_log.opened);
386
- assert(manifest_log.writing);
387
- assert(manifest_log.blocks_closed <= manifest_log.blocks.count);
388
-
389
- if (manifest_log.blocks_closed == 0) {
390
- if (manifest_log.blocks.count == 0) {
391
- assert(manifest_log.entry_count == 0);
392
- } else {
393
- assert(manifest_log.blocks.count == 1);
394
- assert(manifest_log.entry_count < Block.entry_count_max);
395
- }
396
-
397
- const callback = manifest_log.write_callback.?;
398
- manifest_log.write_callback = null;
399
- manifest_log.writing = false;
400
-
401
- callback(manifest_log);
402
- return;
403
- }
404
-
405
- const block = manifest_log.blocks.head_ptr().?;
406
- verify_block(block.*, null, null);
407
-
408
- const header = mem.bytesAsValue(vsr.Header, block.*[0..@sizeOf(vsr.Header)]);
409
- const address = Block.address(block.*);
410
- assert(address > 0);
411
-
412
- const entry_count = Block.entry_count(block.*);
413
-
414
- if (manifest_log.blocks_closed == 1 and manifest_log.blocks.count == 1) {
415
- // This might be the last block of a checkpoint, which can be a partial block.
416
- assert(entry_count > 0);
417
- } else {
418
- assert(entry_count == Block.entry_count_max);
419
- }
420
-
421
- log.debug("{}: write_block: checksum={} address={} entries={}", .{
422
- manifest_log.tree_hash,
423
- header.checksum,
424
- address,
425
- entry_count,
426
- });
427
-
428
- manifest_log.grid.write_block(
429
- write_block_callback,
430
- &manifest_log.write,
431
- block,
432
- address,
433
- );
434
- manifest_log.blocks.advance_head();
435
- }
436
-
437
- fn write_block_callback(write: *Grid.Write) void {
438
- const manifest_log = @fieldParentPtr(ManifestLog, "write", write);
439
- assert(manifest_log.opened);
440
- assert(manifest_log.writing);
441
-
442
- manifest_log.blocks_closed -= 1;
443
- assert(manifest_log.blocks_closed <= manifest_log.blocks.count);
444
-
445
- manifest_log.write_block();
446
- }
447
-
448
- pub fn reserve(manifest_log: *ManifestLog) void {
449
- assert(manifest_log.opened);
450
- assert(!manifest_log.reading);
451
- assert(!manifest_log.writing);
452
- assert(manifest_log.read_callback == null);
453
- assert(manifest_log.write_callback == null);
454
- assert(manifest_log.grid_reservation == null);
455
- // reserve() is called at the start of compaction, so we have:
456
- // - at most 1 closed block, and
457
- // - at most 1 open block
458
- // due to the last log compaction plus a leftover partial block.
459
- assert(manifest_log.blocks_closed <= 1);
460
- assert(manifest_log.blocks.count <= manifest_log.blocks_closed + 1);
461
-
462
- // TODO Make sure this cannot fail — before compaction begins verify that enough free
463
- // blocks are available for all reservations.
464
- // +1 for the manifest log block compaction, which acquires at most one block.
465
- manifest_log.grid_reservation = manifest_log.grid.reserve(1 + blocks_count_appends).?;
466
- }
467
-
468
- /// `compact` does not close a partial block; that is only necessary during `checkpoint`.
469
- pub fn compact(manifest_log: *ManifestLog, callback: Callback) void {
470
- assert(manifest_log.opened);
471
- assert(!manifest_log.reading);
472
- assert(!manifest_log.writing);
473
- assert(manifest_log.read_callback == null);
474
- assert(manifest_log.write_callback == null);
475
- assert(manifest_log.grid_reservation != null);
476
-
477
- const free_set = manifest_log.grid.superblock.free_set;
478
- assert(free_set.count_free_reserved(manifest_log.grid_reservation.?) >= 1);
479
-
480
- manifest_log.read_callback = callback;
481
- manifest_log.flush(compact_flush_callback);
482
- }
483
-
484
- fn compact_flush_callback(manifest_log: *ManifestLog) void {
485
- const callback = manifest_log.read_callback.?;
486
-
487
- assert(manifest_log.opened);
488
- assert(!manifest_log.reading);
489
- assert(!manifest_log.writing);
490
- assert(manifest_log.blocks_closed == 0);
491
- assert(manifest_log.grid_reservation != null);
492
-
493
- const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
494
-
495
- // Compact a single manifest block — to minimize latency spikes, we want to do the bare
496
- // minimum of compaction work required.
497
- // TODO Compact more than 1 block if fragmentation is outstripping the compaction rate.
498
- // (Make sure to update the grid block reservation to account for this).
499
- // Or assert that compactions cannot update blocks fast enough to outpace manifest
500
- // log compaction (relative to the number of updates that fit in a manifest log block).
501
- if (manifest.oldest_block_queued_for_compaction(manifest_log.tree_hash)) |block| {
502
- assert(block.tree == manifest_log.tree_hash);
503
- assert(block.address > 0);
504
-
505
- manifest_log.reading = true;
506
- manifest_log.read_block_reference = block;
507
-
508
- manifest_log.grid.read_block(
509
- compact_read_block_callback,
510
- &manifest_log.read,
511
- block.address,
512
- block.checksum,
513
- .manifest,
514
- );
515
- } else {
516
- manifest_log.read_callback = null;
517
- manifest_log.grid.forfeit(manifest_log.grid_reservation.?);
518
- manifest_log.grid_reservation = null;
519
- callback(manifest_log);
520
- }
521
- }
522
-
523
- fn compact_read_block_callback(read: *Grid.Read, block: BlockPtrConst) void {
524
- const manifest_log = @fieldParentPtr(ManifestLog, "read", read);
525
- assert(manifest_log.opened);
526
- assert(manifest_log.reading);
527
- assert(!manifest_log.writing);
528
-
529
- const block_reference = manifest_log.read_block_reference.?;
530
- verify_block(block, block_reference.checksum, block_reference.address);
531
-
532
- const entry_count = Block.entry_count(block);
533
- const labels_used = Block.labels_const(block)[0..entry_count];
534
- const tables_used = Block.tables_const(block)[0..entry_count];
535
-
536
- const manifest: *SuperBlock.Manifest = &manifest_log.superblock.manifest;
537
- assert(manifest.tables.count() > 0);
538
-
539
- var frees: u32 = 0;
540
- var entry: u32 = 0;
541
- while (entry < entry_count) : (entry += 1) {
542
- const label = labels_used[entry];
543
- const table = &tables_used[entry];
544
-
545
- // Remove the extent if the table is the latest version.
546
- // We must iterate entries in forward order to drop the extent here.
547
- // Otherwise, stale versions earlier in the block may reappear.
548
- if (manifest.remove_table_extent(manifest_log.tree_hash, table.address, block_reference.address, entry)) {
549
- switch (label.event) {
550
- // Append the table, updating the table extent:
551
- .insert => manifest_log.append(label, table),
552
- // Since we compact oldest blocks first, we know that we have already
553
- // compacted all inserts that were eclipsed by this remove, so this remove
554
- // can now be safely dropped.
555
- .remove => frees += 1,
556
- }
557
- } else {
558
- // The table is not the latest version and can be dropped.
559
- frees += 1;
560
- }
561
- }
562
-
563
- log.debug("{}: compacted: checksum={} address={} frees={}/{}", .{
564
- manifest_log.tree_hash,
565
- block_reference.checksum,
566
- block_reference.address,
567
- frees,
568
- entry_count,
569
- });
570
-
571
- // Blocks may be compacted if they contain frees, or are not completely full.
572
- // For example, a partial block may be flushed as part of a checkpoint.
573
- assert(frees > 0 or entry_count < Block.entry_count_max);
574
-
575
- assert(manifest.queued_for_compaction(block_reference.address));
576
- manifest.remove(
577
- manifest_log.tree_hash,
578
- block_reference.checksum,
579
- block_reference.address,
580
- );
581
- assert(!manifest.queued_for_compaction(block_reference.address));
582
-
583
- manifest_log.grid.release(block_reference.address);
584
- manifest_log.grid.forfeit(manifest_log.grid_reservation.?);
585
- manifest_log.grid_reservation = null;
586
-
587
- const callback = manifest_log.read_callback.?;
588
- manifest_log.reading = false;
589
- manifest_log.read_callback = null;
590
- manifest_log.read_block_reference = null;
591
-
592
- callback(manifest_log);
593
- }
594
-
595
/// Closes the in-progress block, if it holds any entries, and then flushes.
///
/// Must be called while the log is opened, with no read or write in flight,
/// no pending write callback, and no outstanding grid reservation.
/// `callback` is handed to `flush()`.
pub fn checkpoint(manifest_log: *ManifestLog, callback: Callback) void {
    assert(manifest_log.opened);
    assert(!manifest_log.reading);
    assert(!manifest_log.writing);
    assert(manifest_log.write_callback == null);
    assert(manifest_log.grid_reservation == null);

    // A partially filled block must be closed so that it is included in the flush.
    const has_open_block = manifest_log.entry_count > 0;
    if (has_open_block) {
        manifest_log.close_block();

        // After closing, no entries are pending and every block in the queue is closed.
        assert(manifest_log.entry_count == 0);
        assert(manifest_log.blocks_closed > 0);
        assert(manifest_log.blocks.count == manifest_log.blocks_closed);
    }

    manifest_log.flush(callback);
}
611
-
612
/// Advances the tail of `blocks` and initializes the header of the new tail block.
///
/// The header's `op` is set to the address acquired from the grid using the current
/// reservation. `size` is left undefined here; `close_block()` assigns it.
/// Requires that every existing block is closed and that `blocks` is not full.
fn acquire_block(manifest_log: *ManifestLog) void {
    assert(manifest_log.opened);
    assert(manifest_log.entry_count == 0);
    assert(manifest_log.blocks_closed == manifest_log.blocks.count);
    assert(!manifest_log.blocks.full());

    manifest_log.blocks.advance_tail();
    const tail_block: BlockPtr = manifest_log.blocks.tail().?;

    // The header occupies the first @sizeOf(vsr.Header) bytes of the block.
    const header_bytes = tail_block[0..@sizeOf(vsr.Header)];
    const tail_header = mem.bytesAsValue(vsr.Header, header_bytes);
    tail_header.* = .{
        .cluster = manifest_log.superblock.working.cluster,
        .op = manifest_log.grid.acquire(manifest_log.grid_reservation.?),
        .size = undefined,
        .command = .block,
        .operation = BlockType.manifest.operation(),
    };
}
631
-
632
/// Finalizes the tail block: sets its size from the pending entry count, zeroes
/// everything that is unused, computes both checksums, and marks the block closed.
///
/// Requires exactly one open block (the tail) holding at least one entry.
/// On return `entry_count` is reset to zero and `blocks_closed` equals `blocks.count`.
fn close_block(manifest_log: *ManifestLog) void {
    assert(manifest_log.blocks.count == manifest_log.blocks_closed + 1);

    const tail_block: BlockPtr = manifest_log.blocks.tail().?;
    const entries = manifest_log.entry_count;
    assert(entries > 0);
    assert(entries <= Block.entry_count_max);

    const tail_header = mem.bytesAsValue(vsr.Header, tail_block[0..@sizeOf(vsr.Header)]);
    assert(tail_header.cluster == manifest_log.superblock.working.cluster);
    assert(tail_header.op > 0);
    assert(tail_header.command == .block);
    tail_header.size = Block.size(entries);

    // Zero the labels beyond the last used entry.
    const unused_labels = Block.labels(tail_block)[entries..];
    mem.set(u8, mem.sliceAsBytes(unused_labels), 0);

    // Zero everything past the encoded size: the unused tables and the padding.
    mem.set(u8, tail_block[tail_header.size..], 0);

    // The body checksum must be set before the header checksum is computed.
    const body = tail_block[@sizeOf(vsr.Header)..tail_header.size];
    tail_header.set_checksum_body(body);
    tail_header.set_checksum();

    verify_block(tail_block, null, null);
    assert(Block.entry_count(tail_block) == entries);

    log.debug("{}: close_block: checksum={} address={} entries={}", .{
        manifest_log.tree_hash,
        tail_header.checksum,
        Block.address(tail_block),
        entries,
    });

    manifest_log.blocks_closed += 1;
    manifest_log.entry_count = 0;
    assert(manifest_log.blocks_closed == manifest_log.blocks.count);
}
669
-
670
/// Asserts that `block` is a well-formed manifest block.
///
/// - The header's operation must map to the `.manifest` block type.
/// - When `constants.verify` is set, the header and body checksums are validated.
/// - When `checksum` is non-null, the header checksum must equal it.
/// - The block address must be non-zero and, when `address` is non-null, equal to it.
/// - The block must contain at least one entry.
fn verify_block(block: BlockPtrConst, checksum: ?u128, address: ?u64) void {
    const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
    assert(BlockType.from(header.operation) == .manifest);

    if (constants.verify) {
        assert(header.valid_checksum());
        assert(header.valid_checksum_body(block[@sizeOf(vsr.Header)..header.size]));
    }

    if (checksum) |expected_checksum| {
        assert(header.checksum == expected_checksum);
    }

    const actual_address = Block.address(block);
    assert(actual_address > 0);
    if (address) |expected_address| {
        assert(actual_address == expected_address);
    }

    assert(Block.entry_count(block) > 0);
}
687
- };
688
- }
689
-
690
/// Returns a namespace describing how manifest log entries are laid out in a block.
///
/// Layout, as encoded by `size()`, `labels()` and `tables()`:
///
///   [vsr.Header][entry_count_max × Label][entry_count × TableInfo][unused remainder]
///
/// The label region always has room for `entry_count_max` labels, so the table region
/// starts at a fixed offset regardless of how many entries the block holds.
fn ManifestLogBlockType(comptime Storage: type, comptime TableInfo: type) type {
    return struct {
        const Grid = GridType(Storage);
        const BlockPtr = Grid.BlockPtr;
        const BlockPtrConst = Grid.BlockPtrConst;

        /// One byte per entry: the level (7 bits) and whether the entry is an
        /// insert or a remove (1 bit).
        pub const Label = packed struct {
            level: u7,
            event: enum(u1) { insert, remove },
        };

        const block_body_size = constants.block_size - @sizeOf(vsr.Header);
        const entry_size = @sizeOf(Label) + @sizeOf(TableInfo);
        const entry_count_max_unaligned = @divFloor(block_body_size, entry_size);

        /// The maximum number of entries per block, rounded down to a multiple of
        /// `@alignOf(TableInfo)`.
        pub const entry_count_max = @divFloor(
            entry_count_max_unaligned,
            @alignOf(TableInfo),
        ) * @alignOf(TableInfo);

        /// Byte size of the (fixed-size) label region.
        const labels_size_max = entry_count_max * @sizeOf(Label);

        /// Byte size of the table region when the block is full.
        const tables_size_max = entry_count_max * @sizeOf(TableInfo);

        comptime {
            assert(entry_count_max > 0);
            // Both regions must end on a TableInfo alignment boundary.
            assert((entry_count_max * @sizeOf(Label)) % @alignOf(TableInfo) == 0);
            assert((entry_count_max * @sizeOf(TableInfo)) % @alignOf(TableInfo) == 0);
        }

        /// Returns the block's address, which is stored in the header's `op` field.
        pub fn address(block: BlockPtrConst) u64 {
            const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
            assert(header.command == .block);

            const block_address = header.op;
            assert(block_address > 0);
            return block_address;
        }

        /// Returns the checksum stored in the block's header.
        pub fn checksum(block: BlockPtrConst) u128 {
            const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
            assert(header.command == .block);

            return header.checksum;
        }

        /// Derives the number of entries in the block from the header's `size`.
        /// This is the inverse of `size()`.
        pub fn entry_count(block: BlockPtrConst) u32 {
            const header = mem.bytesAsValue(vsr.Header, block[0..@sizeOf(vsr.Header)]);
            assert(header.command == .block);

            // Whatever follows the header and the fixed label region is table data.
            const tables_size = header.size - @sizeOf(vsr.Header) - labels_size_max;

            const entries_used = @intCast(u32, @divExact(tables_size, @sizeOf(TableInfo)));
            assert(entries_used > 0);
            assert(entries_used <= entry_count_max);
            return entries_used;
        }

        /// Returns the encoded size in bytes (header included) of a block that
        /// holds `entry_count_` entries.
        pub fn size(entry_count_: u32) u32 {
            assert(entry_count_ > 0);
            assert(entry_count_ <= entry_count_max);

            // The smaller type (Label) is encoded first, since its region is always
            // sized for entry_count_max entries.
            const labels_size = entry_count_max * @sizeOf(Label);
            assert(labels_size == labels_size_max);
            assert((@sizeOf(vsr.Header) + labels_size) % @alignOf(TableInfo) == 0);

            const tables_size = entry_count_ * @sizeOf(TableInfo);
            return @sizeOf(vsr.Header) + labels_size + tables_size;
        }

        /// Returns a mutable view of all `entry_count_max` label slots in the block.
        pub fn labels(block: BlockPtr) *[entry_count_max]Label {
            const region = block[@sizeOf(vsr.Header)..][0..labels_size_max];
            return mem.bytesAsSlice(Label, region)[0..entry_count_max];
        }

        /// Returns a read-only view of all `entry_count_max` label slots in the block.
        pub fn labels_const(block: BlockPtrConst) *const [entry_count_max]Label {
            const region = block[@sizeOf(vsr.Header)..][0..labels_size_max];
            return mem.bytesAsSlice(Label, region)[0..entry_count_max];
        }

        /// Returns a mutable view of all `entry_count_max` table slots in the block.
        pub fn tables(block: BlockPtr) *[entry_count_max]TableInfo {
            const region = block[@sizeOf(vsr.Header) + labels_size_max ..][0..tables_size_max];
            return mem.bytesAsSlice(TableInfo, region)[0..entry_count_max];
        }

        /// Returns a read-only view of all `entry_count_max` table slots in the block.
        pub fn tables_const(block: BlockPtrConst) *const [entry_count_max]TableInfo {
            const region = block[@sizeOf(vsr.Header) + labels_size_max ..][0..tables_size_max];
            return mem.bytesAsSlice(TableInfo, region)[0..entry_count_max];
        }
    };
}