tigerbeetle-node 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.client.node.sha256 +1 -0
- package/package.json +5 -3
- package/src/tigerbeetle/scripts/fuzz_loop.sh +1 -1
- package/src/tigerbeetle/scripts/pre-commit.sh +2 -2
- package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
- package/src/tigerbeetle/src/benchmark.zig +25 -11
- package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
- package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
- package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
- package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
- package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -256
- package/src/tigerbeetle/src/c/tb_client.h +18 -4
- package/src/tigerbeetle/src/c/tb_client.zig +88 -26
- package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
- package/src/tigerbeetle/src/c/test.zig +371 -1
- package/src/tigerbeetle/src/cli.zig +90 -18
- package/src/tigerbeetle/src/config.zig +12 -4
- package/src/tigerbeetle/src/demo.zig +2 -1
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
- package/src/tigerbeetle/src/ewah.zig +11 -33
- package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
- package/src/tigerbeetle/src/lsm/README.md +97 -3
- package/src/tigerbeetle/src/lsm/compaction.zig +32 -7
- package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +34 -32
- package/src/tigerbeetle/src/lsm/grid.zig +39 -21
- package/src/tigerbeetle/src/lsm/groove.zig +1 -0
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +3 -3
- package/src/tigerbeetle/src/lsm/level_iterator.zig +1 -1
- package/src/tigerbeetle/src/lsm/manifest.zig +13 -0
- package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -49
- package/src/tigerbeetle/src/lsm/manifest_log.zig +173 -335
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
- package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
- package/src/tigerbeetle/src/lsm/posted_groove.zig +1 -0
- package/src/tigerbeetle/src/lsm/segmented_array.zig +24 -15
- package/src/tigerbeetle/src/lsm/table.zig +32 -20
- package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
- package/src/tigerbeetle/src/lsm/table_iterator.zig +4 -5
- package/src/tigerbeetle/src/lsm/test.zig +13 -2
- package/src/tigerbeetle/src/lsm/tree.zig +45 -7
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +36 -32
- package/src/tigerbeetle/src/main.zig +69 -13
- package/src/tigerbeetle/src/message_bus.zig +18 -7
- package/src/tigerbeetle/src/message_pool.zig +8 -2
- package/src/tigerbeetle/src/ring_buffer.zig +7 -3
- package/src/tigerbeetle/src/simulator.zig +38 -11
- package/src/tigerbeetle/src/state_machine.zig +48 -23
- package/src/tigerbeetle/src/test/accounting/workload.zig +9 -5
- package/src/tigerbeetle/src/test/cluster.zig +15 -33
- package/src/tigerbeetle/src/test/conductor.zig +2 -1
- package/src/tigerbeetle/src/test/network.zig +45 -19
- package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
- package/src/tigerbeetle/src/test/state_checker.zig +5 -7
- package/src/tigerbeetle/src/test/storage.zig +453 -110
- package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +1 -0
- package/src/tigerbeetle/src/unit_tests.zig +7 -1
- package/src/tigerbeetle/src/util.zig +97 -11
- package/src/tigerbeetle/src/vopr.zig +2 -1
- package/src/tigerbeetle/src/vsr/client.zig +8 -3
- package/src/tigerbeetle/src/vsr/journal.zig +280 -202
- package/src/tigerbeetle/src/vsr/replica.zig +169 -31
- package/src/tigerbeetle/src/vsr/superblock.zig +356 -629
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -6
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +414 -151
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +44 -9
- package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
- package/src/tigerbeetle/src/vsr.zig +19 -5
- package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
- package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299
- package/src/tigerbeetle/src/vopr_hub/README.md +0 -58
- package/src/tigerbeetle/src/vopr_hub/SETUP.md +0 -199
- package/src/tigerbeetle/src/vopr_hub/go.mod +0 -3
- package/src/tigerbeetle/src/vopr_hub/main.go +0 -1022
- package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +0 -3
- package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +0 -403
|
@@ -38,7 +38,10 @@ const Environment = struct {
|
|
|
38
38
|
const cluster = 32;
|
|
39
39
|
const replica = 4;
|
|
40
40
|
// TODO Is this appropriate for the number of fuzz_ops we want to run?
|
|
41
|
-
const size_max = vsr.Zone.superblock.size().? +
|
|
41
|
+
const size_max = vsr.Zone.superblock.size().? +
|
|
42
|
+
vsr.Zone.wal_headers.size().? +
|
|
43
|
+
vsr.Zone.wal_prepares.size().? +
|
|
44
|
+
1024 * 1024 * 1024;
|
|
42
45
|
|
|
43
46
|
const node_count = 1024;
|
|
44
47
|
// This is the smallest size that set_associative_cache will allow us.
|
|
@@ -80,6 +83,7 @@ const Environment = struct {
|
|
|
80
83
|
forest: Forest,
|
|
81
84
|
// We need @fieldParentPtr() of forest, so we can't use an optional Forest.
|
|
82
85
|
forest_exists: bool,
|
|
86
|
+
checkpoint_op: ?u64 = null,
|
|
83
87
|
|
|
84
88
|
fn init(env: *Environment, storage: *Storage) !void {
|
|
85
89
|
env.state = .uninit;
|
|
@@ -184,7 +188,8 @@ const Environment = struct {
|
|
|
184
188
|
env.change_state(.forest_compacting, .forest_open);
|
|
185
189
|
}
|
|
186
190
|
|
|
187
|
-
pub fn checkpoint(env: *Environment) void {
|
|
191
|
+
pub fn checkpoint(env: *Environment, op: u64) void {
|
|
192
|
+
env.checkpoint_op = op - config.lsm_batch_multiple;
|
|
188
193
|
env.change_state(.forest_open, .forest_checkpointing);
|
|
189
194
|
env.forest.checkpoint(forest_checkpoint_callback);
|
|
190
195
|
env.tick_until_state_change(.forest_checkpointing, .superblock_checkpointing);
|
|
@@ -194,7 +199,14 @@ const Environment = struct {
|
|
|
194
199
|
fn forest_checkpoint_callback(forest: *Forest) void {
|
|
195
200
|
const env = @fieldParentPtr(@This(), "forest", forest);
|
|
196
201
|
env.change_state(.forest_checkpointing, .superblock_checkpointing);
|
|
197
|
-
env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context
|
|
202
|
+
env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context, .{
|
|
203
|
+
.commit_min_checksum = env.superblock.working.vsr_state.commit_min_checksum + 1,
|
|
204
|
+
.commit_min = env.checkpoint_op.?,
|
|
205
|
+
.commit_max = env.checkpoint_op.? + 1,
|
|
206
|
+
.view_normal = 0,
|
|
207
|
+
.view = 0,
|
|
208
|
+
});
|
|
209
|
+
env.checkpoint_op = null;
|
|
198
210
|
}
|
|
199
211
|
|
|
200
212
|
fn superblock_checkpoint_callback(superblock_context: *SuperBlock.Context) void {
|
|
@@ -236,17 +248,17 @@ const Environment = struct {
|
|
|
236
248
|
|
|
237
249
|
for (fuzz_ops) |fuzz_op, fuzz_op_index| {
|
|
238
250
|
log.debug("Running fuzz_ops[{}/{}] == {}", .{ fuzz_op_index, fuzz_ops.len, fuzz_op });
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
251
|
+
const storage_size_used = storage.size_used();
|
|
252
|
+
log.debug("storage.size_used = {}/{}", .{ storage_size_used, storage.size });
|
|
253
|
+
const model_size = model.count() * @sizeOf(Account);
|
|
254
|
+
log.debug("space_amplification = {d:.2}", .{
|
|
255
|
+
@intToFloat(f64, storage_size_used) / @intToFloat(f64, model_size),
|
|
256
|
+
});
|
|
244
257
|
// Apply fuzz_op to the forest and the model.
|
|
245
258
|
switch (fuzz_op) {
|
|
246
259
|
.compact => |compact| {
|
|
247
260
|
env.compact(compact.op);
|
|
248
|
-
if (compact.checkpoint)
|
|
249
|
-
env.checkpoint();
|
|
261
|
+
if (compact.checkpoint) env.checkpoint(compact.op);
|
|
250
262
|
},
|
|
251
263
|
.put_account => |account| {
|
|
252
264
|
env.forest.grooves.accounts.put(&account);
|
|
@@ -274,27 +286,9 @@ const Environment = struct {
|
|
|
274
286
|
}
|
|
275
287
|
};
|
|
276
288
|
|
|
277
|
-
pub fn run_fuzz_ops(fuzz_ops: []const FuzzOp) !void {
|
|
289
|
+
pub fn run_fuzz_ops(storage_options: Storage.Options, fuzz_ops: []const FuzzOp) !void {
|
|
278
290
|
// Init mocked storage.
|
|
279
|
-
var storage = try Storage.init(
|
|
280
|
-
allocator,
|
|
281
|
-
Environment.size_max,
|
|
282
|
-
Storage.Options{
|
|
283
|
-
// We don't apply storage faults yet, so this seed doesn't matter.
|
|
284
|
-
.seed = 0xdeadbeef,
|
|
285
|
-
.read_latency_min = 0,
|
|
286
|
-
.read_latency_mean = 0,
|
|
287
|
-
.write_latency_min = 0,
|
|
288
|
-
.write_latency_mean = 0,
|
|
289
|
-
.read_fault_probability = 0,
|
|
290
|
-
.write_fault_probability = 0,
|
|
291
|
-
},
|
|
292
|
-
0,
|
|
293
|
-
.{
|
|
294
|
-
.first_offset = 0,
|
|
295
|
-
.period = 0,
|
|
296
|
-
},
|
|
297
|
-
);
|
|
291
|
+
var storage = try Storage.init(allocator, Environment.size_max, storage_options);
|
|
298
292
|
defer storage.deinit(allocator);
|
|
299
293
|
|
|
300
294
|
try Environment.format(&storage);
|
|
@@ -354,6 +348,7 @@ pub fn generate_fuzz_ops(random: std.rand.Random) ![]const FuzzOp {
|
|
|
354
348
|
const checkpoint =
|
|
355
349
|
// Can only checkpoint on the last beat of the bar.
|
|
356
350
|
compact_op % config.lsm_batch_multiple == config.lsm_batch_multiple - 1 and
|
|
351
|
+
compact_op > config.lsm_batch_multiple and
|
|
357
352
|
// Checkpoint at roughly the same rate as log wraparound.
|
|
358
353
|
random.uintLessThan(usize, Environment.compacts_per_checkpoint) == 0;
|
|
359
354
|
break :compact FuzzOp{
|
|
@@ -402,11 +397,18 @@ pub fn generate_fuzz_ops(random: std.rand.Random) ![]const FuzzOp {
|
|
|
402
397
|
pub fn main() !void {
|
|
403
398
|
const fuzz_args = try fuzz.parse_fuzz_args(allocator);
|
|
404
399
|
var rng = std.rand.DefaultPrng.init(fuzz_args.seed);
|
|
400
|
+
const random = rng.random();
|
|
405
401
|
|
|
406
|
-
const fuzz_ops = try generate_fuzz_ops(
|
|
402
|
+
const fuzz_ops = try generate_fuzz_ops(random);
|
|
407
403
|
defer allocator.free(fuzz_ops);
|
|
408
404
|
|
|
409
|
-
try run_fuzz_ops(
|
|
405
|
+
try run_fuzz_ops(Storage.Options{
|
|
406
|
+
.seed = random.int(u64),
|
|
407
|
+
.read_latency_min = 0,
|
|
408
|
+
.read_latency_mean = 0 + fuzz.random_int_exponential(random, u64, 20),
|
|
409
|
+
.write_latency_min = 0,
|
|
410
|
+
.write_latency_mean = 0 + fuzz.random_int_exponential(random, u64, 20),
|
|
411
|
+
}, fuzz_ops);
|
|
410
412
|
|
|
411
413
|
log.info("Passed!", .{});
|
|
412
414
|
}
|
|
@@ -4,11 +4,13 @@ const mem = std.mem;
|
|
|
4
4
|
|
|
5
5
|
const config = @import("../config.zig");
|
|
6
6
|
const vsr = @import("../vsr.zig");
|
|
7
|
+
const free_set = @import("../vsr/superblock_free_set.zig");
|
|
7
8
|
|
|
8
9
|
const SuperBlockType = vsr.SuperBlockType;
|
|
9
10
|
const FIFO = @import("../fifo.zig").FIFO;
|
|
10
11
|
const IOPS = @import("../iops.zig").IOPS;
|
|
11
12
|
const SetAssociativeCache = @import("set_associative_cache.zig").SetAssociativeCache;
|
|
13
|
+
const util = @import("../util.zig");
|
|
12
14
|
|
|
13
15
|
const log = std.log.scoped(.grid);
|
|
14
16
|
|
|
@@ -90,6 +92,7 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
90
92
|
|
|
91
93
|
pub const BlockPtr = *align(config.sector_size) [block_size]u8;
|
|
92
94
|
pub const BlockPtrConst = *align(config.sector_size) const [block_size]u8;
|
|
95
|
+
pub const Reservation = free_set.Reservation;
|
|
93
96
|
|
|
94
97
|
pub const Write = struct {
|
|
95
98
|
callback: fn (*Grid.Write) void,
|
|
@@ -166,31 +169,46 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
166
169
|
}
|
|
167
170
|
|
|
168
171
|
pub fn tick(grid: *Grid) void {
|
|
169
|
-
|
|
170
172
|
// Resolve reads that were seen in the cache during start_read()
|
|
171
173
|
// but deferred to be asynchronously resolved on the next tick.
|
|
172
174
|
//
|
|
173
|
-
//
|
|
174
|
-
//
|
|
175
|
-
//
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
175
|
+
// Drain directly from the queue so that new cache reads (added upon completion of old
|
|
176
|
+
// cache reads) that can be serviced immediately aren't deferred until the next tick
|
|
177
|
+
// (which may be milliseconds later due to IO.run_for_ns). This is necessary to ensure
|
|
178
|
+
// that groove prefetch completes promptly.
|
|
179
|
+
//
|
|
180
|
+
// Even still, we cap the reads processed to prevent going over
|
|
181
|
+
// any implicit time slice expected of Grid.tick(). This limit is fairly arbitrary.
|
|
182
|
+
var retry_max: u32 = 100_000;
|
|
183
|
+
while (grid.read_cached_queue.pop()) |read| {
|
|
179
184
|
if (grid.cache.get(read.address)) |block| {
|
|
180
|
-
read.callback(read, block);
|
|
185
|
+
read.callback(read, block);
|
|
181
186
|
} else {
|
|
182
187
|
grid.start_read(read);
|
|
183
188
|
}
|
|
189
|
+
|
|
190
|
+
retry_max -= 1;
|
|
191
|
+
if (retry_max == 0) break;
|
|
184
192
|
}
|
|
185
193
|
}
|
|
186
194
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
195
|
+
/// Returning null indicates that there are not enough free blocks to fill the reservation.
|
|
196
|
+
pub fn reserve(grid: *Grid, blocks_count: usize) ?Reservation {
|
|
197
|
+
return grid.superblock.free_set.reserve(blocks_count);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/// Forfeit a reservation.
|
|
201
|
+
pub fn forfeit(grid: *Grid, reservation: Reservation) void {
|
|
202
|
+
return grid.superblock.free_set.forfeit(reservation);
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/// Returns a just-allocated block.
|
|
206
|
+
/// The caller is responsible for not acquiring more blocks than they reserved.
|
|
207
|
+
pub fn acquire(grid: *Grid, reservation: Reservation) u64 {
|
|
208
|
+
return grid.superblock.free_set.acquire(reservation).?;
|
|
191
209
|
}
|
|
192
210
|
|
|
193
|
-
/// This function should be used to release addresses, instead of
|
|
211
|
+
/// This function should be used to release addresses, instead of release()
|
|
194
212
|
/// on the free set directly, as this also demotes the address within the block cache.
|
|
195
213
|
/// This reduces conflict misses in the block cache, by freeing ways soon after they are
|
|
196
214
|
/// released.
|
|
@@ -199,12 +217,12 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
199
217
|
/// checkpoint.
|
|
200
218
|
///
|
|
201
219
|
/// Asserts that the address is not currently being read from or written to.
|
|
202
|
-
pub fn
|
|
220
|
+
pub fn release(grid: *Grid, address: u64) void {
|
|
203
221
|
grid.assert_not_writing(address, null);
|
|
204
222
|
grid.assert_not_reading(address, null);
|
|
205
223
|
|
|
206
224
|
grid.cache.demote(address);
|
|
207
|
-
grid.superblock.free_set.
|
|
225
|
+
grid.superblock.free_set.release(address);
|
|
208
226
|
}
|
|
209
227
|
|
|
210
228
|
/// Assert that the address is not currently being written to.
|
|
@@ -320,7 +338,7 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
320
338
|
grid,
|
|
321
339
|
completed_write.address,
|
|
322
340
|
);
|
|
323
|
-
|
|
341
|
+
util.copy_disjoint(.exact, u8, cached_block, completed_write.block);
|
|
324
342
|
|
|
325
343
|
grid.write_iops.release(iop);
|
|
326
344
|
|
|
@@ -438,8 +456,8 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
438
456
|
// read_block_callback(), but that would issue a new call to read_sectors().
|
|
439
457
|
iop.reads.push(read);
|
|
440
458
|
{
|
|
441
|
-
// Make a copy here to avoid an infinite loop from pending_reads being
|
|
442
|
-
// re-added to read_queue after not matching the current read.
|
|
459
|
+
// Make a copy here to avoid an infinite loop from pending_reads being
|
|
460
|
+
// re-added to read_queue after not matching the current read.
|
|
443
461
|
var copy = grid.read_queue;
|
|
444
462
|
grid.read_queue = .{};
|
|
445
463
|
while (copy.pop()) |pending_read| {
|
|
@@ -449,7 +467,7 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
449
467
|
} else {
|
|
450
468
|
grid.read_queue.push(pending_read);
|
|
451
469
|
}
|
|
452
|
-
}
|
|
470
|
+
}
|
|
453
471
|
}
|
|
454
472
|
|
|
455
473
|
grid.superblock.storage.read_sectors(
|
|
@@ -490,7 +508,7 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
490
508
|
assert(header.operation == block_type.operation());
|
|
491
509
|
|
|
492
510
|
// NOTE: read callbacks resolved here could queue up reads into this very iop.
|
|
493
|
-
// This extends this while loop, but that's fine as it keeps the callbacks
|
|
511
|
+
// This extends this while loop, but that's fine as it keeps the callbacks
|
|
494
512
|
// asynchronous to themselves (preventing something like a stack-overflow).
|
|
495
513
|
while (iop.reads.pop()) |read| {
|
|
496
514
|
assert(read.address == address);
|
|
@@ -530,7 +548,7 @@ pub fn GridType(comptime Storage: type) type {
|
|
|
530
548
|
grid.read_iops.release(iop);
|
|
531
549
|
|
|
532
550
|
// Always iterate through the full list of pending reads instead of just one to ensure
|
|
533
|
-
// that those serviced from the cache don't prevent others waiting for an IOP from
|
|
551
|
+
// that those serviced from the cache don't prevent others waiting for an IOP from
|
|
534
552
|
// seeing the IOP that was just released.
|
|
535
553
|
var copy = grid.read_queue;
|
|
536
554
|
grid.read_queue = .{};
|
|
@@ -587,6 +587,7 @@ pub fn GrooveType(
|
|
|
587
587
|
id_tree_value.timestamp,
|
|
588
588
|
)) |object| {
|
|
589
589
|
assert(!ObjectTreeHelpers(Object).tombstone(object));
|
|
590
|
+
assert(object.id == id);
|
|
590
591
|
groove.prefetch_objects.putAssumeCapacity(object.*, {});
|
|
591
592
|
} else {
|
|
592
593
|
// The id was in the IdTree's value cache, but not in the ObjectTree's
|
|
@@ -17,7 +17,7 @@ pub fn KWayMergeIterator(
|
|
|
17
17
|
comptime stream_peek: fn (
|
|
18
18
|
context: *const Context,
|
|
19
19
|
stream_index: u32,
|
|
20
|
-
) error{Empty, Drained}!Key,
|
|
20
|
+
) error{ Empty, Drained }!Key,
|
|
21
21
|
comptime stream_pop: fn (context: *Context, stream_index: u32) Value,
|
|
22
22
|
/// Returns true if stream A has higher precedence than stream B.
|
|
23
23
|
/// This is used to deduplicate values across streams.
|
|
@@ -69,7 +69,7 @@ pub fn KWayMergeIterator(
|
|
|
69
69
|
var stream_index: u32 = 0;
|
|
70
70
|
while (stream_index < stream_count_max) : (stream_index += 1) {
|
|
71
71
|
it.keys[it.k] = stream_peek(context, stream_index) catch |err| switch (err) {
|
|
72
|
-
// On initialization, the streams should either have data already
|
|
72
|
+
// On initialization, the streams should either have data already
|
|
73
73
|
// buffered up to peek or be empty and have no more values to produce.
|
|
74
74
|
error.Drained => unreachable,
|
|
75
75
|
error.Empty => continue,
|
|
@@ -225,7 +225,7 @@ fn TestContext(comptime k_max: u32) type {
|
|
|
225
225
|
return math.order(a, b);
|
|
226
226
|
}
|
|
227
227
|
|
|
228
|
-
fn stream_peek(context: *const Self, stream_index: u32) error{Empty, Drained}!u32 {
|
|
228
|
+
fn stream_peek(context: *const Self, stream_index: u32) error{ Empty, Drained }!u32 {
|
|
229
229
|
// TODO: test for Drained somehow as well
|
|
230
230
|
const stream = context.streams[stream_index];
|
|
231
231
|
if (stream.len == 0) return error.Empty;
|
|
@@ -283,7 +283,7 @@ pub fn LevelIteratorType(comptime Table: type, comptime Storage: type) type {
|
|
|
283
283
|
/// - error.Empty when there are no values remaining to iterate.
|
|
284
284
|
/// - error.Drained when the iterator isn't empty, but the values
|
|
285
285
|
/// still need to be buffered into memory via tick().
|
|
286
|
-
pub fn peek(it: LevelIterator) error{Empty, Drained}!Key {
|
|
286
|
+
pub fn peek(it: LevelIterator) error{ Empty, Drained }!Key {
|
|
287
287
|
if (it.values.head_ptr_const()) |value| return key_from_value(value);
|
|
288
288
|
|
|
289
289
|
const scope = it.tables.head_ptr_const() orelse {
|
|
@@ -217,6 +217,8 @@ pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
|
|
|
217
217
|
table: *TableInfo,
|
|
218
218
|
) void {
|
|
219
219
|
const manifest_level = &manifest.levels[level];
|
|
220
|
+
|
|
221
|
+
assert(table.snapshot_max >= snapshot);
|
|
220
222
|
manifest_level.set_snapshot_max(snapshot, table);
|
|
221
223
|
assert(table.snapshot_max == snapshot);
|
|
222
224
|
|
|
@@ -545,8 +547,16 @@ pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
|
|
|
545
547
|
return true;
|
|
546
548
|
}
|
|
547
549
|
|
|
550
|
+
pub fn reserve(manifest: *Manifest) void {
|
|
551
|
+
assert(manifest.compact_callback == null);
|
|
552
|
+
assert(manifest.checkpoint_callback == null);
|
|
553
|
+
|
|
554
|
+
manifest.manifest_log.reserve();
|
|
555
|
+
}
|
|
556
|
+
|
|
548
557
|
pub fn compact(manifest: *Manifest, callback: Callback) void {
|
|
549
558
|
assert(manifest.compact_callback == null);
|
|
559
|
+
assert(manifest.checkpoint_callback == null);
|
|
550
560
|
manifest.compact_callback = callback;
|
|
551
561
|
|
|
552
562
|
manifest.manifest_log.compact(manifest_log_compact_callback);
|
|
@@ -555,6 +565,7 @@ pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
|
|
|
555
565
|
fn manifest_log_compact_callback(manifest_log: *ManifestLog) void {
|
|
556
566
|
const manifest = @fieldParentPtr(Manifest, "manifest_log", manifest_log);
|
|
557
567
|
assert(manifest.compact_callback != null);
|
|
568
|
+
assert(manifest.checkpoint_callback == null);
|
|
558
569
|
|
|
559
570
|
const callback = manifest.compact_callback.?;
|
|
560
571
|
manifest.compact_callback = null;
|
|
@@ -562,6 +573,7 @@ pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
|
|
|
562
573
|
}
|
|
563
574
|
|
|
564
575
|
pub fn checkpoint(manifest: *Manifest, callback: Callback) void {
|
|
576
|
+
assert(manifest.compact_callback == null);
|
|
565
577
|
assert(manifest.checkpoint_callback == null);
|
|
566
578
|
manifest.checkpoint_callback = callback;
|
|
567
579
|
|
|
@@ -570,6 +582,7 @@ pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
|
|
|
570
582
|
|
|
571
583
|
fn manifest_log_checkpoint_callback(manifest_log: *ManifestLog) void {
|
|
572
584
|
const manifest = @fieldParentPtr(Manifest, "manifest_log", manifest_log);
|
|
585
|
+
assert(manifest.compact_callback == null);
|
|
573
586
|
assert(manifest.checkpoint_callback != null);
|
|
574
587
|
|
|
575
588
|
const callback = manifest.checkpoint_callback.?;
|
|
@@ -1,52 +1,3 @@
|
|
|
1
|
-
//! A ManifestLevel is an in-memory collection of the table metadata for a single level of a tree.
|
|
2
|
-
//!
|
|
3
|
-
//! For a given level and snapshot, there may be gaps in the key ranges of the visible tables,
|
|
4
|
-
//! but the key ranges are disjoint.
|
|
5
|
-
//!
|
|
6
|
-
//! A level's tables can be visualized in 2D as a partitioned rectangle.
|
|
7
|
-
//! For example, given the ManifestLevel tables (with values chosen for visualization, not realism):
|
|
8
|
-
//!
|
|
9
|
-
//! label A B C D E F G H I J K L M
|
|
10
|
-
//! key_min 0 4 12 16 4 8 12 26 4 25 4 16 24
|
|
11
|
-
//! key_max 3 11 15 19 7 11 15 27 7 27 11 19 27
|
|
12
|
-
//! snapshot_min 1 1 1 1 3 3 3 3 5 5 7 7 7
|
|
13
|
-
//! snapshot_max 9 3 3 7 5 7 9 5 7 7 9 9 9
|
|
14
|
-
//!
|
|
15
|
-
//! 0 1 2
|
|
16
|
-
//! 0 4 8 2 6 0 4 8
|
|
17
|
-
//! 9┌───┬───────┬───┬───┬───┬───┐
|
|
18
|
-
//! │ │ K │ │ L │###│ M │
|
|
19
|
-
//! 7│ ├───┬───┤ ├───┤###└┬──┤
|
|
20
|
-
//! │ │ I │ │ G │ │####│ J│
|
|
21
|
-
//! 5│ A ├───┤ F │ │ │####└┬─┤
|
|
22
|
-
//! │ │ E │ │ │ D │#####│H│
|
|
23
|
-
//! 3│ ├───┴───┼───┤ │#####└─┤
|
|
24
|
-
//! │ │ B │ C │ │#######│
|
|
25
|
-
//! 1└───┴───────┴───┴───┴───────┘
|
|
26
|
-
//!
|
|
27
|
-
//! Example iterations:
|
|
28
|
-
//!
|
|
29
|
-
//! visibility snapshots direction key_min key_max tables
|
|
30
|
-
//! visible 2 ascending 0 28 A, B, C, D
|
|
31
|
-
//! visible 4 ascending 0 28 A, E, F, G, D, H
|
|
32
|
-
//! visible 6 descending 12 28 J, D, G
|
|
33
|
-
//! visible 8 ascending 0 28 A, K, G, L, M
|
|
34
|
-
//! invisible 2, 4, 6 ascending 0 28 K, L, M
|
|
35
|
-
//!
|
|
36
|
-
//! Legend:
|
|
37
|
-
//!
|
|
38
|
-
//! * "#" represents a gap — no tables cover these keys during the snapshot.
|
|
39
|
-
//! * The horizontal axis represents the key range.
|
|
40
|
-
//! * The vertical axis represents the snapshot range.
|
|
41
|
-
//! * Each rectangle is a table within the manifest level.
|
|
42
|
-
//! * The sides of each rectangle depict:
|
|
43
|
-
//! * left: table.key_min (the diagram is inclusive, and the table.key_min is inclusive)
|
|
44
|
-
//! * right: table.key_max (the diagram is EXCLUSIVE, but the table.key_max is INCLUSIVE)
|
|
45
|
-
//! * bottom: table.snapshot_min (inclusive)
|
|
46
|
-
//! * top: table.snapshot_max (inclusive)
|
|
47
|
-
//! * (Not depicted: tables may have `table.key_min == table.key_max`.)
|
|
48
|
-
//! * (Not depicted: the newest set of tables would have `table.snapshot_max == maxInt(u64)`.)
|
|
49
|
-
//!
|
|
50
1
|
const std = @import("std");
|
|
51
2
|
const assert = std.debug.assert;
|
|
52
3
|
const math = std.math;
|