tigerbeetle 0.0.38 → 0.0.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/tb_client/tigerbeetle/build.zig +14 -45
- data/ext/tb_client/tigerbeetle/src/build/fetch.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2 -5
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +4 -1
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +56 -29
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +2 -24
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +7 -8
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +8 -3
- data/ext/tb_client/tigerbeetle/src/stdx/huge_page_allocator.zig +115 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +4 -4
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +61 -62
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +39 -20
- data/ext/tb_client/tigerbeetle/src/tidy.zig +2 -1
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +1 -1
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +1 -1
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +10 -9
- data/lib/tigerbeetle/version.rb +2 -2
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 234689582eb24285e6dced340dceaad9abc90f259512ac12343b796f7daa1088
+  data.tar.gz: 0f402fb280af28464ca401a00ec7ad8d2e06b1cf5d8a0e7253c2a4ae147a20dd
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: da4ee2ecd0088a903b1b0f4341f2e8fc68d4be85bf990aa7b2caeeb99d04fecec75f2c3412124dcea3d345915ed4a55a534ce860a694f43f8e752fef0d97af96
+  data.tar.gz: 4a8e2049ff1db496e3b1eb33ed37b7ae7f6216f9963be9db27e90eac579251e1686e2e837df7430d190d4a5768e2ca4ae5f03627a47dbc2d65f1423be69a1f29
data/CHANGELOG.md
CHANGED

data/ext/tb_client/tigerbeetle/build.zig
CHANGED
@@ -2272,55 +2272,24 @@ fn fetch(b: *std.Build, options: struct {
     file_name: []const u8,
     hash: ?[]const u8,
 }) std.Build.LazyPath {
-    const
-        .name = "
+    const fetch_step = b.addRunArtifact(b.addExecutable(.{
+        .name = "fetch",
         .root_module = b.createModule(.{
-            .root_source_file = b.
-                \\const builtin = @import("builtin");
-                \\const std = @import("std");
-                \\const assert = std.debug.assert;
-                \\
-                \\pub fn main() !void {
-                \\    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-                \\    const allocator = arena.allocator();
-                \\    const args = try std.process.argsAlloc(allocator);
-                \\    assert(args.len == 5 or args.len == 6);
-                \\
-                \\    const hash_and_newline = try std.fs.cwd().readFileAlloc(allocator, args[2], 128);
-                \\    assert(hash_and_newline[hash_and_newline.len - 1] == '\n');
-                \\    const hash = hash_and_newline[0 .. hash_and_newline.len - 1];
-                \\    if (args.len == 6 and !std.mem.eql(u8, args[5], hash)) {
-                \\        std.debug.panic(
-                \\            \\bad hash
-                \\            \\specified: {s}
-                \\            \\downloaded: {s}
-                \\            \\
-                \\        , .{ args[5], hash });
-                \\    }
-                \\
-                \\    const source_path = try std.fs.path.join(allocator, &.{ args[1], hash, args[3] });
-                \\    try std.fs.cwd().copyFile(
-                \\        source_path,
-                \\        std.fs.cwd(),
-                \\        args[4],
-                \\        .{},
-                \\    );
-                \\}
-            ),
+            .root_source_file = b.path("./src/build/fetch.zig"),
             .target = b.graph.host,
         }),
     }));
-
-
-
-
-    b.
-
-
-
-
-
-
+    fetch_step.setName(b.fmt("fetch {s}", .{options.url}));
+
+    fetch_step.addArgs(&.{
+        b.graph.zig_exe,
+        b.graph.global_cache_root.path orelse ".",
+        options.url,
+        options.file_name,
+    });
+    const result = fetch_step.addOutputFileArg(options.file_name);
+    if (options.hash) |hash| fetch_step.addArg(hash);
+
     return result;
 }
 
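The build.zig change above swaps an inline, string-embedded fetch program for a compiled helper at src/build/fetch.zig. Below is a minimal sketch — not TigerBeetle's code, and assuming a Zig 0.14-era std.Build API — of the same pattern: compile a tool, run it as a build step, and register its output file so the step is cacheable and other steps can depend on its result. The file names are illustrative.

// build.zig sketch: run a helper tool at build time and capture its
// output file as a LazyPath that other steps can depend on.
const std = @import("std");

pub fn build(b: *std.Build) void {
    const tool = b.addExecutable(.{
        .name = "fetch",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/build/fetch.zig"),
            .target = b.graph.host, // a build-time tool runs on the host
        }),
    });

    const run = b.addRunArtifact(tool);
    // Plain arguments pass straight through to the tool's argv.
    run.addArgs(&.{ b.graph.zig_exe, b.graph.global_cache_root.path orelse "." });
    // addOutputFileArg both appends a path argument and tells the build
    // graph that this step produces that file, enabling caching.
    const fetched = run.addOutputFileArg("example.tar.gz");

    // Illustrative consumer: install the fetched file into zig-out.
    b.getInstallStep().dependOn(&b.addInstallFile(fetched, "example.tar.gz").step);
}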
data/ext/tb_client/tigerbeetle/src/build/fetch.zig
ADDED
@@ -0,0 +1,112 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+
+const log = std.log;
+
+pub const std_options: std.Options = .{
+    .log_level = .info,
+};
+
+pub fn main() !void {
+    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    const arena = arena_instance.allocator();
+
+    const args = try std.process.argsAlloc(arena);
+    assert(args.len == 6 or args.len == 7);
+
+    _, const zig, const global_cache, const url, const file_name, const out = args[0..6].*;
+    const hash_optional = if (args.len == 7) args[6] else null;
+    assert(args.len <= 7);
+
+    if (hash_optional) |hash| {
+        // Fast path --- don't touch the Internet if we have the hash locally.
+        const cached = path_join(arena, &.{ global_cache, "p", hash, file_name });
+        if (std.fs.cwd().copyFile(cached, std.fs.cwd(), out, .{})) {
+            log.debug("download skipped: cache hit", .{});
+            return;
+        } else |_| { // Time to ask for forgiveness!
+            log.debug("download: cache miss", .{});
+        }
+    } else {
+        log.debug("download: no hash", .{});
+    }
+
+    // If curl is available, use it for robust downloads, and then
+    // `zig fetch` a local file to get the hash. Otherwise, fetch
+    // the url directly.
+    const hash = if (exec_ok(arena, &.{ "curl", "--version" })) hash: {
+        log.debug("download: curl", .{});
+        const url_file_name = url[std.mem.lastIndexOf(u8, url, "/").?..];
+        const tmp_dir = path_join(arena, &.{
+            global_cache,
+            "tmp",
+            &std.fmt.bytesToHex(std.mem.asBytes(&std.crypto.random.int(u64)), .lower),
+        });
+        defer std.fs.cwd().deleteTree(tmp_dir) catch {};
+
+        try std.fs.cwd().makePath(tmp_dir);
+
+        const curl_output = path_join(arena, &.{ tmp_dir, url_file_name });
+        _ = try exec(arena, &(.{
+            "curl",             "--retry-all-errors",
+            "--retry",          "5",
+            "--retry-max-time", "120",
+            "--location",       url,
+            "--output",         curl_output,
+        }));
+        break :hash try exec(arena, &.{ zig, "fetch", curl_output });
+    } else hash: {
+        log.debug("download: zig fetch", .{});
+        break :hash try exec(arena, &.{ zig, "fetch", url });
+    };
+
+    if (hash_optional) |hash_specified| {
+        if (!std.mem.eql(u8, hash, hash_specified)) {
+            log.err(
+                \\bad hash
+                \\specified: {s}
+                \\fetched: {s}
+                \\
+            , .{ hash_specified, hash });
+            return error.BadHash;
+        }
+    }
+
+    const cached = path_join(arena, &.{ global_cache, "p", hash, file_name });
+    errdefer log.err("copying from {s}", .{cached});
+
+    try std.fs.cwd().copyFile(cached, std.fs.cwd(), out, .{});
+}
+
+fn path_join(arena: Allocator, components: []const []const u8) []const u8 {
+    return std.fs.path.join(arena, components) catch |err| oom(err);
+}
+
+fn exec_ok(arena: Allocator, argv: []const []const u8) bool {
+    assert(argv.len > 0);
+    const result = std.process.Child.run(.{ .allocator = arena, .argv = argv }) catch return false;
+    return result.term == .Exited and result.term.Exited == 0;
+}
+
+fn exec(arena: Allocator, argv: []const []const u8) ![]const u8 {
+    assert(argv.len > 0);
+    const result = std.process.Child.run(.{ .allocator = arena, .argv = argv }) catch |err| {
+        log.err("running {s}: {}", .{ argv, err });
+        return err;
+    };
+    if (!(result.term == .Exited and result.term.Exited == 0)) {
+        log.err("running {s}: {}\n{s}", .{ argv, result.term, result.stderr });
+        return error.Exec;
+    }
+    if (std.mem.indexOfScalar(u8, result.stdout, '\n')) |first_newline| {
+        if (first_newline + 1 == result.stdout.len) {
+            return result.stdout[0 .. result.stdout.len - 1];
+        }
+    }
+    return result.stdout;
+}
+
+fn oom(_: error{OutOfMemory}) noreturn {
+    @panic("OOM");
+}
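The new tool takes the zig executable path, the global cache directory, the url, the file name, and the output path (plus an optional expected hash) as arguments. One detail worth restating: exec trims exactly one trailing newline from a command's stdout — `zig fetch` prints the package hash followed by a newline — while multi-line output is returned untouched. A standalone sketch of that rule, with an illustrative helper name rather than the file's:

// Restatement of exec's stdout handling: strip a single trailing
// newline, leave anything else (including multi-line output) intact.
const std = @import("std");

fn trim_single_trailing_newline(stdout: []const u8) []const u8 {
    if (std.mem.indexOfScalar(u8, stdout, '\n')) |first_newline| {
        // Only trim when the first newline is also the last byte.
        if (first_newline + 1 == stdout.len) {
            return stdout[0 .. stdout.len - 1];
        }
    }
    return stdout;
}

test trim_single_trailing_newline {
    try std.testing.expectEqualStrings("1220abcd", trim_single_trailing_newline("1220abcd\n"));
    try std.testing.expectEqualStrings("a\nb\n", trim_single_trailing_newline("a\nb\n"));
    try std.testing.expectEqualStrings("", trim_single_trailing_newline(""));
}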
data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig
CHANGED
@@ -255,17 +255,14 @@ pub fn ResourcePoolType(comptime Grid: type) type {
     };
 }
 
-pub fn CompactionType(
-    comptime Table: type,
-    comptime Tree: type,
-    comptime Storage: type,
-) type {
+pub fn CompactionType(comptime Tree: type, comptime Storage: type) type {
     return struct {
         const Compaction = @This();
 
         const Grid = GridType(Storage);
         const ResourcePool = ResourcePoolType(Grid);
 
+        const Table = Tree.Table;
         const Manifest = ManifestType(Table, Storage);
        const TableInfo = TableInfoType(Table);
         const TableInfoReference = Manifest.TableInfoReference;
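CompactionType loses its comptime Table parameter and instead derives the table type from Tree, which already carries it as a declaration (the matching tree.zig change below shrinks the call site to CompactionType(Tree, Storage)). A minimal sketch of the pattern with stand-in types, not TigerBeetle's:

// Stand-in sketch: deriving an associated type from a comptime parameter
// instead of threading it through every generic signature.
const std = @import("std");

fn TreeType(comptime TreeTable: type) type {
    return struct {
        pub const Table = TreeTable;
    };
}

fn CompactionType(comptime Tree: type, comptime Storage: type) type {
    _ = Storage;
    return struct {
        // One fewer comptime parameter: Table is recovered from Tree.
        const Table = Tree.Table;
    };
}

test "Table is derived from Tree" {
    const Table = struct { key: u64, value: u64 };
    const Tree = TreeType(Table);
    try std.testing.expect(CompactionType(Tree, void).Table == Table);
}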
data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig
CHANGED
@@ -37,9 +37,12 @@ pub fn CompositeKeyType(comptime Field: type) type {
 
         pub const Key = std.meta.Int(
             .unsigned,
+            // Little-endian:
             @bitSizeOf(u64) + @bitSizeOf(Field) + @bitSizeOf(Pad),
         );
 
+        // NB: ideally, we should swap `field` and `timestamp` here to maintain
+        // little-endian order throughout, but we are constrained by existing data.
         field: Field align(field_bitsize_alignment),
         /// The most significant bit must be unset as it is used to indicate a tombstone.
         timestamp: u64,
@@ -65,7 +68,7 @@ pub fn CompositeKeyType(comptime Field: type) type {
                 return value.timestamp & ~tombstone_bit;
             } else {
                 comptime assert(@sizeOf(Key) == @sizeOf(Field) * 2);
-                return @as(Key, value.
+                return (@as(Key, value.field) << 64) | @as(Key, value.timestamp & ~tombstone_bit);
             }
         }
 
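The rewritten key_from_value packs the composite key as (field << 64) | timestamp, so keys order by field first and by timestamp second, with the tombstone bit masked out of the timestamp half. A worked sketch for Field = u64 (so Key = u128 and Pad = u0); the helper and constants here are illustrative:

// Illustrative packing for Field = u64: field in the high 64 bits,
// tombstone-masked timestamp in the low 64 bits.
const std = @import("std");

const tombstone_bit: u64 = 1 << 63;

fn key_from(field: u64, timestamp: u64) u128 {
    return (@as(u128, field) << 64) | @as(u128, timestamp & ~tombstone_bit);
}

test key_from {
    // Field dominates the ordering...
    try std.testing.expect(key_from(1, std.math.maxInt(u63)) < key_from(2, 0));
    // ...and timestamp breaks ties within a field.
    try std.testing.expect(key_from(1, 100) < key_from(1, 101));
    // The tombstone bit does not participate in the key.
    try std.testing.expectEqual(key_from(1, 100), key_from(1, 100 | tombstone_bit));
}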
data/ext/tb_client/tigerbeetle/src/lsm/forest.zig
CHANGED
@@ -241,6 +241,8 @@ pub fn ForestType(comptime _Storage: type, comptime groove_cfg: anytype) type {
         } = null,
 
         grid: *Grid,
+        next_tick: Grid.NextTick = undefined,
+
         grooves: Grooves,
         node_pool: NodePool,
         manifest_log: ManifestLog,
@@ -463,13 +465,24 @@ pub fn ForestType(comptime _Storage: type, comptime groove_cfg: anytype) type {
                 .manifest_log_done = false,
             };
 
-            //
-            //
-            //
-
-
-
-
+            // No compactions are run during the absolute first bar, or during
+            // the first bar of the checkpoint that we are currently recovering
+            // from (see `op_compacted` for the rationale behind this).
+            if (op < constants.lsm_compaction_ops or
+                forest.grid.superblock.working.vsr_state.op_compacted(op))
+            {
+                forest.compaction_progress.?.manifest_log_done = true;
+                forest.compaction_progress.?.trees_done = true;
+                forest.grid.on_next_tick(compact_finish_next_tick, &forest.next_tick);
+                return;
+            }
+
+            // Manifest log compaction. Run on the last beat of each half-bar.
+            // Start before forest compaction for lesser fragmentation, as
+            // manifest log grid reservations are much smaller than compaction's.
+            // TODO: Figure out a plan wrt the pacing here. Putting it on the
+            // last beat kinda-sorta balances out, because we expect to naturally
+            // do less other compaction work on the last beat.
             if (last_beat or last_half_beat) {
                 forest.manifest_log.compact(compact_manifest_log_callback, op);
             } else {
@@ -479,6 +492,13 @@ pub fn ForestType(comptime _Storage: type, comptime groove_cfg: anytype) type {
             forest.compaction_schedule.beat_start(compact_trees_callback, op);
         }
 
+        fn compact_finish_next_tick(next_tick: *Grid.NextTick) void {
+            const forest: *Forest = @alignCast(
+                @fieldParentPtr("next_tick", next_tick),
+            );
+            forest.compact_finish();
+        }
+
         fn compact_trees_callback(forest: *Forest) void {
             assert(forest.progress.? == .compact);
             assert(forest.compaction_progress != null);
@@ -547,7 +567,10 @@ pub fn ForestType(comptime _Storage: type, comptime groove_cfg: anytype) type {
             }
 
             if (last_beat or last_half_beat) {
-
+                if (forest.compaction_schedule.bar_input_size) |bar_input_size| {
+                    assert(bar_input_size == 0);
+                    forest.compaction_schedule.bar_input_size = null;
+                }
 
                 // On the last beat of the bar, make sure that manifest log compaction is finished.
                 forest.manifest_log.compact_end();
@@ -891,8 +914,8 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
         pool: ResourcePool,
         next_tick: Grid.NextTick = undefined,
         callback: ?*const fn (*Forest) void = null,
-        bar_input_size: u64 =
-        beat_input_size: u64 =
+        bar_input_size: ?u64 = null,
+        beat_input_size: ?u64 = null,
 
         const CompactionSchedule = @This();
         const ResourcePool = ResourcePoolType(Grid);
@@ -926,17 +949,13 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
             assert(self.pool.grid_reservation == null);
 
             assert(self.callback == null);
-            assert(
+            assert(op >= constants.lsm_compaction_ops);
+            assert(!self.grid.superblock.working.vsr_state.op_compacted(op));
+            assert(self.beat_input_size == null);
 
+            self.beat_input_size = 0;
             self.callback = callback;
 
-            if (op < constants.lsm_compaction_ops or
-                self.grid.superblock.working.vsr_state.op_compacted(op))
-            {
-                self.beat_finish();
-                return;
-            }
-
             const half_bar = @divExact(constants.lsm_compaction_ops, 2);
             const compaction_beat = op % constants.lsm_compaction_ops;
 
@@ -945,8 +964,9 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
 
             if (first_beat or half_beat) {
                 assert(self.pool.blocks_acquired() == 0);
-                assert(self.bar_input_size ==
+                assert(self.bar_input_size == null);
 
+                var bar_input_size: u64 = 0;
                 for (0..constants.lsm_levels) |level_b| {
                     if (level_active(.{ .level_b = level_b, .op = op })) {
                         inline for (comptime std.enums.values(Forest.TreeID)) |tree_id| {
@@ -966,17 +986,18 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
                             );
                             const bar_input_values = compaction.bar_commence(op);
 
-
+                            bar_input_size += (bar_input_values * @sizeOf(Value));
                         }
                     }
                 }
+                self.bar_input_size = bar_input_size;
             }
 
             const beats_total = half_bar;
             const beats_done = compaction_beat % half_bar;
             const beats_remaining = beats_total - beats_done;
 
-            self.beat_input_size = stdx.div_ceil(self.bar_input_size
+            self.beat_input_size = stdx.div_ceil(self.bar_input_size.?, beats_remaining);
 
             // This is akin to a dry run for the actual compaction work that is going to happen
             // during this beat, wherein we:
@@ -987,7 +1008,7 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
             var beat_index_blocks_max: u64 = 1;
             var beat_value_blocks_max: u64 = 1;
 
-            var beat_input_size = self.beat_input_size
+            var beat_input_size = self.beat_input_size.?;
             for (0..constants.lsm_levels) |level_b| {
                 if (level_active(.{ .level_b = level_b, .op = op })) {
                     inline for (comptime std.enums.values(Forest.TreeID)) |tree_id| {
@@ -1004,15 +1025,16 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
                         // The +1 is for imperfections in pacing our immutable table, which
                         // might cause us to overshoot by a single block (limited to 1 due
                         // to how the immutable table values are consumed.)
-
+                        const beat_value_blocks = stdx.div_ceil(
                             compaction.quotas.beat,
                             Table.layout.block_value_count_max,
                         ) + 1;
-
-
-                            beat_value_blocks_max,
+                        const beat_index_blocks = stdx.div_ceil(
+                            beat_value_blocks,
                             Table.value_block_count_max,
                         );
+                        beat_value_blocks_max += beat_value_blocks;
+                        beat_index_blocks_max += beat_index_blocks;
 
                         beat_input_size -|= (compaction.quotas.beat * @sizeOf(Value));
                     }
@@ -1029,12 +1051,12 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
 
         fn beat_resume(self: *CompactionSchedule) void {
             assert(self.callback != null);
+            assert(self.pool.grid_reservation != null);
 
             if (self.beat_input_size == 0) {
                 self.beat_finish();
                 return;
             }
-            assert(self.pool.grid_reservation != null);
 
             const op = self.forest.progress.?.compact.op;
 
@@ -1065,8 +1087,8 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
                 inline else => |id| {
                     const Value = Forest.tree_info_for_id(id).Tree.Value;
                     const input_bytes_consumed = values_consumed * @sizeOf(Value);
-                    self.bar_input_size -= input_bytes_consumed;
-                    self.beat_input_size -|= input_bytes_consumed;
+                    self.bar_input_size.? -= input_bytes_consumed;
+                    self.beat_input_size.? -|= input_bytes_consumed;
                 },
             }
 
@@ -1075,11 +1097,16 @@ fn CompactionScheduleType(comptime Forest: type, comptime Grid: type) type {
 
         fn beat_finish(self: *CompactionSchedule) void {
             assert(self.callback != null);
+
+            assert(self.bar_input_size.? >= 0);
             assert(self.beat_input_size == 0);
+            self.beat_input_size = null;
+
             if (self.pool.grid_reservation) |reservation| {
                 self.grid.forfeit(reservation);
                 self.pool.grid_reservation = null;
             }
+
             self.grid.on_next_tick(beat_finish_next_tick, &self.next_tick);
         }
 
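Two threads run through the forest.zig change: bar_input_size and beat_input_size become ?u64, so "no bar/beat in progress" is now distinct from "zero bytes remaining", and the first-bar/already-compacted skip moves out of beat_start into compact itself, finishing via the new compact_finish_next_tick. The pacing rule itself is unchanged: each beat's quota is the remaining bar input divided by the remaining beats of the half-bar, rounded up. A self-contained sketch of that arithmetic (the constants are illustrative, and div_ceil stands in for stdx.div_ceil):

// Pacing sketch: spreading the bar's remaining input evenly over the
// remaining beats, rounding up so the quota never lags the bar.
const std = @import("std");

fn div_ceil(numerator: u64, denominator: u64) u64 {
    std.debug.assert(denominator > 0);
    return std.math.divCeil(u64, numerator, denominator) catch unreachable;
}

test "bar input is fully consumed by the last beat" {
    const beats_total: u64 = 16; // beats per half-bar, illustrative
    var bar_input_size: u64 = 1234; // bytes left to compact this bar
    var beats_remaining = beats_total;
    while (beats_remaining > 0) : (beats_remaining -= 1) {
        const beat_input_size = div_ceil(bar_input_size, beats_remaining);
        bar_input_size -= beat_input_size; // assume this beat meets its quota
    }
    try std.testing.expectEqual(@as(u64, 0), bar_input_size);
}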
data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig
CHANGED
@@ -676,6 +676,8 @@ pub fn ManifestLogType(comptime Storage: type) type {
             assert(manifest_log.blocks.count ==
                 manifest_log.blocks_closed + @intFromBool(manifest_log.entry_count > 0));
             assert(manifest_log.compact_blocks == null);
+            assert(op >= constants.lsm_compaction_ops);
+            assert(!manifest_log.superblock.working.vsr_state.op_compacted(op));
 
             // TODO: Currently manifest compaction is hardcoded to run on the last beat of each
             // half-bar.
@@ -685,14 +687,6 @@ pub fn ManifestLogType(comptime Storage: type) type {
 
             manifest_log.grid.trace.start(.compact_manifest);
 
-            if (op < constants.lsm_compaction_ops or
-                manifest_log.superblock.working.vsr_state.op_compacted(op))
-            {
-                manifest_log.read_callback = callback;
-                manifest_log.grid.on_next_tick(compact_tick_callback, &manifest_log.next_tick);
-                return;
-            }
-
             manifest_log.compact_blocks = @min(
                 manifest_log.pace.half_bar_compact_blocks(.{
                     .log_blocks_count = @intCast(manifest_log.log_block_checksums.count),
@@ -712,22 +706,6 @@ pub fn ManifestLogType(comptime Storage: type) type {
             manifest_log.flush(compact_next_block);
         }
 
-        fn compact_tick_callback(next_tick: *Grid.NextTick) void {
-            const manifest_log: *ManifestLog = @alignCast(@fieldParentPtr("next_tick", next_tick));
-            assert(manifest_log.write_callback == null);
-            assert(manifest_log.grid_reservation == null);
-            assert(manifest_log.blocks_closed == 0);
-            assert(manifest_log.blocks.count == 0);
-            assert(manifest_log.entry_count == 0);
-            assert(manifest_log.compact_blocks == null);
-
-            manifest_log.grid.trace.stop(.compact_manifest);
-
-            const callback = manifest_log.read_callback.?;
-            manifest_log.read_callback = null;
-            callback(manifest_log);
-        }
-
         fn compact_next_block(manifest_log: *ManifestLog) void {
             assert(manifest_log.opened);
             assert(!manifest_log.reading);
data/ext/tb_client/tigerbeetle/src/lsm/tree.zig
CHANGED
@@ -54,7 +54,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type) type {
         const KeyRange = Manifest.KeyRange;
 
         const CompactionType = @import("compaction.zig").CompactionType;
-        pub const Compaction = CompactionType(
+        pub const Compaction = CompactionType(Tree, Storage);
 
         pub const LookupMemoryResult = union(enum) {
             negative,
@@ -218,13 +218,12 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type) type {
         ///
         /// Specifically, it checks whether the key exists within the Tree's key range.
         pub fn key_range_contains(tree: *const Tree, snapshot: u64, key: Key) bool {
-
-
-
-
-
-
-            }
+            // TODO We currently assume that the snapshot passed in is the latest snapshot.
+            // This must be changed when persistent snapshots are implemented.
+            assert(snapshot < snapshot_latest);
+            return tree.key_range != null and
+                tree.key_range.?.key_min <= key and
+                key <= tree.key_range.?.key_max;
         }
 
         /// This function is intended to never be called by regular code. It only
data/ext/tb_client/tigerbeetle/src/message_bus.zig
CHANGED
@@ -717,6 +717,10 @@ pub fn MessageBusType(comptime IO: type) type {
             // * client_likely → client
             assert(connection.peer == .unknown or connection.peer == .client_likely);
 
+            if (connection.peer == .client_likely) {
+                assert(connection.peer.client_likely == client_id);
+            }
+
             // If there is a connection to this client, terminate and replace it.
             const result = bus.clients.getOrPutAssumeCapacity(client_id);
             if (result.found_existing) {
@@ -737,7 +741,7 @@ pub fn MessageBusType(comptime IO: type) type {
             }
 
             result.value_ptr.* = connection;
-            log.info("{}: set_and_verify_peer connection from client={}", .{
+            log.info("{}: set_and_verify_peer: connection from client={}", .{
                 bus.id,
                 client_id,
             });
@@ -756,7 +760,7 @@ pub fn MessageBusType(comptime IO: type) type {
                     bus.clients.getOrPutAssumeCapacity(client_id);
                 if (!result.found_existing) {
                     result.value_ptr.* = connection;
-                    log.info("{}: set_and_verify_peer connection from " ++
+                    log.info("{}: set_and_verify_peer: connection from " ++
                         "client_likely={}", .{ bus.id, client_id });
                 }
             },
@@ -771,7 +775,7 @@ pub fn MessageBusType(comptime IO: type) type {
             return true;
         }
 
-        /// Attempt moving messages from recv buffer into
+        /// Attempt moving messages from recv buffer into replica for processing. Called when recv
         /// syscall completes, or when a replica signals readiness to consume previously suspended
        /// messages.
         fn recv_buffer_drain(bus: *MessageBus, connection: *Connection) void {
@@ -1065,6 +1069,7 @@ pub fn MessageBusType(comptime IO: type) type {
             assert(connection.recv_buffer == null);
             assert(connection.send_queue.empty());
             assert(connection.fd == null);
+            assert(!bus.connections_suspended.contains(connection));
 
             result catch |err| {
                 log.warn("{}: on_close: to={} {}", .{ bus.id, connection.peer, err });