tigerbeetle-node 0.11.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -3
- package/src/tigerbeetle/scripts/fuzz_loop.sh +1 -1
- package/src/tigerbeetle/scripts/pre-commit.sh +2 -2
- package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
- package/src/tigerbeetle/src/benchmark.zig +25 -11
- package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
- package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
- package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
- package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
- package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -256
- package/src/tigerbeetle/src/c/tb_client.h +18 -4
- package/src/tigerbeetle/src/c/tb_client.zig +88 -26
- package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
- package/src/tigerbeetle/src/c/test.zig +371 -1
- package/src/tigerbeetle/src/cli.zig +36 -6
- package/src/tigerbeetle/src/config.zig +10 -1
- package/src/tigerbeetle/src/demo.zig +2 -1
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
- package/src/tigerbeetle/src/ewah.zig +11 -33
- package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
- package/src/tigerbeetle/src/lsm/README.md +97 -3
- package/src/tigerbeetle/src/lsm/compaction.zig +32 -7
- package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +34 -32
- package/src/tigerbeetle/src/lsm/grid.zig +39 -21
- package/src/tigerbeetle/src/lsm/groove.zig +1 -0
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +3 -3
- package/src/tigerbeetle/src/lsm/level_iterator.zig +1 -1
- package/src/tigerbeetle/src/lsm/manifest.zig +13 -0
- package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -49
- package/src/tigerbeetle/src/lsm/manifest_log.zig +173 -335
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
- package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
- package/src/tigerbeetle/src/lsm/posted_groove.zig +1 -0
- package/src/tigerbeetle/src/lsm/segmented_array.zig +24 -15
- package/src/tigerbeetle/src/lsm/table.zig +32 -20
- package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
- package/src/tigerbeetle/src/lsm/table_iterator.zig +4 -5
- package/src/tigerbeetle/src/lsm/test.zig +13 -2
- package/src/tigerbeetle/src/lsm/tree.zig +45 -7
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +36 -32
- package/src/tigerbeetle/src/main.zig +55 -2
- package/src/tigerbeetle/src/message_bus.zig +18 -7
- package/src/tigerbeetle/src/message_pool.zig +8 -2
- package/src/tigerbeetle/src/ring_buffer.zig +7 -3
- package/src/tigerbeetle/src/simulator.zig +38 -11
- package/src/tigerbeetle/src/state_machine.zig +47 -22
- package/src/tigerbeetle/src/test/accounting/workload.zig +9 -5
- package/src/tigerbeetle/src/test/cluster.zig +15 -33
- package/src/tigerbeetle/src/test/conductor.zig +2 -1
- package/src/tigerbeetle/src/test/network.zig +45 -19
- package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
- package/src/tigerbeetle/src/test/state_checker.zig +5 -7
- package/src/tigerbeetle/src/test/storage.zig +453 -110
- package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +1 -0
- package/src/tigerbeetle/src/unit_tests.zig +6 -1
- package/src/tigerbeetle/src/util.zig +97 -11
- package/src/tigerbeetle/src/vopr.zig +2 -1
- package/src/tigerbeetle/src/vsr/client.zig +8 -3
- package/src/tigerbeetle/src/vsr/journal.zig +280 -202
- package/src/tigerbeetle/src/vsr/replica.zig +169 -31
- package/src/tigerbeetle/src/vsr/superblock.zig +356 -629
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -6
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +414 -151
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +44 -9
- package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
- package/src/tigerbeetle/src/vsr.zig +19 -5
- package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
- package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299
- package/src/tigerbeetle/src/vopr_hub/README.md +0 -58
- package/src/tigerbeetle/src/vopr_hub/SETUP.md +0 -199
- package/src/tigerbeetle/src/vopr_hub/go.mod +0 -3
- package/src/tigerbeetle/src/vopr_hub/main.go +0 -1022
- package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +0 -3
- package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +0 -403
|
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
|
|
|
4
4
|
const math = std.math;
|
|
5
5
|
const mem = std.mem;
|
|
6
6
|
|
|
7
|
+
const util = @import("../util.zig");
|
|
7
8
|
const div_ceil = @import("../util.zig").div_ceil;
|
|
8
9
|
const binary_search_values_raw = @import("binary_search.zig").binary_search_values_raw;
|
|
9
10
|
const binary_search_keys = @import("binary_search.zig").binary_search_keys;
|
|
@@ -279,12 +280,13 @@ fn SegmentedArrayType(
|
|
|
279
280
|
|
|
280
281
|
const total = array.count(a) + @intCast(u32, elements.len);
|
|
281
282
|
if (total <= node_capacity) {
|
|
282
|
-
|
|
283
|
+
util.copy_right(
|
|
284
|
+
.inexact,
|
|
283
285
|
T,
|
|
284
286
|
a_pointer[cursor.relative_index + elements.len ..],
|
|
285
287
|
a_pointer[cursor.relative_index..array.count(a)],
|
|
286
288
|
);
|
|
287
|
-
|
|
289
|
+
util.copy_disjoint(.inexact, T, a_pointer[cursor.relative_index..], elements);
|
|
288
290
|
|
|
289
291
|
array.increment_indexes_after(a, @intCast(u32, elements.len));
|
|
290
292
|
return;
|
|
@@ -346,7 +348,8 @@ fn SegmentedArrayType(
|
|
|
346
348
|
|
|
347
349
|
if (a_half < cursor.relative_index) {
|
|
348
350
|
// Move the part of `a` that is past the half-way point into `b`.
|
|
349
|
-
|
|
351
|
+
util.copy_right(
|
|
352
|
+
.inexact,
|
|
350
353
|
T,
|
|
351
354
|
b_half_pointer,
|
|
352
355
|
a_pointer[a_half..cursor.relative_index],
|
|
@@ -380,10 +383,10 @@ fn SegmentedArrayType(
|
|
|
380
383
|
const source_a = source[0..target_a.len];
|
|
381
384
|
const source_b = source[target_a.len..];
|
|
382
385
|
if (target_b.ptr != source_b.ptr) {
|
|
383
|
-
|
|
386
|
+
util.copy_right(.exact, T, target_b, source_b);
|
|
384
387
|
}
|
|
385
388
|
if (target_a.ptr != source_a.ptr) {
|
|
386
|
-
|
|
389
|
+
util.copy_right(.exact, T, target_a, source_a);
|
|
387
390
|
}
|
|
388
391
|
}
|
|
389
392
|
|
|
@@ -392,12 +395,14 @@ fn SegmentedArrayType(
|
|
|
392
395
|
assert(node <= array.node_count);
|
|
393
396
|
assert(array.node_count + 1 <= node_count_max);
|
|
394
397
|
|
|
395
|
-
|
|
398
|
+
util.copy_right(
|
|
399
|
+
.exact,
|
|
396
400
|
?*[node_capacity]T,
|
|
397
401
|
array.nodes[node + 1 .. array.node_count + 1],
|
|
398
402
|
array.nodes[node..array.node_count],
|
|
399
403
|
);
|
|
400
|
-
|
|
404
|
+
util.copy_right(
|
|
405
|
+
.exact,
|
|
401
406
|
u32,
|
|
402
407
|
array.indexes[node + 1 .. array.node_count + 2],
|
|
403
408
|
array.indexes[node .. array.node_count + 1],
|
|
@@ -465,7 +470,8 @@ fn SegmentedArrayType(
|
|
|
465
470
|
|
|
466
471
|
// Remove elements from exactly one node:
|
|
467
472
|
if (a_remaining + remove_count <= array.count(a)) {
|
|
468
|
-
|
|
473
|
+
util.copy_left(
|
|
474
|
+
.inexact,
|
|
469
475
|
T,
|
|
470
476
|
a_pointer[a_remaining..],
|
|
471
477
|
a_pointer[a_remaining + remove_count .. array.count(a)],
|
|
@@ -491,7 +497,7 @@ fn SegmentedArrayType(
|
|
|
491
497
|
assert(a_remaining > 0 or b_remaining.len > 0);
|
|
492
498
|
|
|
493
499
|
if (a_remaining >= half) {
|
|
494
|
-
|
|
500
|
+
util.copy_left(.inexact, T, b_pointer, b_remaining);
|
|
495
501
|
|
|
496
502
|
array.indexes[b] = array.indexes[a] + a_remaining;
|
|
497
503
|
array.decrement_indexes_after(b, remove_count);
|
|
@@ -508,7 +514,7 @@ fn SegmentedArrayType(
|
|
|
508
514
|
assert(a_remaining < half and b_remaining.len < half);
|
|
509
515
|
assert(a_remaining + b_remaining.len <= node_capacity);
|
|
510
516
|
|
|
511
|
-
|
|
517
|
+
util.copy_disjoint(.inexact, T, a_pointer[a_remaining..], b_remaining);
|
|
512
518
|
|
|
513
519
|
array.indexes[b] = array.indexes[a] + a_remaining + @intCast(u32, b_remaining.len);
|
|
514
520
|
array.decrement_indexes_after(b, remove_count);
|
|
@@ -570,7 +576,7 @@ fn SegmentedArrayType(
|
|
|
570
576
|
|
|
571
577
|
const total = array.count(a) + @intCast(u32, b_elements.len);
|
|
572
578
|
if (total <= node_capacity) {
|
|
573
|
-
|
|
579
|
+
util.copy_disjoint(.inexact, T, a_pointer[array.count(a)..], b_elements);
|
|
574
580
|
|
|
575
581
|
array.indexes[b] = array.indexes[b + 1];
|
|
576
582
|
array.remove_empty_node_at(node_pool, b);
|
|
@@ -582,12 +588,13 @@ fn SegmentedArrayType(
|
|
|
582
588
|
assert(a_half >= b_half);
|
|
583
589
|
assert(a_half + b_half == total);
|
|
584
590
|
|
|
585
|
-
|
|
591
|
+
util.copy_disjoint(
|
|
592
|
+
.exact,
|
|
586
593
|
T,
|
|
587
594
|
a_pointer[array.count(a)..a_half],
|
|
588
595
|
b_elements[0 .. a_half - array.count(a)],
|
|
589
596
|
);
|
|
590
|
-
|
|
597
|
+
util.copy_left(.inexact, T, b_pointer, b_elements[a_half - array.count(a) ..]);
|
|
591
598
|
|
|
592
599
|
array.indexes[b] = array.indexes[a] + a_half;
|
|
593
600
|
|
|
@@ -609,12 +616,14 @@ fn SegmentedArrayType(
|
|
|
609
616
|
@ptrCast(NodePool.Node, @alignCast(NodePool.node_alignment, array.nodes[node].?)),
|
|
610
617
|
);
|
|
611
618
|
|
|
612
|
-
|
|
619
|
+
util.copy_left(
|
|
620
|
+
.exact,
|
|
613
621
|
?*[node_capacity]T,
|
|
614
622
|
array.nodes[node .. array.node_count - 1],
|
|
615
623
|
array.nodes[node + 1 .. array.node_count],
|
|
616
624
|
);
|
|
617
|
-
|
|
625
|
+
util.copy_left(
|
|
626
|
+
.exact,
|
|
618
627
|
u32,
|
|
619
628
|
array.indexes[node..array.node_count],
|
|
620
629
|
array.indexes[node + 1 .. array.node_count + 1],
|
|
@@ -8,7 +8,8 @@ const vsr = @import("../vsr.zig");
|
|
|
8
8
|
const binary_search = @import("binary_search.zig");
|
|
9
9
|
const bloom_filter = @import("bloom_filter.zig");
|
|
10
10
|
|
|
11
|
-
const
|
|
11
|
+
const util = @import("../util.zig");
|
|
12
|
+
const div_ceil = util.div_ceil;
|
|
12
13
|
const eytzinger = @import("eytzinger.zig").eytzinger;
|
|
13
14
|
const snapshot_latest = @import("tree.zig").snapshot_latest;
|
|
14
15
|
|
|
@@ -125,6 +126,8 @@ pub fn TableType(
|
|
|
125
126
|
const block_body_size = block_size - @sizeOf(vsr.Header);
|
|
126
127
|
|
|
127
128
|
pub const layout = layout: {
|
|
129
|
+
@setEvalBranchQuota(10_000);
|
|
130
|
+
|
|
128
131
|
assert(block_size % config.sector_size == 0);
|
|
129
132
|
assert(math.isPowerOfTwo(table_size_max));
|
|
130
133
|
assert(math.isPowerOfTwo(block_size));
|
|
@@ -181,7 +184,8 @@ pub fn TableType(
|
|
|
181
184
|
assert((block_keys_layout_count * key_size) % config.cache_line_size == 0);
|
|
182
185
|
|
|
183
186
|
const block_key_layout_size = block_keys_layout_count * key_size;
|
|
184
|
-
const block_key_count =
|
|
187
|
+
const block_key_count =
|
|
188
|
+
if (block_keys_layout_count == 0) 0 else block_keys_layout_count - 1;
|
|
185
189
|
|
|
186
190
|
const block_value_count_max = @divFloor(
|
|
187
191
|
block_body_size - block_key_layout_size,
|
|
@@ -199,6 +203,7 @@ pub fn TableType(
|
|
|
199
203
|
);
|
|
200
204
|
|
|
201
205
|
// Compute the number of data and filter blocks by solving the constraints:
|
|
206
|
+
// * the cumulative table size must not exceed lsm_table_size_max
|
|
202
207
|
// * the filter and data blocks' metadata must fix in the index block
|
|
203
208
|
// * the filter blocks must index all data blocks
|
|
204
209
|
// * minimize the number of filter blocks
|
|
@@ -233,13 +238,18 @@ pub fn TableType(
|
|
|
233
238
|
.filter_block_count_max = filter_blocks,
|
|
234
239
|
|
|
235
240
|
// The number of data blocks covered by a single filter block.
|
|
236
|
-
.filter_data_block_count_max =
|
|
241
|
+
.filter_data_block_count_max = std.math.min(
|
|
242
|
+
filter_data_block_count_max,
|
|
243
|
+
data_blocks,
|
|
244
|
+
),
|
|
237
245
|
};
|
|
238
246
|
};
|
|
239
247
|
|
|
240
248
|
const index_block_count = 1;
|
|
241
|
-
const filter_block_count_max = layout.filter_block_count_max;
|
|
249
|
+
pub const filter_block_count_max = layout.filter_block_count_max;
|
|
242
250
|
pub const data_block_count_max = layout.data_block_count_max;
|
|
251
|
+
pub const block_count_max =
|
|
252
|
+
index_block_count + filter_block_count_max + data_block_count_max;
|
|
243
253
|
|
|
244
254
|
const index = struct {
|
|
245
255
|
const size = @sizeOf(vsr.Header) + filter_checksums_size + data_checksums_size +
|
|
@@ -504,7 +514,7 @@ pub fn TableType(
|
|
|
504
514
|
const values_max = data_block_values(builder.data_block);
|
|
505
515
|
assert(values_max.len == data.value_count_max);
|
|
506
516
|
|
|
507
|
-
|
|
517
|
+
util.copy_disjoint(.inexact, Value, values_max[builder.value..], values);
|
|
508
518
|
builder.value += @intCast(u32, values.len);
|
|
509
519
|
|
|
510
520
|
for (values) |*value| {
|
|
@@ -552,21 +562,23 @@ pub fn TableType(
|
|
|
552
562
|
}
|
|
553
563
|
|
|
554
564
|
assert(@divExact(data.key_layout_size, key_size) == data.key_count + 1);
|
|
555
|
-
|
|
556
|
-
@
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
565
|
+
if (data.key_count > 0) {
|
|
566
|
+
const key_layout_bytes = @alignCast(
|
|
567
|
+
@alignOf(Key),
|
|
568
|
+
block[data.key_layout_offset..][0..data.key_layout_size],
|
|
569
|
+
);
|
|
570
|
+
const key_layout = mem.bytesAsValue([data.key_count + 1]Key, key_layout_bytes);
|
|
571
|
+
|
|
572
|
+
const e = eytzinger(data.key_count, data.value_count_max);
|
|
573
|
+
e.layout_from_keys_or_values(
|
|
574
|
+
Key,
|
|
575
|
+
Value,
|
|
576
|
+
key_from_value,
|
|
577
|
+
sentinel_key,
|
|
578
|
+
values,
|
|
579
|
+
key_layout,
|
|
580
|
+
);
|
|
581
|
+
}
|
|
570
582
|
|
|
571
583
|
const values_padding = mem.sliceAsBytes(values_max[builder.value..]);
|
|
572
584
|
const block_padding = block[data.padding_offset..][0..data.padding_size];
|
|
@@ -187,7 +187,7 @@ pub fn TableImmutableIteratorType(comptime Table: type, comptime Storage: type)
|
|
|
187
187
|
return true; // All values are "buffered" in memory.
|
|
188
188
|
}
|
|
189
189
|
|
|
190
|
-
pub fn peek(it: *const TableImmutableIterator) error{Empty, Drained}!Table.Key {
|
|
190
|
+
pub fn peek(it: *const TableImmutableIterator) error{ Empty, Drained }!Table.Key {
|
|
191
191
|
// NOTE: This iterator is never Drained as all values are in memory (tick is a no-op).
|
|
192
192
|
assert(!it.table.free);
|
|
193
193
|
if (it.values_index == it.table.values.len) return error.Empty;
|
|
@@ -5,6 +5,7 @@ const assert = std.debug.assert;
|
|
|
5
5
|
|
|
6
6
|
const config = @import("../config.zig");
|
|
7
7
|
|
|
8
|
+
const util = @import("../util.zig");
|
|
8
9
|
const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
|
|
9
10
|
const ManifestType = @import("manifest.zig").ManifestType;
|
|
10
11
|
const GridType = @import("grid.zig").GridType;
|
|
@@ -90,8 +91,6 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
|
|
|
90
91
|
}
|
|
91
92
|
|
|
92
93
|
pub fn deinit(it: *TableIterator, allocator: mem.Allocator) void {
|
|
93
|
-
assert(!it.read_pending);
|
|
94
|
-
|
|
95
94
|
allocator.free(it.index_block);
|
|
96
95
|
it.values.deinit(allocator);
|
|
97
96
|
for (it.data_blocks.buffer) |block| allocator.free(block);
|
|
@@ -186,7 +185,7 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
|
|
|
186
185
|
|
|
187
186
|
// Copy the bytes read into a buffer owned by the iterator since the Grid
|
|
188
187
|
// only guarantees the provided pointer to be valid in this callback.
|
|
189
|
-
|
|
188
|
+
util.copy_disjoint(.exact, u8, it.index_block, block);
|
|
190
189
|
|
|
191
190
|
if (it.index_block_callback) |callback| {
|
|
192
191
|
it.index_block_callback = null;
|
|
@@ -218,7 +217,7 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
|
|
|
218
217
|
|
|
219
218
|
// Copy the bytes read into a buffer owned by the iterator since the Grid
|
|
220
219
|
// only guarantees the provided pointer to be valid in this callback.
|
|
221
|
-
|
|
220
|
+
util.copy_disjoint(.exact, u8, it.data_blocks.next_tail().?, block);
|
|
222
221
|
|
|
223
222
|
it.data_blocks.advance_tail();
|
|
224
223
|
it.data_block_index += 1;
|
|
@@ -264,7 +263,7 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
|
|
|
264
263
|
/// - error.Empty when there are no values remaining to iterate.
|
|
265
264
|
/// - error.Drained when the iterator isn't empty, but some values
|
|
266
265
|
/// still need to be buffered into memory via tick().
|
|
267
|
-
pub fn peek(it: TableIterator) error{Empty, Drained}!Table.Key {
|
|
266
|
+
pub fn peek(it: TableIterator) error{ Empty, Drained }!Table.Key {
|
|
268
267
|
assert(!it.read_pending);
|
|
269
268
|
assert(!it.read_table_index);
|
|
270
269
|
|
|
@@ -27,7 +27,10 @@ const SuperBlock = vsr.SuperBlockType(Storage);
|
|
|
27
27
|
const Environment = struct {
|
|
28
28
|
const cluster = 32;
|
|
29
29
|
const replica = 4;
|
|
30
|
-
const size_max = vsr.Zone.superblock.size().? +
|
|
30
|
+
const size_max = vsr.Zone.superblock.size().? +
|
|
31
|
+
vsr.Zone.wal_headers.size().? +
|
|
32
|
+
vsr.Zone.wal_prepares.size().? +
|
|
33
|
+
(512 + 64) * 1024 * 1024;
|
|
31
34
|
|
|
32
35
|
const node_count = 1024;
|
|
33
36
|
const cache_entries_max = 2 * 1024 * 1024;
|
|
@@ -196,8 +199,16 @@ const Environment = struct {
|
|
|
196
199
|
|
|
197
200
|
log.debug("forest checkpointing completed!", .{});
|
|
198
201
|
|
|
202
|
+
var vsr_state = env.superblock.staging.vsr_state;
|
|
203
|
+
vsr_state.commit_min += 1;
|
|
204
|
+
vsr_state.commit_min_checkpoint += 1;
|
|
205
|
+
|
|
199
206
|
env.state = .superblock_checkpointing;
|
|
200
|
-
env.superblock.checkpoint(
|
|
207
|
+
env.superblock.checkpoint(
|
|
208
|
+
superblock_checkpoint_callback,
|
|
209
|
+
&env.superblock_context,
|
|
210
|
+
vsr_state,
|
|
211
|
+
);
|
|
201
212
|
}
|
|
202
213
|
|
|
203
214
|
fn superblock_checkpoint_callback(superblock_context: *SuperBlock.Context) void {
|
|
@@ -51,6 +51,21 @@ const half_bar_beat_count = @divExact(config.lsm_batch_multiple, 2);
|
|
|
51
51
|
/// The maximum number of tables for a single tree.
|
|
52
52
|
pub const table_count_max = table_count_max_for_tree(config.lsm_growth_factor, config.lsm_levels);
|
|
53
53
|
|
|
54
|
+
/// The upper-bound count of input tables to a single tree's compaction.
|
|
55
|
+
///
|
|
56
|
+
/// - +1 from level A.
|
|
57
|
+
/// - +lsm_growth_factor from level B. The A-input table cannot overlap with an extra B-input table
|
|
58
|
+
/// because input table selection is least-overlap. If the input table overlaps on one or both
|
|
59
|
+
/// edges, there must be another table with less overlap to select.
|
|
60
|
+
pub const compaction_tables_input_max = 1 + config.lsm_growth_factor;
|
|
61
|
+
|
|
62
|
+
/// The upper-bound count of output tables from a single tree's compaction.
|
|
63
|
+
/// In the "worst" case, no keys are overwritten/merged, and no tombstones are dropped.
|
|
64
|
+
pub const compaction_tables_output_max = compaction_tables_input_max;
|
|
65
|
+
|
|
66
|
+
/// The maximum number of concurrent compactions (per tree).
|
|
67
|
+
pub const compactions_max = div_ceil(config.lsm_levels, 2);
|
|
68
|
+
|
|
54
69
|
pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_name: []const u8) type {
|
|
55
70
|
const Key = TreeTable.Key;
|
|
56
71
|
const Value = TreeTable.Value;
|
|
@@ -490,6 +505,21 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
490
505
|
|
|
491
506
|
tree.compaction_op = op;
|
|
492
507
|
|
|
508
|
+
if (op < config.lsm_batch_multiple) {
|
|
509
|
+
// There is nothing to compact for the first measure.
|
|
510
|
+
// We skip the main compaction code path first compaction bar entirely because it
|
|
511
|
+
// is a special case — its first beat is 1, not 0.
|
|
512
|
+
|
|
513
|
+
tree.lookup_snapshot_max = op + 1;
|
|
514
|
+
if (op + 1 == config.lsm_batch_multiple) {
|
|
515
|
+
tree.compact_mutable_table_into_immutable();
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
// TODO Defer this callback until tick() to avoid stack growth.
|
|
519
|
+
callback(tree);
|
|
520
|
+
return;
|
|
521
|
+
}
|
|
522
|
+
|
|
493
523
|
if (tree.grid.superblock.working.vsr_state.op_compacted(op)) {
|
|
494
524
|
// We recovered from a checkpoint, and must avoid replaying one bar of
|
|
495
525
|
// compactions that were applied before the checkpoint. Repeating these ops'
|
|
@@ -497,7 +527,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
497
527
|
// causing the storage state of the replica to diverge from the cluster.
|
|
498
528
|
// See also: lookup_snapshot_max_for_checkpoint().
|
|
499
529
|
|
|
500
|
-
if (
|
|
530
|
+
if (op + 1 == tree.lookup_snapshot_max) {
|
|
501
531
|
// This is the last op of the skipped compaction bar.
|
|
502
532
|
// Prepare the immutable table for the next bar — since this state is
|
|
503
533
|
// in-memory, it cannot be skipped.
|
|
@@ -535,6 +565,8 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
535
565
|
config.lsm_batch_multiple,
|
|
536
566
|
});
|
|
537
567
|
|
|
568
|
+
if (start) tree.manifest.reserve();
|
|
569
|
+
|
|
538
570
|
// Try to start compacting the immutable table.
|
|
539
571
|
const even_levels = compaction_beat < half_bar_beat_count;
|
|
540
572
|
if (even_levels) {
|
|
@@ -571,6 +603,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
571
603
|
);
|
|
572
604
|
|
|
573
605
|
assert(range.table_count >= 1);
|
|
606
|
+
assert(range.table_count <= compaction_tables_input_max);
|
|
574
607
|
assert(compare_keys(range.key_min, tree.table_immutable.key_min()) != .gt);
|
|
575
608
|
assert(compare_keys(range.key_max, tree.table_immutable.key_max()) != .lt);
|
|
576
609
|
|
|
@@ -595,6 +628,9 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
595
628
|
}
|
|
596
629
|
|
|
597
630
|
fn compact_start_table(tree: *Tree, op_min: u64, context: CompactionTableContext) void {
|
|
631
|
+
const compaction_beat = tree.compaction_op % half_bar_beat_count;
|
|
632
|
+
assert(compaction_beat == 0);
|
|
633
|
+
|
|
598
634
|
assert(context.level_a < config.lsm_levels);
|
|
599
635
|
assert(context.level_b < config.lsm_levels);
|
|
600
636
|
assert(context.level_a + 1 == context.level_b);
|
|
@@ -602,15 +638,15 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
602
638
|
// Do not start compaction if level A does not require compaction.
|
|
603
639
|
const table_range = tree.manifest.compaction_table(context.level_a) orelse return;
|
|
604
640
|
const table = table_range.table;
|
|
605
|
-
const range = table_range.range;
|
|
606
641
|
|
|
607
|
-
assert(range.table_count >= 1);
|
|
642
|
+
assert(table_range.range.table_count >= 1);
|
|
643
|
+
assert(table_range.range.table_count <= compaction_tables_input_max);
|
|
608
644
|
assert(compare_keys(table.key_min, table.key_max) != .gt);
|
|
609
|
-
assert(compare_keys(range.key_min, table.key_min) != .gt);
|
|
610
|
-
assert(compare_keys(range.key_max, table.key_max) != .lt);
|
|
645
|
+
assert(compare_keys(table_range.range.key_min, table.key_min) != .gt);
|
|
646
|
+
assert(compare_keys(table_range.range.key_max, table.key_max) != .lt);
|
|
611
647
|
|
|
612
648
|
log.debug(tree_name ++ ": compacting {d} tables from level {d} to level {d}", .{
|
|
613
|
-
range.table_count,
|
|
649
|
+
table_range.range.table_count,
|
|
614
650
|
context.level_a,
|
|
615
651
|
context.level_b,
|
|
616
652
|
});
|
|
@@ -888,11 +924,13 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
|
|
|
888
924
|
}
|
|
889
925
|
|
|
890
926
|
pub fn checkpoint(tree: *Tree, callback: fn (*Tree) void) void {
|
|
891
|
-
// Assert no outstanding compact_tick() work
|
|
927
|
+
// Assert no outstanding compact_tick() work.
|
|
892
928
|
assert(tree.compaction_io_pending == 0);
|
|
893
929
|
assert(tree.compaction_callback == null);
|
|
894
930
|
assert(tree.compaction_op > 0);
|
|
895
931
|
assert(tree.compaction_op + 1 == tree.lookup_snapshot_max);
|
|
932
|
+
// Don't re-run the checkpoint we recovered from.
|
|
933
|
+
assert(!tree.grid.superblock.working.vsr_state.op_compacted(tree.compaction_op));
|
|
896
934
|
|
|
897
935
|
// Assert that this is the last beat in the compaction bar.
|
|
898
936
|
const compaction_beat = tree.compaction_op % config.lsm_batch_multiple;
|
|
@@ -82,7 +82,10 @@ const Environment = struct {
|
|
|
82
82
|
const cluster = 32;
|
|
83
83
|
const replica = 4;
|
|
84
84
|
// TODO Is this appropriate for the number of fuzz_ops we want to run?
|
|
85
|
-
const size_max = vsr.Zone.superblock.size().? +
|
|
85
|
+
const size_max = vsr.Zone.superblock.size().? +
|
|
86
|
+
vsr.Zone.wal_headers.size().? +
|
|
87
|
+
vsr.Zone.wal_prepares.size().? +
|
|
88
|
+
1024 * 1024 * 1024;
|
|
86
89
|
|
|
87
90
|
const node_count = 1024;
|
|
88
91
|
// This is the smallest size that set_associative_cache will allow us.
|
|
@@ -126,6 +129,7 @@ const Environment = struct {
|
|
|
126
129
|
tree_exists: bool,
|
|
127
130
|
lookup_context: Tree.LookupContext = undefined,
|
|
128
131
|
lookup_value: ?*const Key.Value = null,
|
|
132
|
+
checkpoint_op: ?u64 = null,
|
|
129
133
|
|
|
130
134
|
fn init(env: *Environment, storage: *Storage) !void {
|
|
131
135
|
env.state = .uninit;
|
|
@@ -238,7 +242,8 @@ const Environment = struct {
|
|
|
238
242
|
env.change_state(.tree_compacting, .tree_open);
|
|
239
243
|
}
|
|
240
244
|
|
|
241
|
-
pub fn checkpoint(env: *Environment) void {
|
|
245
|
+
pub fn checkpoint(env: *Environment, op: u64) void {
|
|
246
|
+
env.checkpoint_op = op - config.lsm_batch_multiple;
|
|
242
247
|
env.change_state(.tree_open, .tree_checkpointing);
|
|
243
248
|
env.tree.checkpoint(tree_checkpoint_callback);
|
|
244
249
|
env.tick_until_state_change(.tree_checkpointing, .superblock_checkpointing);
|
|
@@ -248,7 +253,14 @@ const Environment = struct {
|
|
|
248
253
|
fn tree_checkpoint_callback(tree: *Tree) void {
|
|
249
254
|
const env = @fieldParentPtr(@This(), "tree", tree);
|
|
250
255
|
env.change_state(.tree_checkpointing, .superblock_checkpointing);
|
|
251
|
-
env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context
|
|
256
|
+
env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context, .{
|
|
257
|
+
.commit_min_checksum = env.superblock.working.vsr_state.commit_min_checksum + 1,
|
|
258
|
+
.commit_min = env.checkpoint_op.?,
|
|
259
|
+
.commit_max = env.checkpoint_op.? + 1,
|
|
260
|
+
.view_normal = 0,
|
|
261
|
+
.view = 0,
|
|
262
|
+
});
|
|
263
|
+
env.checkpoint_op = null;
|
|
252
264
|
}
|
|
253
265
|
|
|
254
266
|
fn superblock_checkpoint_callback(superblock_context: *SuperBlock.Context) void {
|
|
@@ -292,17 +304,17 @@ const Environment = struct {
|
|
|
292
304
|
|
|
293
305
|
for (fuzz_ops) |fuzz_op, fuzz_op_index| {
|
|
294
306
|
log.debug("Running fuzz_ops[{}/{}] == {}", .{ fuzz_op_index, fuzz_ops.len, fuzz_op });
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
307
|
+
const storage_size_used = storage.size_used();
|
|
308
|
+
log.debug("storage.size_used = {}/{}", .{ storage_size_used, storage.size });
|
|
309
|
+
const model_size = model.count() * @sizeOf(Key.Value);
|
|
310
|
+
log.debug("space_amplification = {d:.2}", .{
|
|
311
|
+
@intToFloat(f64, storage_size_used) / @intToFloat(f64, model_size),
|
|
312
|
+
});
|
|
300
313
|
// Apply fuzz_op to the tree and the model.
|
|
301
314
|
switch (fuzz_op) {
|
|
302
315
|
.compact => |compact| {
|
|
303
316
|
env.compact(compact.op);
|
|
304
|
-
if (compact.checkpoint)
|
|
305
|
-
env.checkpoint();
|
|
317
|
+
if (compact.checkpoint) env.checkpoint(compact.op);
|
|
306
318
|
},
|
|
307
319
|
.put => |value| {
|
|
308
320
|
env.tree.put(&value);
|
|
@@ -333,27 +345,9 @@ const Environment = struct {
|
|
|
333
345
|
}
|
|
334
346
|
};
|
|
335
347
|
|
|
336
|
-
pub fn run_fuzz_ops(fuzz_ops: []const FuzzOp) !void {
|
|
348
|
+
pub fn run_fuzz_ops(storage_options: Storage.Options, fuzz_ops: []const FuzzOp) !void {
|
|
337
349
|
// Init mocked storage.
|
|
338
|
-
var storage = try Storage.init(
|
|
339
|
-
allocator,
|
|
340
|
-
Environment.size_max,
|
|
341
|
-
Storage.Options{
|
|
342
|
-
// We don't apply storage faults yet, so this seed doesn't matter.
|
|
343
|
-
.seed = 0xdeadbeef,
|
|
344
|
-
.read_latency_min = 0,
|
|
345
|
-
.read_latency_mean = 0,
|
|
346
|
-
.write_latency_min = 0,
|
|
347
|
-
.write_latency_mean = 0,
|
|
348
|
-
.read_fault_probability = 0,
|
|
349
|
-
.write_fault_probability = 0,
|
|
350
|
-
},
|
|
351
|
-
0,
|
|
352
|
-
.{
|
|
353
|
-
.first_offset = 0,
|
|
354
|
-
.period = 0,
|
|
355
|
-
},
|
|
356
|
-
);
|
|
350
|
+
var storage = try Storage.init(allocator, Environment.size_max, storage_options);
|
|
357
351
|
defer storage.deinit(allocator);
|
|
358
352
|
|
|
359
353
|
try Environment.format(&storage);
|
|
@@ -412,6 +406,7 @@ pub fn generate_fuzz_ops(random: std.rand.Random) ![]const FuzzOp {
|
|
|
412
406
|
const checkpoint =
|
|
413
407
|
// Can only checkpoint on the last beat of the bar.
|
|
414
408
|
compact_op % config.lsm_batch_multiple == config.lsm_batch_multiple - 1 and
|
|
409
|
+
compact_op > config.lsm_batch_multiple and
|
|
415
410
|
// Checkpoint at roughly the same rate as log wraparound.
|
|
416
411
|
random.uintLessThan(usize, Environment.compacts_per_checkpoint) == 0;
|
|
417
412
|
break :compact FuzzOp{
|
|
@@ -447,11 +442,20 @@ pub fn generate_fuzz_ops(random: std.rand.Random) ![]const FuzzOp {
|
|
|
447
442
|
pub fn main() !void {
|
|
448
443
|
const fuzz_args = try fuzz.parse_fuzz_args(allocator);
|
|
449
444
|
var rng = std.rand.DefaultPrng.init(fuzz_args.seed);
|
|
445
|
+
const random = rng.random();
|
|
450
446
|
|
|
451
|
-
const fuzz_ops = try generate_fuzz_ops(
|
|
447
|
+
const fuzz_ops = try generate_fuzz_ops(random);
|
|
452
448
|
defer allocator.free(fuzz_ops);
|
|
453
449
|
|
|
454
|
-
|
|
450
|
+
const storage_options = .{
|
|
451
|
+
.seed = random.int(u64),
|
|
452
|
+
.read_latency_min = 0,
|
|
453
|
+
.read_latency_mean = 0 + fuzz.random_int_exponential(random, u64, 20),
|
|
454
|
+
.write_latency_min = 0,
|
|
455
|
+
.write_latency_mean = 0 + fuzz.random_int_exponential(random, u64, 20),
|
|
456
|
+
};
|
|
457
|
+
|
|
458
|
+
try run_fuzz_ops(storage_options, fuzz_ops);
|
|
455
459
|
|
|
456
460
|
log.info("Passed!", .{});
|
|
457
461
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const builtin = @import("builtin");
|
|
1
2
|
const std = @import("std");
|
|
2
3
|
const assert = std.debug.assert;
|
|
3
4
|
const fmt = std.fmt;
|
|
@@ -5,6 +6,7 @@ const mem = std.mem;
|
|
|
5
6
|
const os = std.os;
|
|
6
7
|
const log = std.log.scoped(.main);
|
|
7
8
|
|
|
9
|
+
const build_options = @import("tigerbeetle_build_options");
|
|
8
10
|
const config = @import("config.zig");
|
|
9
11
|
pub const log_level: std.log.Level = @intToEnum(std.log.Level, config.log_level);
|
|
10
12
|
|
|
@@ -45,6 +47,7 @@ pub fn main() !void {
|
|
|
45
47
|
switch (parse_args) {
|
|
46
48
|
.format => |*args| try Command.format(allocator, args.cluster, args.replica, args.path),
|
|
47
49
|
.start => |*args| try Command.start(&arena, args.addresses, args.memory, args.path),
|
|
50
|
+
.version => |*args| try Command.version(allocator, args.verbose),
|
|
48
51
|
}
|
|
49
52
|
}
|
|
50
53
|
|
|
@@ -123,7 +126,7 @@ const Command = struct {
|
|
|
123
126
|
defer command.deinit(allocator);
|
|
124
127
|
|
|
125
128
|
var replica: Replica = undefined;
|
|
126
|
-
|
|
129
|
+
replica.open(allocator, .{
|
|
127
130
|
.replica_count = @intCast(u8, addresses.len),
|
|
128
131
|
.storage = &command.storage,
|
|
129
132
|
.message_pool = &command.message_pool,
|
|
@@ -141,7 +144,7 @@ const Command = struct {
|
|
|
141
144
|
},
|
|
142
145
|
}) catch |err| switch (err) {
|
|
143
146
|
error.NoAddress => fatal("all --addresses must be provided", .{}),
|
|
144
|
-
else =>
|
|
147
|
+
else => |e| return e,
|
|
145
148
|
};
|
|
146
149
|
|
|
147
150
|
// Calculate how many bytes are allocated inside `arena`.
|
|
@@ -173,4 +176,54 @@ const Command = struct {
|
|
|
173
176
|
try command.io.run_for_ns(config.tick_ms * std.time.ns_per_ms);
|
|
174
177
|
}
|
|
175
178
|
}
|
|
179
|
+
|
|
180
|
+
pub fn version(allocator: mem.Allocator, verbose: bool) !void {
|
|
181
|
+
_ = allocator;
|
|
182
|
+
|
|
183
|
+
var stdout_buffer = std.io.bufferedWriter(std.io.getStdOut().writer());
|
|
184
|
+
const stdout = stdout_buffer.writer();
|
|
185
|
+
// TODO Pass an actual version number in on build, instead of just saying "experimental".
|
|
186
|
+
try stdout.writeAll("TigerBeetle version experimental\n");
|
|
187
|
+
|
|
188
|
+
if (verbose) {
|
|
189
|
+
try std.fmt.format(
|
|
190
|
+
stdout,
|
|
191
|
+
\\
|
|
192
|
+
\\git_commit="{s}"
|
|
193
|
+
\\
|
|
194
|
+
,
|
|
195
|
+
.{build_options.git_commit orelse "?"},
|
|
196
|
+
);
|
|
197
|
+
|
|
198
|
+
try stdout.writeAll("\n");
|
|
199
|
+
inline for (.{ "zig_version", "mode" }) |declaration| {
|
|
200
|
+
try print_value(stdout, declaration, @field(builtin, declaration));
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
try stdout.writeAll("\n");
|
|
204
|
+
inline for (std.meta.declarations(config)) |declaration| {
|
|
205
|
+
if (!declaration.is_pub) continue;
|
|
206
|
+
try print_value(stdout, declaration.name, @field(config, declaration.name));
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
try stdout_buffer.flush();
|
|
210
|
+
}
|
|
176
211
|
};
|
|
212
|
+
|
|
213
|
+
fn print_value(
|
|
214
|
+
writer: anytype,
|
|
215
|
+
comptime field: []const u8,
|
|
216
|
+
comptime value: anytype,
|
|
217
|
+
) !void {
|
|
218
|
+
if (@typeInfo(@TypeOf(value)) == .Pointer) {
|
|
219
|
+
try std.fmt.format(writer, "{s}=\"{s}\"\n", .{
|
|
220
|
+
field,
|
|
221
|
+
std.fmt.fmtSliceEscapeLower(value),
|
|
222
|
+
});
|
|
223
|
+
} else {
|
|
224
|
+
try std.fmt.format(writer, "{s}={}\n", .{
|
|
225
|
+
field,
|
|
226
|
+
value,
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
}
|