tigerbeetle-node 0.11.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/package.json +4 -3
  2. package/src/tigerbeetle/scripts/fuzz_loop.sh +1 -1
  3. package/src/tigerbeetle/scripts/pre-commit.sh +2 -2
  4. package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
  5. package/src/tigerbeetle/src/benchmark.zig +25 -11
  6. package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
  7. package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
  8. package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
  9. package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
  10. package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -256
  11. package/src/tigerbeetle/src/c/tb_client.h +18 -4
  12. package/src/tigerbeetle/src/c/tb_client.zig +88 -26
  13. package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
  14. package/src/tigerbeetle/src/c/test.zig +371 -1
  15. package/src/tigerbeetle/src/cli.zig +36 -6
  16. package/src/tigerbeetle/src/config.zig +10 -1
  17. package/src/tigerbeetle/src/demo.zig +2 -1
  18. package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
  19. package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
  20. package/src/tigerbeetle/src/ewah.zig +11 -33
  21. package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
  22. package/src/tigerbeetle/src/lsm/README.md +97 -3
  23. package/src/tigerbeetle/src/lsm/compaction.zig +32 -7
  24. package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
  25. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +34 -32
  26. package/src/tigerbeetle/src/lsm/grid.zig +39 -21
  27. package/src/tigerbeetle/src/lsm/groove.zig +1 -0
  28. package/src/tigerbeetle/src/lsm/k_way_merge.zig +3 -3
  29. package/src/tigerbeetle/src/lsm/level_iterator.zig +1 -1
  30. package/src/tigerbeetle/src/lsm/manifest.zig +13 -0
  31. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -49
  32. package/src/tigerbeetle/src/lsm/manifest_log.zig +173 -335
  33. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
  34. package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
  35. package/src/tigerbeetle/src/lsm/posted_groove.zig +1 -0
  36. package/src/tigerbeetle/src/lsm/segmented_array.zig +24 -15
  37. package/src/tigerbeetle/src/lsm/table.zig +32 -20
  38. package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
  39. package/src/tigerbeetle/src/lsm/table_iterator.zig +4 -5
  40. package/src/tigerbeetle/src/lsm/test.zig +13 -2
  41. package/src/tigerbeetle/src/lsm/tree.zig +45 -7
  42. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +36 -32
  43. package/src/tigerbeetle/src/main.zig +55 -2
  44. package/src/tigerbeetle/src/message_bus.zig +18 -7
  45. package/src/tigerbeetle/src/message_pool.zig +8 -2
  46. package/src/tigerbeetle/src/ring_buffer.zig +7 -3
  47. package/src/tigerbeetle/src/simulator.zig +38 -11
  48. package/src/tigerbeetle/src/state_machine.zig +47 -22
  49. package/src/tigerbeetle/src/test/accounting/workload.zig +9 -5
  50. package/src/tigerbeetle/src/test/cluster.zig +15 -33
  51. package/src/tigerbeetle/src/test/conductor.zig +2 -1
  52. package/src/tigerbeetle/src/test/network.zig +45 -19
  53. package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
  54. package/src/tigerbeetle/src/test/state_checker.zig +5 -7
  55. package/src/tigerbeetle/src/test/storage.zig +453 -110
  56. package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
  57. package/src/tigerbeetle/src/tigerbeetle.zig +1 -0
  58. package/src/tigerbeetle/src/unit_tests.zig +6 -1
  59. package/src/tigerbeetle/src/util.zig +97 -11
  60. package/src/tigerbeetle/src/vopr.zig +2 -1
  61. package/src/tigerbeetle/src/vsr/client.zig +8 -3
  62. package/src/tigerbeetle/src/vsr/journal.zig +280 -202
  63. package/src/tigerbeetle/src/vsr/replica.zig +169 -31
  64. package/src/tigerbeetle/src/vsr/superblock.zig +356 -629
  65. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -6
  66. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +414 -151
  67. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
  68. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
  69. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +44 -9
  70. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
  71. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
  72. package/src/tigerbeetle/src/vsr.zig +19 -5
  73. package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
  74. package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299
  75. package/src/tigerbeetle/src/vopr_hub/README.md +0 -58
  76. package/src/tigerbeetle/src/vopr_hub/SETUP.md +0 -199
  77. package/src/tigerbeetle/src/vopr_hub/go.mod +0 -3
  78. package/src/tigerbeetle/src/vopr_hub/main.go +0 -1022
  79. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +0 -3
  80. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +0 -403
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
4
4
  const math = std.math;
5
5
  const mem = std.mem;
6
6
 
7
+ const util = @import("../util.zig");
7
8
  const div_ceil = @import("../util.zig").div_ceil;
8
9
  const binary_search_values_raw = @import("binary_search.zig").binary_search_values_raw;
9
10
  const binary_search_keys = @import("binary_search.zig").binary_search_keys;
@@ -279,12 +280,13 @@ fn SegmentedArrayType(
279
280
 
280
281
  const total = array.count(a) + @intCast(u32, elements.len);
281
282
  if (total <= node_capacity) {
282
- mem.copyBackwards(
283
+ util.copy_right(
284
+ .inexact,
283
285
  T,
284
286
  a_pointer[cursor.relative_index + elements.len ..],
285
287
  a_pointer[cursor.relative_index..array.count(a)],
286
288
  );
287
- mem.copy(T, a_pointer[cursor.relative_index..], elements);
289
+ util.copy_disjoint(.inexact, T, a_pointer[cursor.relative_index..], elements);
288
290
 
289
291
  array.increment_indexes_after(a, @intCast(u32, elements.len));
290
292
  return;
@@ -346,7 +348,8 @@ fn SegmentedArrayType(
346
348
 
347
349
  if (a_half < cursor.relative_index) {
348
350
  // Move the part of `a` that is past the half-way point into `b`.
349
- mem.copyBackwards(
351
+ util.copy_right(
352
+ .inexact,
350
353
  T,
351
354
  b_half_pointer,
352
355
  a_pointer[a_half..cursor.relative_index],
@@ -380,10 +383,10 @@ fn SegmentedArrayType(
380
383
  const source_a = source[0..target_a.len];
381
384
  const source_b = source[target_a.len..];
382
385
  if (target_b.ptr != source_b.ptr) {
383
- mem.copyBackwards(T, target_b, source_b);
386
+ util.copy_right(.exact, T, target_b, source_b);
384
387
  }
385
388
  if (target_a.ptr != source_a.ptr) {
386
- mem.copyBackwards(T, target_a, source_a);
389
+ util.copy_right(.exact, T, target_a, source_a);
387
390
  }
388
391
  }
389
392
 
@@ -392,12 +395,14 @@ fn SegmentedArrayType(
392
395
  assert(node <= array.node_count);
393
396
  assert(array.node_count + 1 <= node_count_max);
394
397
 
395
- mem.copyBackwards(
398
+ util.copy_right(
399
+ .exact,
396
400
  ?*[node_capacity]T,
397
401
  array.nodes[node + 1 .. array.node_count + 1],
398
402
  array.nodes[node..array.node_count],
399
403
  );
400
- mem.copyBackwards(
404
+ util.copy_right(
405
+ .exact,
401
406
  u32,
402
407
  array.indexes[node + 1 .. array.node_count + 2],
403
408
  array.indexes[node .. array.node_count + 1],
@@ -465,7 +470,8 @@ fn SegmentedArrayType(
465
470
 
466
471
  // Remove elements from exactly one node:
467
472
  if (a_remaining + remove_count <= array.count(a)) {
468
- mem.copy(
473
+ util.copy_left(
474
+ .inexact,
469
475
  T,
470
476
  a_pointer[a_remaining..],
471
477
  a_pointer[a_remaining + remove_count .. array.count(a)],
@@ -491,7 +497,7 @@ fn SegmentedArrayType(
491
497
  assert(a_remaining > 0 or b_remaining.len > 0);
492
498
 
493
499
  if (a_remaining >= half) {
494
- mem.copy(T, b_pointer, b_remaining);
500
+ util.copy_left(.inexact, T, b_pointer, b_remaining);
495
501
 
496
502
  array.indexes[b] = array.indexes[a] + a_remaining;
497
503
  array.decrement_indexes_after(b, remove_count);
@@ -508,7 +514,7 @@ fn SegmentedArrayType(
508
514
  assert(a_remaining < half and b_remaining.len < half);
509
515
  assert(a_remaining + b_remaining.len <= node_capacity);
510
516
 
511
- mem.copy(T, a_pointer[a_remaining..], b_remaining);
517
+ util.copy_disjoint(.inexact, T, a_pointer[a_remaining..], b_remaining);
512
518
 
513
519
  array.indexes[b] = array.indexes[a] + a_remaining + @intCast(u32, b_remaining.len);
514
520
  array.decrement_indexes_after(b, remove_count);
@@ -570,7 +576,7 @@ fn SegmentedArrayType(
570
576
 
571
577
  const total = array.count(a) + @intCast(u32, b_elements.len);
572
578
  if (total <= node_capacity) {
573
- mem.copy(T, a_pointer[array.count(a)..], b_elements);
579
+ util.copy_disjoint(.inexact, T, a_pointer[array.count(a)..], b_elements);
574
580
 
575
581
  array.indexes[b] = array.indexes[b + 1];
576
582
  array.remove_empty_node_at(node_pool, b);
@@ -582,12 +588,13 @@ fn SegmentedArrayType(
582
588
  assert(a_half >= b_half);
583
589
  assert(a_half + b_half == total);
584
590
 
585
- mem.copy(
591
+ util.copy_disjoint(
592
+ .exact,
586
593
  T,
587
594
  a_pointer[array.count(a)..a_half],
588
595
  b_elements[0 .. a_half - array.count(a)],
589
596
  );
590
- mem.copy(T, b_pointer, b_elements[a_half - array.count(a) ..]);
597
+ util.copy_left(.inexact, T, b_pointer, b_elements[a_half - array.count(a) ..]);
591
598
 
592
599
  array.indexes[b] = array.indexes[a] + a_half;
593
600
 
@@ -609,12 +616,14 @@ fn SegmentedArrayType(
609
616
  @ptrCast(NodePool.Node, @alignCast(NodePool.node_alignment, array.nodes[node].?)),
610
617
  );
611
618
 
612
- mem.copy(
619
+ util.copy_left(
620
+ .exact,
613
621
  ?*[node_capacity]T,
614
622
  array.nodes[node .. array.node_count - 1],
615
623
  array.nodes[node + 1 .. array.node_count],
616
624
  );
617
- mem.copy(
625
+ util.copy_left(
626
+ .exact,
618
627
  u32,
619
628
  array.indexes[node..array.node_count],
620
629
  array.indexes[node + 1 .. array.node_count + 1],
@@ -8,7 +8,8 @@ const vsr = @import("../vsr.zig");
8
8
  const binary_search = @import("binary_search.zig");
9
9
  const bloom_filter = @import("bloom_filter.zig");
10
10
 
11
- const div_ceil = @import("../util.zig").div_ceil;
11
+ const util = @import("../util.zig");
12
+ const div_ceil = util.div_ceil;
12
13
  const eytzinger = @import("eytzinger.zig").eytzinger;
13
14
  const snapshot_latest = @import("tree.zig").snapshot_latest;
14
15
 
@@ -125,6 +126,8 @@ pub fn TableType(
125
126
  const block_body_size = block_size - @sizeOf(vsr.Header);
126
127
 
127
128
  pub const layout = layout: {
129
+ @setEvalBranchQuota(10_000);
130
+
128
131
  assert(block_size % config.sector_size == 0);
129
132
  assert(math.isPowerOfTwo(table_size_max));
130
133
  assert(math.isPowerOfTwo(block_size));
@@ -181,7 +184,8 @@ pub fn TableType(
181
184
  assert((block_keys_layout_count * key_size) % config.cache_line_size == 0);
182
185
 
183
186
  const block_key_layout_size = block_keys_layout_count * key_size;
184
- const block_key_count = block_keys_layout_count - 1;
187
+ const block_key_count =
188
+ if (block_keys_layout_count == 0) 0 else block_keys_layout_count - 1;
185
189
 
186
190
  const block_value_count_max = @divFloor(
187
191
  block_body_size - block_key_layout_size,
@@ -199,6 +203,7 @@ pub fn TableType(
199
203
  );
200
204
 
201
205
  // Compute the number of data and filter blocks by solving the constraints:
206
+ // * the cumulative table size must not exceed lsm_table_size_max
202
207
  // * the filter and data blocks' metadata must fit in the index block
203
208
  // * the filter blocks must index all data blocks
204
209
  // * minimize the number of filter blocks
@@ -233,13 +238,18 @@ pub fn TableType(
233
238
  .filter_block_count_max = filter_blocks,
234
239
 
235
240
  // The number of data blocks covered by a single filter block.
236
- .filter_data_block_count_max = filter_data_block_count_max,
241
+ .filter_data_block_count_max = std.math.min(
242
+ filter_data_block_count_max,
243
+ data_blocks,
244
+ ),
237
245
  };
238
246
  };
239
247
 
240
248
  const index_block_count = 1;
241
- const filter_block_count_max = layout.filter_block_count_max;
249
+ pub const filter_block_count_max = layout.filter_block_count_max;
242
250
  pub const data_block_count_max = layout.data_block_count_max;
251
+ pub const block_count_max =
252
+ index_block_count + filter_block_count_max + data_block_count_max;
243
253
 
244
254
  const index = struct {
245
255
  const size = @sizeOf(vsr.Header) + filter_checksums_size + data_checksums_size +
@@ -504,7 +514,7 @@ pub fn TableType(
504
514
  const values_max = data_block_values(builder.data_block);
505
515
  assert(values_max.len == data.value_count_max);
506
516
 
507
- mem.copy(Value, values_max[builder.value..], values);
517
+ util.copy_disjoint(.inexact, Value, values_max[builder.value..], values);
508
518
  builder.value += @intCast(u32, values.len);
509
519
 
510
520
  for (values) |*value| {
@@ -552,21 +562,23 @@ pub fn TableType(
552
562
  }
553
563
 
554
564
  assert(@divExact(data.key_layout_size, key_size) == data.key_count + 1);
555
- const key_layout_bytes = @alignCast(
556
- @alignOf(Key),
557
- block[data.key_layout_offset..][0..data.key_layout_size],
558
- );
559
- const key_layout = mem.bytesAsValue([data.key_count + 1]Key, key_layout_bytes);
560
-
561
- const e = eytzinger(data.key_count, data.value_count_max);
562
- e.layout_from_keys_or_values(
563
- Key,
564
- Value,
565
- key_from_value,
566
- sentinel_key,
567
- values,
568
- key_layout,
569
- );
565
+ if (data.key_count > 0) {
566
+ const key_layout_bytes = @alignCast(
567
+ @alignOf(Key),
568
+ block[data.key_layout_offset..][0..data.key_layout_size],
569
+ );
570
+ const key_layout = mem.bytesAsValue([data.key_count + 1]Key, key_layout_bytes);
571
+
572
+ const e = eytzinger(data.key_count, data.value_count_max);
573
+ e.layout_from_keys_or_values(
574
+ Key,
575
+ Value,
576
+ key_from_value,
577
+ sentinel_key,
578
+ values,
579
+ key_layout,
580
+ );
581
+ }
570
582
 
571
583
  const values_padding = mem.sliceAsBytes(values_max[builder.value..]);
572
584
  const block_padding = block[data.padding_offset..][0..data.padding_size];
@@ -187,7 +187,7 @@ pub fn TableImmutableIteratorType(comptime Table: type, comptime Storage: type)
187
187
  return true; // All values are "buffered" in memory.
188
188
  }
189
189
 
190
- pub fn peek(it: *const TableImmutableIterator) error{Empty, Drained}!Table.Key {
190
+ pub fn peek(it: *const TableImmutableIterator) error{ Empty, Drained }!Table.Key {
191
191
  // NOTE: This iterator is never Drained as all values are in memory (tick is a no-op).
192
192
  assert(!it.table.free);
193
193
  if (it.values_index == it.table.values.len) return error.Empty;
@@ -5,6 +5,7 @@ const assert = std.debug.assert;
5
5
 
6
6
  const config = @import("../config.zig");
7
7
 
8
+ const util = @import("../util.zig");
8
9
  const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
9
10
  const ManifestType = @import("manifest.zig").ManifestType;
10
11
  const GridType = @import("grid.zig").GridType;
@@ -90,8 +91,6 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
90
91
  }
91
92
 
92
93
  pub fn deinit(it: *TableIterator, allocator: mem.Allocator) void {
93
- assert(!it.read_pending);
94
-
95
94
  allocator.free(it.index_block);
96
95
  it.values.deinit(allocator);
97
96
  for (it.data_blocks.buffer) |block| allocator.free(block);
@@ -186,7 +185,7 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
186
185
 
187
186
  // Copy the bytes read into a buffer owned by the iterator since the Grid
188
187
  // only guarantees the provided pointer to be valid in this callback.
189
- mem.copy(u8, it.index_block, block);
188
+ util.copy_disjoint(.exact, u8, it.index_block, block);
190
189
 
191
190
  if (it.index_block_callback) |callback| {
192
191
  it.index_block_callback = null;
@@ -218,7 +217,7 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
218
217
 
219
218
  // Copy the bytes read into a buffer owned by the iterator since the Grid
220
219
  // only guarantees the provided pointer to be valid in this callback.
221
- mem.copy(u8, it.data_blocks.next_tail().?, block);
220
+ util.copy_disjoint(.exact, u8, it.data_blocks.next_tail().?, block);
222
221
 
223
222
  it.data_blocks.advance_tail();
224
223
  it.data_block_index += 1;
@@ -264,7 +263,7 @@ pub fn TableIteratorType(comptime Table: type, comptime Storage: type) type {
264
263
  /// - error.Empty when there are no values remaining to iterate.
265
264
  /// - error.Drained when the iterator isn't empty, but some values
266
265
  /// still need to be buffered into memory via tick().
267
- pub fn peek(it: TableIterator) error{Empty, Drained}!Table.Key {
266
+ pub fn peek(it: TableIterator) error{ Empty, Drained }!Table.Key {
268
267
  assert(!it.read_pending);
269
268
  assert(!it.read_table_index);
270
269
 
@@ -27,7 +27,10 @@ const SuperBlock = vsr.SuperBlockType(Storage);
27
27
  const Environment = struct {
28
28
  const cluster = 32;
29
29
  const replica = 4;
30
- const size_max = vsr.Zone.superblock.size().? + vsr.Zone.wal.size().? + (512 + 64) * 1024 * 1024;
30
+ const size_max = vsr.Zone.superblock.size().? +
31
+ vsr.Zone.wal_headers.size().? +
32
+ vsr.Zone.wal_prepares.size().? +
33
+ (512 + 64) * 1024 * 1024;
31
34
 
32
35
  const node_count = 1024;
33
36
  const cache_entries_max = 2 * 1024 * 1024;
@@ -196,8 +199,16 @@ const Environment = struct {
196
199
 
197
200
  log.debug("forest checkpointing completed!", .{});
198
201
 
202
+ var vsr_state = env.superblock.staging.vsr_state;
203
+ vsr_state.commit_min += 1;
204
+ vsr_state.commit_min_checkpoint += 1;
205
+
199
206
  env.state = .superblock_checkpointing;
200
- env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context);
207
+ env.superblock.checkpoint(
208
+ superblock_checkpoint_callback,
209
+ &env.superblock_context,
210
+ vsr_state,
211
+ );
201
212
  }
202
213
 
203
214
  fn superblock_checkpoint_callback(superblock_context: *SuperBlock.Context) void {
@@ -51,6 +51,21 @@ const half_bar_beat_count = @divExact(config.lsm_batch_multiple, 2);
51
51
  /// The maximum number of tables for a single tree.
52
52
  pub const table_count_max = table_count_max_for_tree(config.lsm_growth_factor, config.lsm_levels);
53
53
 
54
+ /// The upper-bound count of input tables to a single tree's compaction.
55
+ ///
56
+ /// - +1 from level A.
57
+ /// - +lsm_growth_factor from level B. The A-input table cannot overlap with an extra B-input table
58
+ /// because input table selection is least-overlap. If the input table overlaps on one or both
59
+ /// edges, there must be another table with less overlap to select.
60
+ pub const compaction_tables_input_max = 1 + config.lsm_growth_factor;
61
+
62
+ /// The upper-bound count of output tables from a single tree's compaction.
63
+ /// In the "worst" case, no keys are overwritten/merged, and no tombstones are dropped.
64
+ pub const compaction_tables_output_max = compaction_tables_input_max;
65
+
66
+ /// The maximum number of concurrent compactions (per tree).
67
+ pub const compactions_max = div_ceil(config.lsm_levels, 2);
68
+
54
69
  pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_name: []const u8) type {
55
70
  const Key = TreeTable.Key;
56
71
  const Value = TreeTable.Value;
@@ -490,6 +505,21 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
490
505
 
491
506
  tree.compaction_op = op;
492
507
 
508
+ if (op < config.lsm_batch_multiple) {
509
+ // There is nothing to compact for the first measure.
510
+ // We skip the main compaction code path first compaction bar entirely because it
511
+ // is a special case — its first beat is 1, not 0.
512
+
513
+ tree.lookup_snapshot_max = op + 1;
514
+ if (op + 1 == config.lsm_batch_multiple) {
515
+ tree.compact_mutable_table_into_immutable();
516
+ }
517
+
518
+ // TODO Defer this callback until tick() to avoid stack growth.
519
+ callback(tree);
520
+ return;
521
+ }
522
+
493
523
  if (tree.grid.superblock.working.vsr_state.op_compacted(op)) {
494
524
  // We recovered from a checkpoint, and must avoid replaying one bar of
495
525
  // compactions that were applied before the checkpoint. Repeating these ops'
@@ -497,7 +527,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
497
527
  // causing the storage state of the replica to diverge from the cluster.
498
528
  // See also: lookup_snapshot_max_for_checkpoint().
499
529
 
500
- if (tree.compaction_op + 1 == tree.lookup_snapshot_max) {
530
+ if (op + 1 == tree.lookup_snapshot_max) {
501
531
  // This is the last op of the skipped compaction bar.
502
532
  // Prepare the immutable table for the next bar — since this state is
503
533
  // in-memory, it cannot be skipped.
@@ -535,6 +565,8 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
535
565
  config.lsm_batch_multiple,
536
566
  });
537
567
 
568
+ if (start) tree.manifest.reserve();
569
+
538
570
  // Try to start compacting the immutable table.
539
571
  const even_levels = compaction_beat < half_bar_beat_count;
540
572
  if (even_levels) {
@@ -571,6 +603,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
571
603
  );
572
604
 
573
605
  assert(range.table_count >= 1);
606
+ assert(range.table_count <= compaction_tables_input_max);
574
607
  assert(compare_keys(range.key_min, tree.table_immutable.key_min()) != .gt);
575
608
  assert(compare_keys(range.key_max, tree.table_immutable.key_max()) != .lt);
576
609
 
@@ -595,6 +628,9 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
595
628
  }
596
629
 
597
630
  fn compact_start_table(tree: *Tree, op_min: u64, context: CompactionTableContext) void {
631
+ const compaction_beat = tree.compaction_op % half_bar_beat_count;
632
+ assert(compaction_beat == 0);
633
+
598
634
  assert(context.level_a < config.lsm_levels);
599
635
  assert(context.level_b < config.lsm_levels);
600
636
  assert(context.level_a + 1 == context.level_b);
@@ -602,15 +638,15 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
602
638
  // Do not start compaction if level A does not require compaction.
603
639
  const table_range = tree.manifest.compaction_table(context.level_a) orelse return;
604
640
  const table = table_range.table;
605
- const range = table_range.range;
606
641
 
607
- assert(range.table_count >= 1);
642
+ assert(table_range.range.table_count >= 1);
643
+ assert(table_range.range.table_count <= compaction_tables_input_max);
608
644
  assert(compare_keys(table.key_min, table.key_max) != .gt);
609
- assert(compare_keys(range.key_min, table.key_min) != .gt);
610
- assert(compare_keys(range.key_max, table.key_max) != .lt);
645
+ assert(compare_keys(table_range.range.key_min, table.key_min) != .gt);
646
+ assert(compare_keys(table_range.range.key_max, table.key_max) != .lt);
611
647
 
612
648
  log.debug(tree_name ++ ": compacting {d} tables from level {d} to level {d}", .{
613
- range.table_count,
649
+ table_range.range.table_count,
614
650
  context.level_a,
615
651
  context.level_b,
616
652
  });
@@ -888,11 +924,13 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
888
924
  }
889
925
 
890
926
  pub fn checkpoint(tree: *Tree, callback: fn (*Tree) void) void {
891
- // Assert no outstanding compact_tick() work..
927
+ // Assert no outstanding compact_tick() work.
892
928
  assert(tree.compaction_io_pending == 0);
893
929
  assert(tree.compaction_callback == null);
894
930
  assert(tree.compaction_op > 0);
895
931
  assert(tree.compaction_op + 1 == tree.lookup_snapshot_max);
932
+ // Don't re-run the checkpoint we recovered from.
933
+ assert(!tree.grid.superblock.working.vsr_state.op_compacted(tree.compaction_op));
896
934
 
897
935
  // Assert that this is the last beat in the compaction bar.
898
936
  const compaction_beat = tree.compaction_op % config.lsm_batch_multiple;
@@ -82,7 +82,10 @@ const Environment = struct {
82
82
  const cluster = 32;
83
83
  const replica = 4;
84
84
  // TODO Is this appropriate for the number of fuzz_ops we want to run?
85
- const size_max = vsr.Zone.superblock.size().? + vsr.Zone.wal.size().? + 1024 * 1024 * 1024;
85
+ const size_max = vsr.Zone.superblock.size().? +
86
+ vsr.Zone.wal_headers.size().? +
87
+ vsr.Zone.wal_prepares.size().? +
88
+ 1024 * 1024 * 1024;
86
89
 
87
90
  const node_count = 1024;
88
91
  // This is the smallest size that set_associative_cache will allow us.
@@ -126,6 +129,7 @@ const Environment = struct {
126
129
  tree_exists: bool,
127
130
  lookup_context: Tree.LookupContext = undefined,
128
131
  lookup_value: ?*const Key.Value = null,
132
+ checkpoint_op: ?u64 = null,
129
133
 
130
134
  fn init(env: *Environment, storage: *Storage) !void {
131
135
  env.state = .uninit;
@@ -238,7 +242,8 @@ const Environment = struct {
238
242
  env.change_state(.tree_compacting, .tree_open);
239
243
  }
240
244
 
241
- pub fn checkpoint(env: *Environment) void {
245
+ pub fn checkpoint(env: *Environment, op: u64) void {
246
+ env.checkpoint_op = op - config.lsm_batch_multiple;
242
247
  env.change_state(.tree_open, .tree_checkpointing);
243
248
  env.tree.checkpoint(tree_checkpoint_callback);
244
249
  env.tick_until_state_change(.tree_checkpointing, .superblock_checkpointing);
@@ -248,7 +253,14 @@ const Environment = struct {
248
253
  fn tree_checkpoint_callback(tree: *Tree) void {
249
254
  const env = @fieldParentPtr(@This(), "tree", tree);
250
255
  env.change_state(.tree_checkpointing, .superblock_checkpointing);
251
- env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context);
256
+ env.superblock.checkpoint(superblock_checkpoint_callback, &env.superblock_context, .{
257
+ .commit_min_checksum = env.superblock.working.vsr_state.commit_min_checksum + 1,
258
+ .commit_min = env.checkpoint_op.?,
259
+ .commit_max = env.checkpoint_op.? + 1,
260
+ .view_normal = 0,
261
+ .view = 0,
262
+ });
263
+ env.checkpoint_op = null;
252
264
  }
253
265
 
254
266
  fn superblock_checkpoint_callback(superblock_context: *SuperBlock.Context) void {
@@ -292,17 +304,17 @@ const Environment = struct {
292
304
 
293
305
  for (fuzz_ops) |fuzz_op, fuzz_op_index| {
294
306
  log.debug("Running fuzz_ops[{}/{}] == {}", .{ fuzz_op_index, fuzz_ops.len, fuzz_op });
295
- //TODO(@djg) Restore these when dj-vopr-workload merges.
296
- //const storage_size_used = storage.size_used();
297
- //log.debug("storage.size_used = {}/{}", .{ storage_size_used, storage.size });
298
- //const model_size = model.count() * @sizeOf(Key.Value);
299
- //log.debug("space_amplification = {d:.2}", .{@intToFloat(f64, storage_size_used) / @intToFloat(f64, model_size)});
307
+ const storage_size_used = storage.size_used();
308
+ log.debug("storage.size_used = {}/{}", .{ storage_size_used, storage.size });
309
+ const model_size = model.count() * @sizeOf(Key.Value);
310
+ log.debug("space_amplification = {d:.2}", .{
311
+ @intToFloat(f64, storage_size_used) / @intToFloat(f64, model_size),
312
+ });
300
313
  // Apply fuzz_op to the tree and the model.
301
314
  switch (fuzz_op) {
302
315
  .compact => |compact| {
303
316
  env.compact(compact.op);
304
- if (compact.checkpoint)
305
- env.checkpoint();
317
+ if (compact.checkpoint) env.checkpoint(compact.op);
306
318
  },
307
319
  .put => |value| {
308
320
  env.tree.put(&value);
@@ -333,27 +345,9 @@ const Environment = struct {
333
345
  }
334
346
  };
335
347
 
336
- pub fn run_fuzz_ops(fuzz_ops: []const FuzzOp) !void {
348
+ pub fn run_fuzz_ops(storage_options: Storage.Options, fuzz_ops: []const FuzzOp) !void {
337
349
  // Init mocked storage.
338
- var storage = try Storage.init(
339
- allocator,
340
- Environment.size_max,
341
- Storage.Options{
342
- // We don't apply storage faults yet, so this seed doesn't matter.
343
- .seed = 0xdeadbeef,
344
- .read_latency_min = 0,
345
- .read_latency_mean = 0,
346
- .write_latency_min = 0,
347
- .write_latency_mean = 0,
348
- .read_fault_probability = 0,
349
- .write_fault_probability = 0,
350
- },
351
- 0,
352
- .{
353
- .first_offset = 0,
354
- .period = 0,
355
- },
356
- );
350
+ var storage = try Storage.init(allocator, Environment.size_max, storage_options);
357
351
  defer storage.deinit(allocator);
358
352
 
359
353
  try Environment.format(&storage);
@@ -412,6 +406,7 @@ pub fn generate_fuzz_ops(random: std.rand.Random) ![]const FuzzOp {
412
406
  const checkpoint =
413
407
  // Can only checkpoint on the last beat of the bar.
414
408
  compact_op % config.lsm_batch_multiple == config.lsm_batch_multiple - 1 and
409
+ compact_op > config.lsm_batch_multiple and
415
410
  // Checkpoint at roughly the same rate as log wraparound.
416
411
  random.uintLessThan(usize, Environment.compacts_per_checkpoint) == 0;
417
412
  break :compact FuzzOp{
@@ -447,11 +442,20 @@ pub fn generate_fuzz_ops(random: std.rand.Random) ![]const FuzzOp {
447
442
  pub fn main() !void {
448
443
  const fuzz_args = try fuzz.parse_fuzz_args(allocator);
449
444
  var rng = std.rand.DefaultPrng.init(fuzz_args.seed);
445
+ const random = rng.random();
450
446
 
451
- const fuzz_ops = try generate_fuzz_ops(rng.random());
447
+ const fuzz_ops = try generate_fuzz_ops(random);
452
448
  defer allocator.free(fuzz_ops);
453
449
 
454
- try run_fuzz_ops(fuzz_ops);
450
+ const storage_options = .{
451
+ .seed = random.int(u64),
452
+ .read_latency_min = 0,
453
+ .read_latency_mean = 0 + fuzz.random_int_exponential(random, u64, 20),
454
+ .write_latency_min = 0,
455
+ .write_latency_mean = 0 + fuzz.random_int_exponential(random, u64, 20),
456
+ };
457
+
458
+ try run_fuzz_ops(storage_options, fuzz_ops);
455
459
 
456
460
  log.info("Passed!", .{});
457
461
  }
@@ -1,3 +1,4 @@
1
+ const builtin = @import("builtin");
1
2
  const std = @import("std");
2
3
  const assert = std.debug.assert;
3
4
  const fmt = std.fmt;
@@ -5,6 +6,7 @@ const mem = std.mem;
5
6
  const os = std.os;
6
7
  const log = std.log.scoped(.main);
7
8
 
9
+ const build_options = @import("tigerbeetle_build_options");
8
10
  const config = @import("config.zig");
9
11
  pub const log_level: std.log.Level = @intToEnum(std.log.Level, config.log_level);
10
12
 
@@ -45,6 +47,7 @@ pub fn main() !void {
45
47
  switch (parse_args) {
46
48
  .format => |*args| try Command.format(allocator, args.cluster, args.replica, args.path),
47
49
  .start => |*args| try Command.start(&arena, args.addresses, args.memory, args.path),
50
+ .version => |*args| try Command.version(allocator, args.verbose),
48
51
  }
49
52
  }
50
53
 
@@ -123,7 +126,7 @@ const Command = struct {
123
126
  defer command.deinit(allocator);
124
127
 
125
128
  var replica: Replica = undefined;
126
- try replica.open(allocator, .{
129
+ replica.open(allocator, .{
127
130
  .replica_count = @intCast(u8, addresses.len),
128
131
  .storage = &command.storage,
129
132
  .message_pool = &command.message_pool,
@@ -141,7 +144,7 @@ const Command = struct {
141
144
  },
142
145
  }) catch |err| switch (err) {
143
146
  error.NoAddress => fatal("all --addresses must be provided", .{}),
144
- else => err,
147
+ else => |e| return e,
145
148
  };
146
149
 
147
150
  // Calculate how many bytes are allocated inside `arena`.
@@ -173,4 +176,54 @@ const Command = struct {
173
176
  try command.io.run_for_ns(config.tick_ms * std.time.ns_per_ms);
174
177
  }
175
178
  }
179
+
180
+ pub fn version(allocator: mem.Allocator, verbose: bool) !void {
181
+ _ = allocator;
182
+
183
+ var stdout_buffer = std.io.bufferedWriter(std.io.getStdOut().writer());
184
+ const stdout = stdout_buffer.writer();
185
+ // TODO Pass an actual version number in on build, instead of just saying "experimental".
186
+ try stdout.writeAll("TigerBeetle version experimental\n");
187
+
188
+ if (verbose) {
189
+ try std.fmt.format(
190
+ stdout,
191
+ \\
192
+ \\git_commit="{s}"
193
+ \\
194
+ ,
195
+ .{build_options.git_commit orelse "?"},
196
+ );
197
+
198
+ try stdout.writeAll("\n");
199
+ inline for (.{ "zig_version", "mode" }) |declaration| {
200
+ try print_value(stdout, declaration, @field(builtin, declaration));
201
+ }
202
+
203
+ try stdout.writeAll("\n");
204
+ inline for (std.meta.declarations(config)) |declaration| {
205
+ if (!declaration.is_pub) continue;
206
+ try print_value(stdout, declaration.name, @field(config, declaration.name));
207
+ }
208
+ }
209
+ try stdout_buffer.flush();
210
+ }
176
211
  };
212
+
213
+ fn print_value(
214
+ writer: anytype,
215
+ comptime field: []const u8,
216
+ comptime value: anytype,
217
+ ) !void {
218
+ if (@typeInfo(@TypeOf(value)) == .Pointer) {
219
+ try std.fmt.format(writer, "{s}=\"{s}\"\n", .{
220
+ field,
221
+ std.fmt.fmtSliceEscapeLower(value),
222
+ });
223
+ } else {
224
+ try std.fmt.format(writer, "{s}={}\n", .{
225
+ field,
226
+ value,
227
+ });
228
+ }
229
+ }