tigerbeetle-node 0.11.1 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ const log = std.log.scoped(.main);
9
9
  const build_options = @import("tigerbeetle_build_options");
10
10
  const config = @import("config.zig");
11
11
  pub const log_level: std.log.Level = @intToEnum(std.log.Level, config.log_level);
12
+ const tracer = @import("tracer.zig");
12
13
 
13
14
  const cli = @import("cli.zig");
14
15
  const fatal = cli.fatal;
@@ -46,7 +47,18 @@ pub fn main() !void {
46
47
 
47
48
  switch (parse_args) {
48
49
  .format => |*args| try Command.format(allocator, args.cluster, args.replica, args.path),
49
- .start => |*args| try Command.start(&arena, args.addresses, args.memory, args.path),
50
+ .start => |*args| try Command.start(
51
+ &arena,
52
+ args.addresses,
53
+ .{
54
+ // TODO Tune lsm_forest_node_count better.
55
+ .lsm_forest_node_count = 4096,
56
+ .cache_entries_accounts = args.cache_accounts,
57
+ .cache_entries_transfers = args.cache_transfers,
58
+ .cache_entries_posted = args.cache_transfers_posted,
59
+ },
60
+ args.path,
61
+ ),
50
62
  .version => |*args| try Command.version(allocator, args.verbose),
51
63
  }
52
64
  }
@@ -114,13 +126,14 @@ const Command = struct {
114
126
  pub fn start(
115
127
  arena: *std.heap.ArenaAllocator,
116
128
  addresses: []std.net.Address,
117
- memory: u64,
129
+ options: StateMachine.Options,
118
130
  path: [:0]const u8,
119
131
  ) !void {
120
- _ = memory; // TODO
121
-
122
132
  const allocator = arena.allocator();
123
133
 
134
+ try tracer.init(allocator);
135
+ defer tracer.deinit(allocator);
136
+
124
137
  var command: Command = undefined;
125
138
  try command.init(allocator, path, false);
126
139
  defer command.deinit(allocator);
@@ -131,13 +144,7 @@ const Command = struct {
131
144
  .storage = &command.storage,
132
145
  .message_pool = &command.message_pool,
133
146
  .time = .{},
134
- .state_machine_options = .{
135
- // TODO Tune lsm_forest_node_count better.
136
- .lsm_forest_node_count = 4096,
137
- .cache_entries_accounts = config.cache_accounts_max,
138
- .cache_entries_transfers = config.cache_transfers_max,
139
- .cache_entries_posted = config.cache_transfers_pending_max,
140
- },
147
+ .state_machine_options = options,
141
148
  .message_bus_options = .{
142
149
  .configuration = addresses,
143
150
  .io = &command.io,
@@ -333,7 +333,7 @@ pub fn main() !void {
333
333
  var crashes = cluster.replica_normal_count() -| replica_normal_min;
334
334
 
335
335
  for (cluster.storages) |*storage, replica| {
336
- if (cluster.replicas[replica].journal.recovered) {
336
+ if (cluster.replicas[replica].journal.status == .recovered) {
337
337
  // TODO Remove this workaround when VSR recovery protocol is disabled.
338
338
  // When only the minimum number of replicas are healthy (no more crashes allowed),
339
339
  // disable storage faults on all healthy replicas.
@@ -2,7 +2,9 @@ const std = @import("std");
2
2
  const assert = std.debug.assert;
3
3
  const math = std.math;
4
4
  const mem = std.mem;
5
+
5
6
  const log = std.log.scoped(.state_machine);
7
+ const tracer = @import("tracer.zig");
6
8
 
7
9
  const tb = @import("tigerbeetle.zig");
8
10
  const snapshot_latest = @import("lsm/tree.zig").snapshot_latest;
@@ -114,6 +116,8 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
114
116
  compact_callback: ?fn (*StateMachine) void = null,
115
117
  checkpoint_callback: ?fn (*StateMachine) void = null,
116
118
 
119
+ tracer_slot: ?tracer.SpanStart,
120
+
117
121
  pub fn init(allocator: mem.Allocator, grid: *Grid, options: Options) !StateMachine {
118
122
  var forest = try Forest.init(
119
123
  allocator,
@@ -127,10 +131,13 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
127
131
  .prepare_timestamp = 0,
128
132
  .commit_timestamp = 0,
129
133
  .forest = forest,
134
+ .tracer_slot = null,
130
135
  };
131
136
  }
132
137
 
133
138
  pub fn deinit(self: *StateMachine, allocator: mem.Allocator) void {
139
+ assert(self.tracer_slot == null);
140
+
134
141
  self.forest.deinit(allocator);
135
142
  }
136
143
 
@@ -218,6 +225,12 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
218
225
  return;
219
226
  }
220
227
 
228
+ tracer.start(
229
+ &self.tracer_slot,
230
+ .main,
231
+ .state_machine_prefetch,
232
+ );
233
+
221
234
  self.prefetch_input = input;
222
235
  self.prefetch_callback = callback;
223
236
 
@@ -248,6 +261,13 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
248
261
  const callback = self.prefetch_callback.?;
249
262
  self.prefetch_input = null;
250
263
  self.prefetch_callback = null;
264
+
265
+ tracer.end(
266
+ &self.tracer_slot,
267
+ .main,
268
+ .state_machine_prefetch,
269
+ );
270
+
251
271
  callback(self);
252
272
  }
253
273
 
@@ -367,6 +387,12 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
367
387
  _ = client;
368
388
  assert(op != 0);
369
389
 
390
+ tracer.start(
391
+ &self.tracer_slot,
392
+ .main,
393
+ .state_machine_commit,
394
+ );
395
+
370
396
  const result = switch (operation) {
371
397
  .root => unreachable,
372
398
  .register => 0,
@@ -377,6 +403,12 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
377
403
  else => unreachable,
378
404
  };
379
405
 
406
+ tracer.end(
407
+ &self.tracer_slot,
408
+ .main,
409
+ .state_machine_commit,
410
+ );
411
+
380
412
  return result;
381
413
  }
382
414
 
@@ -384,6 +416,12 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
384
416
  assert(self.compact_callback == null);
385
417
  assert(self.checkpoint_callback == null);
386
418
 
419
+ tracer.start(
420
+ &self.tracer_slot,
421
+ .main,
422
+ .state_machine_compact,
423
+ );
424
+
387
425
  self.compact_callback = callback;
388
426
  self.forest.compact(compact_finish, op);
389
427
  }
@@ -392,6 +430,13 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
392
430
  const self = @fieldParentPtr(StateMachine, "forest", forest);
393
431
  const callback = self.compact_callback.?;
394
432
  self.compact_callback = null;
433
+
434
+ tracer.end(
435
+ &self.tracer_slot,
436
+ .main,
437
+ .state_machine_compact,
438
+ );
439
+
395
440
  callback(self);
396
441
  }
397
442
 
@@ -785,7 +830,7 @@ pub fn StateMachineType(comptime Storage: type, comptime constants_: struct {
785
830
  .ledger = p.ledger,
786
831
  .code = p.code,
787
832
  .pending_id = t.pending_id,
788
- .timeout = t.timeout,
833
+ .timeout = 0,
789
834
  .timestamp = t.timestamp,
790
835
  .flags = t.flags,
791
836
  .amount = amount,
@@ -72,7 +72,7 @@ pub const StateChecker = struct {
72
72
  pub fn check_state(state_checker: *StateChecker, replica_index: u8) !void {
73
73
  const replica = state_checker.replicas[replica_index];
74
74
  const commit_header = header: {
75
- if (replica.journal.recovered) {
75
+ if (replica.journal.status == .recovered) {
76
76
  const commit_header = replica.journal.header_with_op(replica.commit_min);
77
77
  assert(commit_header != null or replica.commit_min == replica.op_checkpoint);
78
78
  break :header replica.journal.header_with_op(replica.commit_min);
@@ -751,6 +751,15 @@ pub const Storage = struct {
751
751
  }
752
752
  }
753
753
 
754
/// Returns a read-only view of superblock copy `copy_`.
///
/// The returned pointer aliases `storage.memory`: nothing is copied, the bytes at the
/// sector's offset within the superblock zone are reinterpreted in place.
pub fn superblock_sector(
    storage: *const Storage,
    copy_: u8,
) *const superblock.SuperBlockSector {
    // Offset of the requested copy relative to the start of the superblock zone,
    // translated into an absolute offset into the storage memory.
    const offset = vsr.Zone.superblock.offset(
        superblock.Layout.offset_sector(copy_),
    );
    // Slicing with a comptime-known length yields a pointer to a fixed-size array,
    // which is what `bytesAsValue` expects.
    return mem.bytesAsValue(
        superblock.SuperBlockSector,
        storage.memory[offset..][0..@sizeOf(superblock.SuperBlockSector)],
    );
}
762
+
754
763
  pub fn wal_headers(storage: *const Storage) []const vsr.Header {
755
764
  const offset = vsr.Zone.wal_headers.offset(0);
756
765
  const size = vsr.Zone.wal_headers.size().?;
@@ -0,0 +1,319 @@
1
+ //! The tracer records a tree of event spans.
2
+ //!
3
+ //! In order to create event spans, you need somewhere to store the `SpanStart`.
4
+ //!
5
+ //! var slot: ?SpanStart = null;
6
+ //! tracer.start(&slot, group, event);
7
+ //! ... do stuff ...
8
+ //! tracer.end(&slot, group, event);
9
+ //!
10
+ //! Each slot can be used as many times as you like,
11
+ //! but you must alternate calls to start and end,
12
+ //! and you must end every event.
13
+ //!
14
+ //! // good
15
+ //! tracer.start(&slot, group_a, event_a);
16
+ //! tracer.end(&slot, group_a, event_a);
17
+ //! tracer.start(&slot, group_b, event_b);
18
+ //! tracer.end(&slot, group_b, event_b);
19
+ //!
20
+ //! // bad
21
+ //! tracer.start(&slot, group_a, event_a);
22
+ //! tracer.start(&slot, group_b, event_b);
23
+ //! tracer.end(&slot, group_b, event_b);
24
+ //! tracer.end(&slot, group_a, event_a);
25
+ //!
26
+ //! // bad
27
+ //! tracer.end(&slot, group_a, event_a);
28
+ //! tracer.start(&slot, group_a, event_a);
29
+ //!
30
+ //! // bad
31
+ //! tracer.start(&slot, group_a, event_a);
32
+ //! std.os.exit(0);
33
+ //!
34
+ //! Before freeing a slot, you should `assert(slot == null)`
35
+ //! to ensure that you didn't forget to end an event.
36
+ //!
37
+ //! Each `Event` has an `EventGroup`.
38
+ //! Within each group, event spans should form a tree.
39
+ //!
40
+ //! // good
41
+ //! tracer.start(&a, group, ...);
42
+ //! tracer.start(&b, group, ...);
43
+ //! tracer.end(&b, group, ...);
44
+ //! tracer.end(&a, group, ...);
45
+ //!
46
+ //! // bad
47
+ //! tracer.start(&a, group, ...);
48
+ //! tracer.start(&b, group, ...);
49
+ //! tracer.end(&a, group, ...);
50
+ //! tracer.end(&b, group, ...);
51
+ //!
52
+ //! The tracer itself will not object to non-tree spans,
53
+ //! but some `config.tracer_backend` choices will either refuse to open the trace or will render it weirdly.
54
+ //!
55
+ //! If you're having trouble making your spans form a tree, feel free to just add new groups.
56
+
57
+ const std = @import("std");
58
+ const assert = std.debug.assert;
59
+ const Allocator = std.mem.Allocator;
60
+ const log = std.log.scoped(.tracer);
61
+
62
+ const config = @import("./config.zig");
63
+ const Time = @import("./time.zig").Time;
64
+
65
// Module-level tracer state.
// NOTE(review): none of this state is synchronized, so the tracer is presumably meant to be
// driven from a single thread — confirm.

/// True between a successful `init` and the matching `deinit`.
var is_initialized = false;

/// Clock used to timestamp the start and the end of every span.
var timer: Time = .{};

/// Id that will be assigned to the next span that is started.
var span_id_next: SpanId = 0;

/// Finished spans that have not been flushed yet. Allocated by `init`.
var spans: std.ArrayList(Span) = undefined;

/// Slot used to trace the tracer's own flushes.
var flush_slot: ?SpanStart = null;

/// Output file of the perfetto backend. Opened by `init`, closed by `deinit`.
var log_file: std.fs.File = undefined;

/// Capacity of `spans`. When it is reached, `end` flushes before recording another span.
const span_count_max = 1 << 20;

/// Path of the trace file, resolved against the current working directory.
const log_path = "./tracer.json";
74
+
75
/// What a span measures. Variants with a payload carry the parameters that are rendered
/// into the span's name when the trace is written.
///
/// All strings in Event must be comptime constants to ensure that they live until after
/// `tracer.deinit` is called.
pub const Event = union(enum) {
    /// The tracer writing out its own buffered spans.
    tracer_flush,
    commit: struct { op: u64 },
    checkpoint,
    state_machine_prefetch,
    state_machine_commit,
    state_machine_compact,
    tree_compaction_beat: struct { tree_name: []const u8 },
    tree_compaction_tick: struct {
        tree_name: []const u8,
        level_b: u8,
    },
    tree_compaction_merge: struct {
        tree_name: []const u8,
        level_b: u8,
    },
};
97
+
98
/// The group that a span belongs to. Within one group, spans should form a tree.
///
/// All strings in EventGroup must be comptime constants to ensure that they live until after
/// `tracer.deinit` is called.
pub const EventGroup = union(enum) {
    main,
    /// Used for the tracer's own spans.
    tracer,
    /// One group per tree, identified by the tree's name.
    tree: struct { tree_name: []const u8 },
};
106
+
107
/// Identifier of a span. Ids are handed out sequentially by `start`.
const SpanId = u64;

/// The state that `start` stores in a caller-provided slot until the matching `end`.
pub const SpanStart = struct {
    id: SpanId,
    /// Reading of the tracer's monotonic clock when the span was started.
    start_time_ns: u64,
    group: EventGroup,
    event: Event,
};
115
+
116
/// A finished span: the contents of a `SpanStart` plus the time at which it ended.
/// Spans are buffered until the next flush.
const Span = struct {
    id: SpanId,
    /// Reading of the tracer's monotonic clock when the span was started.
    start_time_ns: u64,
    /// Reading of the tracer's monotonic clock when the span was ended.
    end_time_ns: u64,
    group: EventGroup,
    event: Event,
};
123
+
124
/// Prepares the tracer for use. Must be paired with `deinit`.
///
/// Does nothing when tracing is disabled (`config.tracer_backend == .none`).
/// Otherwise allocates room for `span_count_max` spans up front, so that recording a span
/// never allocates, and opens the backend's output.
///
/// Returns an error if the span buffer cannot be allocated, or if the trace file cannot be
/// created or written to. On error nothing is left allocated or open.
pub fn init(allocator: Allocator) !void {
    if (config.tracer_backend == .none) return;
    assert(!is_initialized);

    spans = try std.ArrayList(Span).initCapacity(allocator, span_count_max);
    errdefer spans.deinit();

    switch (config.tracer_backend) {
        .none => unreachable,
        .perfetto => {
            // Any previous trace at `log_path` is overwritten.
            log_file = try std.fs.cwd().createFile(log_path, .{ .truncate = true });
            errdefer log_file.close();

            // Opening of the JSON object; the events themselves are appended by each flush.
            try log_file.writeAll("{\"traceEvents\":[\n");
        },
    }

    is_initialized = true;
}
146
+
147
/// Flushes the remaining spans, closes the trace file and frees the span buffer.
///
/// Does nothing when tracing is disabled. `allocator` is unused: the span buffer
/// remembers the allocator it was created with.
///
/// The span that records this final flush is added to the buffer after the buffer has been
/// written out, so it is freed without ever reaching the trace file.
///
/// NOTE(review): nothing here writes a closing `]}` to the trace file, so the consumer
/// presumably tolerates an unterminated event array — confirm.
pub fn deinit(allocator: Allocator) void {
    _ = allocator;

    if (config.tracer_backend == .none) return;
    assert(is_initialized);

    // Deferred statements run in reverse order: free the buffer, then clear the flag.
    defer is_initialized = false;
    defer spans.deinit();

    flush();
    log_file.close();
    assert(flush_slot == null);
}
159
+
160
/// Begins a span for `event` in `event_group`, storing its state in `slot` until the
/// matching call to `end`.
///
/// Does nothing when tracing is disabled. Otherwise `slot` must be empty: starting a slot
/// that has already been started and not yet ended trips an assertion.
pub fn start(slot: *?SpanStart, event_group: EventGroup, event: Event) void {
    if (config.tracer_backend == .none) return;
    assert(is_initialized);

    // The event must not have already been started.
    assert(slot.* == null);

    const span_id = span_id_next;
    span_id_next += 1;

    slot.* = SpanStart{
        .id = span_id,
        .start_time_ns = timer.monotonic(),
        .group = event_group,
        .event = event,
    };
}
175
+
176
/// Finishes the span held in `slot`, records it in the span buffer and empties the slot.
///
/// Does nothing when tracing is disabled. Otherwise `slot` must hold a span that was
/// started with the same `event_group` and `event`; both are checked with `std.meta.eql`.
/// NOTE(review): `std.meta.eql` is understood to compare slices by pointer and length
/// rather than by content, so names should come from the same constant — confirm.
///
/// If the span buffer is full it is flushed first. The end timestamp is taken after that
/// flush, so the flush is included in the span's duration.
pub fn end(slot: *?SpanStart, event_group: EventGroup, event: Event) void {
    if (config.tracer_backend == .none) return;
    assert(is_initialized);

    // The event must have already been started.
    const started = slot.*.?;
    assert(std.meta.eql(started.group, event_group));
    assert(std.meta.eql(started.event, event));

    // Make sure we have room in spans.
    if (spans.items.len >= span_count_max) flush();
    assert(spans.items.len < span_count_max);

    const finished = Span{
        .id = started.id,
        .start_time_ns = started.start_time_ns,
        .end_time_ns = timer.monotonic(),
        .group = started.group,
        .event = started.event,
    };
    spans.appendAssumeCapacity(finished);

    slot.* = null;
}
198
+
199
/// Writes all buffered spans to the trace output and empties the buffer.
///
/// Does nothing when tracing is disabled or when there is nothing to write.
/// A failed write is logged and the buffered spans are discarded; the error is not returned.
///
/// The flush is itself traced as a `tracer_flush` span. That span is recorded after the
/// buffer has been emptied, so it is the only span left in the buffer afterwards.
pub fn flush() void {
    if (config.tracer_backend == .none) return;
    assert(is_initialized);

    if (spans.items.len == 0) return;

    start(&flush_slot, .tracer, .tracer_flush);
    // Runs last, once the buffer has been emptied below.
    defer end(&flush_slot, .tracer, .tracer_flush);

    if (flush_or_err()) |_| {
        // Everything was written.
    } else |err| {
        log.err("Could not flush tracer log, discarding instead: {}", .{err});
    }
    spans.clearRetainingCapacity();
}
212
+
213
/// Writes every buffered span to `log_file` in the format of the configured backend.
///
/// For the perfetto backend each span produces two JSON objects, each followed by ",\n":
/// a complete ("X") event for the span itself, and a "thread_name" metadata ("M") event
/// that labels the span's thread id with the name of its group.
/// Timestamps and durations are converted from nanoseconds to microseconds.
///
/// The span buffer is left untouched; emptying it is up to the caller.
/// Returns any error raised while writing to `log_file`, in which case part of the
/// batch may already have been written.
fn flush_or_err() !void {
    switch (config.tracer_backend) {
        .none => unreachable,
        .perfetto => {
            // Perfetto requires this json format:
            // https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview

            // Renders an event as a JSON string such as "commit(42)".
            // Names are written verbatim, without JSON escaping.
            const NameJson = struct {
                event: Event,

                pub fn jsonStringify(
                    name_json: @This(),
                    options: std.json.StringifyOptions,
                    writer: anytype,
                ) @TypeOf(writer).Error!void {
                    _ = options;
                    try writer.writeAll("\"");
                    switch (name_json.event) {
                        .tracer_flush => try writer.writeAll("tracer_flush"),
                        .commit => |commit| try writer.print(
                            "commit({})",
                            .{commit.op},
                        ),
                        .checkpoint => try writer.writeAll("checkpoint"),
                        .state_machine_prefetch => try writer.writeAll("state_machine_prefetch"),
                        .state_machine_commit => try writer.writeAll("state_machine_commit"),
                        .state_machine_compact => try writer.writeAll("state_machine_compact"),
                        .tree_compaction_beat => |tree_compaction_beat| try writer.print(
                            "tree_compaction_beat({s})",
                            .{tree_compaction_beat.tree_name},
                        ),
                        .tree_compaction_tick => |tree_compaction_tick| try writer.print(
                            "tree_compaction_tick({s}, {})",
                            .{ tree_compaction_tick.tree_name, tree_compaction_tick.level_b },
                        ),
                        .tree_compaction_merge => |tree_compaction_merge| try writer.print(
                            "tree_compaction_merge({s}, {})",
                            .{ tree_compaction_merge.tree_name, tree_compaction_merge.level_b },
                        ),
                    }
                    try writer.writeAll("\"");
                }
            };
            const SpanJson = struct {
                name: NameJson,
                cat: []const u8 = "default",
                ph: []const u8 = "X",
                ts: f64,
                dur: f64,
                pid: u64 = 0,
                tid: u64,
            };
            const MetaJson = struct {
                name: []const u8,
                ph: []const u8 = "M",
                pid: u64 = 0,
                tid: u64,
                args: struct {
                    name: []const u8,
                },
            };

            // A single buffered writer serves the whole batch, so that many spans are
            // coalesced into each write to the file. It is flushed once, after the loop.
            var buffered_writer = std.io.bufferedWriter(log_file.writer());
            const out = buffered_writer.writer();

            for (spans.items) |span| {
                const group_name: []const u8 = switch (span.group) {
                    .main => "main",
                    .tracer => "tracer",
                    .tree => |tree| tree.tree_name,
                };

                // Every group is shown as its own thread. Tree groups derive their thread id
                // from a hash of the tree name, folded from 64 to 32 bits.
                // NOTE(review): a folded hash could in principle equal 0 or 1 (the ids of
                // `main` and `tracer`) or that of another tree.
                const tid_64: u64 = switch (span.group) {
                    .main => 0,
                    .tracer => 1,
                    .tree => |tree| std.hash_map.hashString(tree.tree_name),
                };
                const tid = @truncate(u32, tid_64) ^ @truncate(u32, tid_64 >> 32);

                try std.json.stringify(
                    SpanJson{
                        .name = .{ .event = span.event },
                        .ts = @intToFloat(f64, span.start_time_ns) / 1000,
                        .dur = @intToFloat(f64, span.end_time_ns - span.start_time_ns) / 1000,
                        .tid = tid,
                    },
                    .{},
                    out,
                );
                try out.writeAll(",\n");

                // TODO Only emit metadata once per group name.
                try std.json.stringify(
                    MetaJson{
                        .name = "thread_name",
                        .tid = tid,
                        .args = .{ .name = group_name },
                    },
                    .{},
                    out,
                );
                try out.writeAll(",\n");
            }

            try buffered_writer.flush();
        },
    }
}
@@ -2,6 +2,7 @@ test {
2
2
  _ = @import("vsr.zig");
3
3
  _ = @import("vsr/journal.zig");
4
4
  _ = @import("vsr/marzullo.zig");
5
+ _ = @import("vsr/replica_format.zig");
5
6
  _ = @import("vsr/superblock.zig");
6
7
  _ = @import("vsr/superblock_free_set.zig");
7
8
  _ = @import("vsr/superblock_manifest.zig");
@@ -16,6 +17,7 @@ test {
16
17
 
17
18
  _ = @import("io.zig");
18
19
 
20
+ _ = @import("cli.zig");
19
21
  _ = @import("ewah.zig");
20
22
  _ = @import("util.zig");
21
23