tigerbeetle-node 0.11.13 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/README.md +5 -10
  2. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  3. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  4. package/dist/bin/aarch64-macos/client.node +0 -0
  5. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  6. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  7. package/dist/bin/x86_64-macos/client.node +0 -0
  8. package/dist/index.js +33 -1
  9. package/dist/index.js.map +1 -1
  10. package/package-lock.json +66 -0
  11. package/package.json +6 -16
  12. package/src/index.ts +56 -1
  13. package/src/node.zig +9 -9
  14. package/dist/.client.node.sha256 +0 -1
  15. package/scripts/build_lib.sh +0 -61
  16. package/scripts/download_node_headers.sh +0 -32
  17. package/src/tigerbeetle/scripts/benchmark.bat +0 -55
  18. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  19. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  20. package/src/tigerbeetle/scripts/fail_on_diff.sh +0 -9
  21. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  22. package/src/tigerbeetle/scripts/fuzz_loop_hash_log.sh +0 -12
  23. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  24. package/src/tigerbeetle/scripts/install.bat +0 -7
  25. package/src/tigerbeetle/scripts/install.sh +0 -21
  26. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  27. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  28. package/src/tigerbeetle/scripts/lint.zig +0 -199
  29. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  30. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -55
  31. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  32. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  33. package/src/tigerbeetle/scripts/scripts/fail_on_diff.sh +0 -9
  34. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  35. package/src/tigerbeetle/scripts/scripts/fuzz_loop_hash_log.sh +0 -12
  36. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  37. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  38. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  39. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  40. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  41. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  42. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  43. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  44. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  45. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  46. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  47. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  48. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  49. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  50. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  51. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  52. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  53. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  54. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  55. package/src/tigerbeetle/src/benchmark.zig +0 -336
  56. package/src/tigerbeetle/src/config.zig +0 -233
  57. package/src/tigerbeetle/src/constants.zig +0 -428
  58. package/src/tigerbeetle/src/ewah.zig +0 -286
  59. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  60. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  61. package/src/tigerbeetle/src/fifo.zig +0 -120
  62. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  63. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  64. package/src/tigerbeetle/src/io/linux.zig +0 -1071
  65. package/src/tigerbeetle/src/io/test.zig +0 -643
  66. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  67. package/src/tigerbeetle/src/io.zig +0 -34
  68. package/src/tigerbeetle/src/iops.zig +0 -107
  69. package/src/tigerbeetle/src/lsm/README.md +0 -308
  70. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  71. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  72. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  73. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  74. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  75. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  76. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  77. package/src/tigerbeetle/src/lsm/forest.zig +0 -205
  78. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -450
  79. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  80. package/src/tigerbeetle/src/lsm/groove.zig +0 -1036
  81. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  82. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  83. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  84. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -878
  85. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  86. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  87. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  88. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  89. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -381
  90. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1329
  91. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  92. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  93. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  94. package/src/tigerbeetle/src/lsm/table.zig +0 -1009
  95. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -192
  96. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  97. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -203
  98. package/src/tigerbeetle/src/lsm/test.zig +0 -439
  99. package/src/tigerbeetle/src/lsm/tree.zig +0 -1169
  100. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -479
  101. package/src/tigerbeetle/src/message_bus.zig +0 -1013
  102. package/src/tigerbeetle/src/message_pool.zig +0 -156
  103. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  104. package/src/tigerbeetle/src/simulator.zig +0 -580
  105. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -578
  106. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  107. package/src/tigerbeetle/src/state_machine.zig +0 -2099
  108. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  109. package/src/tigerbeetle/src/stdx.zig +0 -171
  110. package/src/tigerbeetle/src/storage.zig +0 -393
  111. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  112. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  113. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  114. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  115. package/src/tigerbeetle/src/testing/cluster.zig +0 -444
  116. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  117. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  118. package/src/tigerbeetle/src/testing/id.zig +0 -99
  119. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -374
  120. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  121. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  122. package/src/tigerbeetle/src/testing/state_machine.zig +0 -250
  123. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  124. package/src/tigerbeetle/src/testing/table.zig +0 -247
  125. package/src/tigerbeetle/src/testing/time.zig +0 -84
  126. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  127. package/src/tigerbeetle/src/time.zig +0 -112
  128. package/src/tigerbeetle/src/tracer.zig +0 -529
  129. package/src/tigerbeetle/src/unit_tests.zig +0 -40
  130. package/src/tigerbeetle/src/vopr.zig +0 -495
  131. package/src/tigerbeetle/src/vsr/README.md +0 -209
  132. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  133. package/src/tigerbeetle/src/vsr/clock.zig +0 -855
  134. package/src/tigerbeetle/src/vsr/journal.zig +0 -2415
  135. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  136. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  137. package/src/tigerbeetle/src/vsr/replica.zig +0 -6616
  138. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  139. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  140. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  141. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  142. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  143. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  144. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  145. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  146. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  147. package/src/tigerbeetle/src/vsr.zig +0 -1425
@@ -1,233 +0,0 @@
1
- //! Raw configuration values.
2
- //!
3
- //! Code which needs these values should use `constants.zig` instead.
4
- //! Configuration values are set from a combination of:
5
- //! - default values
6
- //! - `root.tigerbeetle_config`
7
- //! - `@import("tigerbeetle_options")`
8
-
9
- const builtin = @import("builtin");
10
- const std = @import("std");
11
-
12
- const root = @import("root");
13
- // Allow setting build-time config either via `build.zig` `Options`, or via a struct in the root file.
14
- const build_options =
15
- if (@hasDecl(root, "vsr_options")) root.vsr_options else @import("vsr_options");
16
-
17
- const vsr = @import("vsr.zig");
18
- const sector_size = @import("constants.zig").sector_size;
19
-
20
- pub const Config = struct {
21
- pub const Cluster = ConfigCluster;
22
- pub const Process = ConfigProcess;
23
-
24
- cluster: ConfigCluster,
25
- process: ConfigProcess,
26
- };
27
-
28
- /// Configurations which are tunable per-replica (or per-client).
29
- /// - Replica configs need not equal each other.
30
- /// - Client configs need not equal each other.
31
- /// - Client configs need not equal replica configs.
32
- /// - Replica configs can change between restarts.
33
- ///
34
- /// Fields are documented within constants.zig.
35
- const ConfigProcess = struct {
36
- log_level: std.log.Level = .info,
37
- tracer_backend: TracerBackend = .none,
38
- hash_log_mode: HashLogMode = .none,
39
- verify: bool,
40
- port: u16 = 3001,
41
- address: []const u8 = "127.0.0.1",
42
- memory_size_max_default: u64 = 1024 * 1024 * 1024,
43
- cache_accounts_max: usize,
44
- cache_transfers_max: usize,
45
- cache_transfers_posted_max: usize,
46
- client_request_queue_max: usize = 32,
47
- lsm_manifest_node_size: usize = 16 * 1024,
48
- connection_delay_min_ms: u64 = 50,
49
- connection_delay_max_ms: u64 = 1000,
50
- tcp_backlog: u31 = 64,
51
- tcp_rcvbuf: c_int = 4 * 1024 * 1024,
52
- tcp_keepalive: bool = true,
53
- tcp_keepidle: c_int = 5,
54
- tcp_keepintvl: c_int = 4,
55
- tcp_keepcnt: c_int = 3,
56
- tcp_nodelay: bool = true,
57
- direct_io: bool,
58
- direct_io_required: bool,
59
- journal_iops_read_max: usize = 8,
60
- journal_iops_write_max: usize = 8,
61
- tick_ms: u63 = 10,
62
- rtt_ms: u64 = 300,
63
- rtt_multiple: u8 = 2,
64
- backoff_min_ms: u64 = 100,
65
- backoff_max_ms: u64 = 10000,
66
- clock_offset_tolerance_max_ms: u64 = 10000,
67
- clock_epoch_max_ms: u64 = 60000,
68
- clock_synchronization_window_min_ms: u64 = 2000,
69
- clock_synchronization_window_max_ms: u64 = 20000,
70
- };
71
-
72
- /// Configurations which are tunable per-cluster.
73
- /// - All replicas within a cluster must have the same configuration.
74
- /// - Replicas must reuse the same configuration when the binary is upgraded — they do not change
75
- /// over the cluster lifetime.
76
- /// - The storage formats generated by different ConfigClusters are incompatible.
77
- ///
78
- /// Fields are documented within constants.zig.
79
- const ConfigCluster = struct {
80
- cache_line_size: comptime_int = 64,
81
- clients_max: usize,
82
- pipeline_prepare_queue_max: usize = 8,
83
- view_change_headers_max: usize = 8,
84
- quorum_replication_max: u8 = 3,
85
- journal_slot_count: usize = 1024,
86
- message_size_max: usize = 1 * 1024 * 1024,
87
- superblock_copies: comptime_int = 4,
88
- storage_size_max: u64 = 16 * 1024 * 1024 * 1024 * 1024,
89
- block_size: comptime_int = 64 * 1024,
90
- lsm_levels: u7 = 7,
91
- lsm_growth_factor: u32 = 8,
92
- lsm_batch_multiple: comptime_int = 64,
93
- lsm_snapshots_max: usize = 32,
94
- lsm_value_to_key_layout_ratio_min: comptime_int = 16,
95
-
96
- /// The WAL requires at least two sectors of redundant headers — otherwise we could lose them all to
97
- /// a single torn write. A replica needs at least one valid redundant header to determine an
98
- /// (untrusted) maximum op in recover_torn_prepare(), without which it cannot truncate a torn
99
- /// prepare.
100
- pub const journal_slot_count_min = 2 * @divExact(sector_size, @sizeOf(vsr.Header));
101
-
102
- pub const clients_max_min = 1;
103
-
104
- /// The smallest possible message_size_max (for use in the simulator to improve performance).
105
- /// The message body must have room for pipeline_prepare_queue_max headers in the DVC.
106
- pub fn message_size_max_min(clients_max: usize) usize {
107
- return std.math.max(
108
- sector_size,
109
- std.mem.alignForward(
110
- @sizeOf(vsr.Header) + clients_max * @sizeOf(vsr.Header),
111
- sector_size,
112
- ),
113
- );
114
- }
115
- };
116
-
117
- pub const ConfigBase = enum {
118
- production,
119
- development,
120
- test_min,
121
- default,
122
- };
123
-
124
- pub const TracerBackend = enum {
125
- none,
126
- // Writes to a file (./tracer.json) which can be uploaded to https://ui.perfetto.dev/
127
- perfetto,
128
- // Sends data to https://github.com/wolfpld/tracy.
129
- tracy,
130
- };
131
-
132
- pub const HashLogMode = enum {
133
- none,
134
- create,
135
- check,
136
- };
137
-
138
- pub const configs = struct {
139
- /// A good default config for production.
140
- pub const default_production = Config{
141
- .process = .{
142
- .direct_io = true,
143
- .direct_io_required = true,
144
- .cache_accounts_max = 1024 * 1024,
145
- .cache_transfers_max = 0,
146
- .cache_transfers_posted_max = 256 * 1024,
147
- .verify = false,
148
- },
149
- .cluster = .{
150
- .clients_max = 32,
151
- },
152
- };
153
-
154
- /// A good default config for local development.
155
- /// (For production, use default_production instead.)
156
- /// The cluster-config is compatible with the default production config.
157
- pub const default_development = Config{
158
- .process = .{
159
- .direct_io = true,
160
- .direct_io_required = false,
161
- .cache_accounts_max = 1024 * 1024,
162
- .cache_transfers_max = 0,
163
- .cache_transfers_posted_max = 256 * 1024,
164
- .verify = true,
165
- },
166
- .cluster = default_production.cluster,
167
- };
168
-
169
- /// Minimal test configuration — small WAL, small grid block size, etc.
170
- /// Not suitable for production, but good for testing code that would be otherwise hard to reach.
171
- pub const test_min = Config{
172
- .process = .{
173
- .direct_io = false,
174
- .direct_io_required = false,
175
- .cache_accounts_max = 2048,
176
- .cache_transfers_max = 0,
177
- .cache_transfers_posted_max = 2048,
178
- .verify = true,
179
- },
180
- .cluster = .{
181
- .clients_max = 4 + 3,
182
- .pipeline_prepare_queue_max = 4,
183
- .view_change_headers_max = 4,
184
- .journal_slot_count = Config.Cluster.journal_slot_count_min,
185
- .message_size_max = Config.Cluster.message_size_max_min(4),
186
- .storage_size_max = 4 * 1024 * 1024 * 1024,
187
-
188
- .block_size = sector_size,
189
- .lsm_batch_multiple = 4,
190
- .lsm_growth_factor = 4,
191
- },
192
- };
193
-
194
- const default = if (@hasDecl(root, "tigerbeetle_config"))
195
- root.tigerbeetle_config
196
- else if (builtin.is_test)
197
- test_min
198
- else
199
- default_development;
200
-
201
- pub const current = current: {
202
- var base = if (@hasDecl(root, "decode_events"))
203
- // TODO(DJ) This is a hack to work around the absense of tigerbeetle_build_options.
204
- // This should be removed once the node client is built using `zig build`.
205
- default_development
206
- else switch (build_options.config_base) {
207
- .default => default,
208
- .production => default_production,
209
- .development => default_development,
210
- .test_min => test_min,
211
- };
212
-
213
- // TODO Use additional build options to overwrite other fields.
214
- base.process.tracer_backend = if (@hasDecl(root, "tracer_backend"))
215
- // TODO(jamii)
216
- // This branch is a hack used to work around the absence of tigerbeetle_build_options.
217
- // This should be removed once the node client is built using `zig build`.
218
- root.tracer_backend
219
- else
220
- // Zig's `addOptions` reuses the type, but redeclares it — identical structurally,
221
- // but a different type from a nominal typing perspective.
222
- @intToEnum(TracerBackend, @enumToInt(build_options.tracer_backend));
223
-
224
- base.process.hash_log_mode = if (@hasDecl(root, "decode_events"))
225
- // TODO(DJ) This is a hack to work around the absense of tigerbeetle_build_options.
226
- // This should be removed once the node client is built using `zig build`.
227
- .none
228
- else
229
- @intToEnum(HashLogMode, @enumToInt(build_options.hash_log_mode));
230
-
231
- break :current base;
232
- };
233
- };
@@ -1,428 +0,0 @@
1
- //! Constants are the configuration that the code actually imports — they include:
2
- //! - all of the configuration values (flattened)
3
- //! - derived configuration values,
4
-
5
- const std = @import("std");
6
- const assert = std.debug.assert;
7
- const vsr = @import("vsr.zig");
8
- const tracer = @import("tracer.zig");
9
- const Config = @import("config.zig").Config;
10
- const config = @import("config.zig").configs.current;
11
-
12
- /// The maximum log level.
13
- /// One of: .err, .warn, .info, .debug
14
- pub const log_level: std.log.Level = config.process.log_level;
15
-
16
- pub const log = if (tracer_backend == .tracy)
17
- tracer.log_fn
18
- else
19
- std.log.defaultLog;
20
-
21
- // Which backend to use for ./tracer.zig.
22
- // Default is `.none`.
23
- pub const tracer_backend = config.process.tracer_backend;
24
-
25
- // Which mode to use for ./testing/hash_log.zig.
26
- pub const hash_log_mode = config.process.hash_log_mode;
27
-
28
- /// The maximum number of replicas allowed in a cluster.
29
- pub const replicas_max = 6;
30
-
31
- /// The maximum number of clients allowed per cluster, where each client has a unique 128-bit ID.
32
- /// This impacts the amount of memory allocated at initialization by the server.
33
- /// This determines the size of the VR client table used to cache replies to clients by client ID.
34
- /// Each client has one entry in the VR client table to store the latest `message_size_max` reply.
35
- pub const clients_max = config.cluster.clients_max;
36
-
37
- comptime {
38
- assert(clients_max >= Config.Cluster.clients_max_min);
39
- }
40
-
41
- /// The maximum number of nodes required to form a quorum for replication.
42
- /// Majority quorums are only required across view change and replication phases (not within).
43
- /// As per Flexible Paxos, provided `quorum_replication + quorum_view_change > replicas`:
44
- /// 1. you may increase `quorum_view_change` above a majority, so that
45
- /// 2. you can decrease `quorum_replication` below a majority, to optimize the common case.
46
- /// This improves latency by reducing the number of nodes required for synchronous replication.
47
- /// This reduces redundancy only in the short term, asynchronous replication will still continue.
48
- /// The size of the replication quorum is limited to the minimum of this value and ⌈replicas/2⌉.
49
- /// The size of the view change quorum will then be automatically inferred from quorum_replication.
50
- pub const quorum_replication_max = config.cluster.quorum_replication_max;
51
-
52
- /// The default server port to listen on if not specified in `--addresses`:
53
- pub const port = config.process.port;
54
-
55
- /// The default network interface address to listen on if not specified in `--addresses`:
56
- /// WARNING: Binding to all interfaces with "0.0.0.0" is dangerous and opens the server to anyone.
57
- /// Bind to the "127.0.0.1" loopback address to accept local connections as a safe default only.
58
- pub const address = config.process.address;
59
-
60
- comptime {
61
- // vsr.parse_address assumes that config.address/config.port are valid.
62
- _ = std.net.Address.parseIp4(address, 0) catch unreachable;
63
- _ = @as(u16, port);
64
- }
65
-
66
- /// The default maximum amount of memory to use.
67
- pub const memory_size_max_default = config.process.memory_size_max_default;
68
-
69
- /// At a high level, priority for object caching is (in descending order):
70
- ///
71
- /// 1. Accounts.
72
- /// - 2 lookups per created transfer
73
- /// - high temporal locality
74
- /// - positive expected result
75
- /// 2. Posted transfers.
76
- /// - high temporal locality
77
- /// - positive expected result
78
- /// 3. Transfers. Generally don't cache these because of:
79
- /// - low temporal locality
80
- /// - negative expected result
81
- ///
82
- /// The maximum number of accounts to store in memory:
83
- /// This impacts the amount of memory allocated at initialization by the server.
84
- pub const cache_accounts_max = config.process.cache_accounts_max;
85
-
86
- /// The maximum number of transfers to store in memory:
87
- /// This impacts the amount of memory allocated at initialization by the server.
88
- /// We allocate more capacity than the number of transfers for a safe hash table load factor.
89
- pub const cache_transfers_max = config.process.cache_transfers_max;
90
-
91
- /// The maximum number of two-phase transfers to store in memory:
92
- /// This impacts the amount of memory allocated at initialization by the server.
93
- pub const cache_transfers_posted_max = config.process.cache_transfers_posted_max;
94
-
95
- comptime {
96
- // SetAssociativeCache requires a power-of-two cardinality.
97
- assert(cache_accounts_max == 0 or std.math.isPowerOfTwo(cache_accounts_max));
98
- assert(cache_transfers_max == 0 or std.math.isPowerOfTwo(cache_transfers_max));
99
- assert(cache_transfers_posted_max == 0 or std.math.isPowerOfTwo(cache_transfers_posted_max));
100
- }
101
-
102
- /// The maximum number of batch entries in the journal file:
103
- /// A batch entry may contain many transfers, so this is not a limit on the number of transfers.
104
- /// We need this limit to allocate space for copies of batch headers at the start of the journal.
105
- /// These header copies enable us to disentangle corruption from crashes and recover accordingly.
106
- pub const journal_slot_count = config.cluster.journal_slot_count;
107
-
108
- /// The maximum size of the journal file:
109
- /// This is pre-allocated and zeroed for performance when initialized.
110
- /// Writes within this file never extend the filesystem inode size reducing the cost of fdatasync().
111
- /// This enables static allocation of disk space so that appends cannot fail with ENOSPC.
112
- /// This also enables us to detect filesystem inode corruption that would change the journal size.
113
- // TODO remove this; just allocate a part of the total storage for the journal
114
- pub const journal_size_max = journal_size_headers + journal_size_prepares;
115
- pub const journal_size_headers = journal_slot_count * @sizeOf(vsr.Header);
116
- pub const journal_size_prepares = journal_slot_count * message_size_max;
117
-
118
- comptime {
119
- // For the given WAL (lsm_batch_multiple=4):
120
- //
121
- // A B C D E
122
- // |····|····|····|····|
123
- //
124
- // - ("|" delineates measures, where a measure is a multiple of prepare batches.)
125
- // - ("·" is a prepare in the WAL.)
126
- // - The Replica triggers a checkpoint at "E".
127
- // - The entries between "A" and "D" are on-disk in level 0.
128
- // - The entries between "D" and "E" are in-memory in the immutable table.
129
- // - So the checkpoint only includes "A…D".
130
- //
131
- // The journal must have at least two measures (batches) to ensure at least one is checkpointed.
132
- assert(journal_slot_count >= Config.Cluster.journal_slot_count_min);
133
- assert(journal_slot_count >= lsm_batch_multiple * 2);
134
- assert(journal_slot_count % lsm_batch_multiple == 0);
135
- assert(journal_slot_count > pipeline_prepare_queue_max);
136
-
137
- assert(journal_size_max == journal_size_headers + journal_size_prepares);
138
- }
139
-
140
- /// The maximum number of connections that can be held open by the server at any time:
141
- pub const connections_max = replicas_max + clients_max;
142
-
143
- /// The maximum size of a message in bytes:
144
- /// This is also the limit of all inflight data across multiple pipelined requests per connection.
145
- /// We may have one request of up to 2 MiB inflight or 2 pipelined requests of up to 1 MiB inflight.
146
- /// This impacts sequential disk write throughput, the larger the buffer the better.
147
- /// 2 MiB is 16,384 transfers, and a reasonable choice for sequential disk write throughput.
148
- /// However, this impacts bufferbloat and head-of-line blocking latency for pipelined requests.
149
- /// For a 1 Gbps NIC = 125 MiB/s throughput: 2 MiB / 125 * 1000ms = 16ms for the next request.
150
- /// This impacts the amount of memory allocated at initialization by the server.
151
- pub const message_size_max = config.cluster.message_size_max;
152
- pub const message_body_size_max = message_size_max - @sizeOf(vsr.Header);
153
-
154
- comptime {
155
- // The WAL format requires messages to be a multiple of the sector size.
156
- assert(message_size_max % sector_size == 0);
157
- assert(message_size_max >= @sizeOf(vsr.Header));
158
- assert(message_size_max >= sector_size);
159
- assert(message_size_max >= Config.Cluster.message_size_max_min(clients_max));
160
-
161
- // Ensure that DVC/SV messages can fit all necessary headers.
162
- assert(message_body_size_max >= view_change_headers_max * @sizeOf(vsr.Header));
163
- }
164
-
165
- /// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.
166
- /// This is immutable once assigned per cluster, as replicas need to know how many operations might
167
- /// possibly be uncommitted during a view change, and this must be constant for all replicas.
168
- pub const pipeline_prepare_queue_max = config.cluster.pipeline_prepare_queue_max;
169
-
170
- /// The maximum number of Viewstamped Replication request messages that can be queued at a primary,
171
- /// waiting to prepare.
172
- // TODO(Zig): After 0.10, change this to simply "clients_max -| pipeline_prepare_queue_max".
173
- // In Zig 0.9 compilation fails with "operation caused overflow" despite the saturating subtraction.
174
- // See: https://github.com/ziglang/zig/issues/10870
175
- pub const pipeline_request_queue_max =
176
- if (clients_max < pipeline_prepare_queue_max)
177
- 0
178
- else
179
- clients_max - pipeline_prepare_queue_max;
180
-
181
- comptime {
182
- // A prepare-queue capacity larger than clients_max is wasted.
183
- assert(pipeline_prepare_queue_max <= clients_max);
184
- // A total queue capacity larger than clients_max is wasted.
185
- assert(pipeline_prepare_queue_max + pipeline_request_queue_max <= clients_max);
186
- assert(pipeline_prepare_queue_max > 0);
187
- assert(pipeline_request_queue_max >= 0);
188
- }
189
-
190
- /// The number of prepare headers to include in the body of a DVC/SV.
191
- ///
192
- /// CRITICAL:
193
- /// We must provide enough headers to cover all uncommitted headers so that the new
194
- /// primary (if we are in a view change) can decide whether to discard uncommitted headers
195
- /// that cannot be repaired because they are gaps. See DVCQuorum for more detail.
196
- pub const view_change_headers_max = config.cluster.view_change_headers_max;
197
-
198
- comptime {
199
- assert(view_change_headers_max > 0);
200
- assert(view_change_headers_max >= pipeline_prepare_queue_max);
201
- assert(view_change_headers_max <= journal_slot_count);
202
- assert(view_change_headers_max <= @divFloor(message_body_size_max, @sizeOf(vsr.Header)));
203
- }
204
-
205
- /// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
206
- /// Exponential backoff and jitter are applied within this range.
207
- pub const connection_delay_min_ms = config.process.connection_delay_min_ms;
208
- pub const connection_delay_max_ms = config.process.connection_delay_max_ms;
209
-
210
- /// The maximum number of outgoing messages that may be queued on a replica connection.
211
- pub const connection_send_queue_max_replica = std.math.max(std.math.min(clients_max, 4), 2);
212
-
213
- /// The maximum number of outgoing messages that may be queued on a client connection.
214
- /// The client has one in-flight request, and occasionally a ping.
215
- pub const connection_send_queue_max_client = 2;
216
-
217
- /// The maximum number of outgoing requests that may be queued on a client (including the in-flight request).
218
- pub const client_request_queue_max = config.process.client_request_queue_max;
219
-
220
- /// The maximum number of connections in the kernel's complete connection queue pending an accept():
221
- /// If the backlog argument is greater than the value in `/proc/sys/net/core/somaxconn`, then it is
222
- /// silently truncated to that value. Since Linux 5.4, the default in this file is 4096.
223
- pub const tcp_backlog = config.process.tcp_backlog;
224
-
225
- /// The maximum size of a kernel socket receive buffer in bytes (or 0 to use the system default):
226
- /// This sets SO_RCVBUF as an alternative to the auto-tuning range in /proc/sys/net/ipv4/tcp_rmem.
227
- /// The value is limited by /proc/sys/net/core/rmem_max, unless the CAP_NET_ADMIN privilege exists.
228
- /// The kernel doubles this value to allow space for packet bookkeeping overhead.
229
- /// The receive buffer should ideally exceed the Bandwidth-Delay Product for maximum throughput.
230
- /// At the same time, be careful going beyond 4 MiB as the kernel may merge many small TCP packets,
231
- /// causing considerable latency spikes for large buffer sizes:
232
- /// https://blog.cloudflare.com/the-story-of-one-latency-spike/
233
- pub const tcp_rcvbuf = config.process.tcp_rcvbuf;
234
-
235
- /// The maximum size of a kernel socket send buffer in bytes (or 0 to use the system default):
236
- /// This sets SO_SNDBUF as an alternative to the auto-tuning range in /proc/sys/net/ipv4/tcp_wmem.
237
- /// The value is limited by /proc/sys/net/core/wmem_max, unless the CAP_NET_ADMIN privilege exists.
238
- /// The kernel doubles this value to allow space for packet bookkeeping overhead.
239
- pub const tcp_sndbuf_replica = connection_send_queue_max_replica * message_size_max;
240
- pub const tcp_sndbuf_client = connection_send_queue_max_client * message_size_max;
241
-
242
- comptime {
243
- // Avoid latency issues from setting sndbuf too high:
244
- assert(tcp_sndbuf_replica <= 16 * 1024 * 1024);
245
- assert(tcp_sndbuf_client <= 16 * 1024 * 1024);
246
- }
247
-
248
- /// Whether to enable TCP keepalive:
249
- pub const tcp_keepalive = config.process.tcp_keepalive;
250
-
251
- /// The time (in seconds) the connection needs to be idle before sending TCP keepalive probes:
252
- /// Probes are not sent when the send buffer has data or the congestion window size is zero,
253
- /// for these cases we also need tcp_user_timeout_ms below.
254
- pub const tcp_keepidle = config.process.tcp_keepidle;
255
-
256
- /// The time (in seconds) between individual keepalive probes:
257
- pub const tcp_keepintvl = config.process.tcp_keepintvl;
258
-
259
- /// The maximum number of keepalive probes to send before dropping the connection:
260
- pub const tcp_keepcnt = config.process.tcp_keepcnt;
261
-
262
- /// The time (in milliseconds) to timeout an idle connection or unacknowledged send:
263
- /// This timer rides on the granularity of the keepalive or retransmission timers.
264
- /// For example, if keepalive will only send a probe after 10s then this becomes the lower bound
265
- /// for tcp_user_timeout_ms to fire, even if tcp_user_timeout_ms is 2s. Nevertheless, this would timeout
266
- /// the connection at 10s rather than wait for tcp_keepcnt probes to be sent. At the same time, if
267
- /// tcp_user_timeout_ms is larger than the max keepalive time then tcp_keepcnt will be ignored and
268
- /// more keepalive probes will be sent until tcp_user_timeout_ms fires.
269
- /// For a thorough overview of how these settings interact:
270
- /// https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/
271
- pub const tcp_user_timeout_ms = (tcp_keepidle + tcp_keepintvl * tcp_keepcnt) * 1000;
272
-
273
- /// Whether to disable Nagle's algorithm to eliminate send buffering delays:
274
- pub const tcp_nodelay = config.process.tcp_nodelay;
275
-
276
- /// Size of a CPU cache line in bytes
277
- pub const cache_line_size = config.cluster.cache_line_size;
278
-
279
- /// The minimum size of an aligned kernel page and an Advanced Format disk sector:
280
- /// This is necessary for direct I/O without the kernel having to fix unaligned pages with a copy.
281
- /// The new Advanced Format sector size is backwards compatible with the old 512 byte sector size.
282
- /// This should therefore never be less than 4 KiB to be future-proof when server disks are swapped.
283
- pub const sector_size = 4096;
284
-
285
- /// Whether to perform direct I/O to the underlying disk device:
286
- /// This enables several performance optimizations:
287
- /// * A memory copy to the kernel's page cache can be eliminated for reduced CPU utilization.
288
- /// * I/O can be issued immediately to the disk device without buffering delay for improved latency.
289
- /// This also enables several safety features:
290
- /// * Disk data can be scrubbed to repair latent sector errors and checksum errors proactively.
291
- /// * Fsync failures can be recovered from correctly.
292
- /// WARNING: Disabling direct I/O is unsafe; the page cache cannot be trusted after an fsync error,
293
- /// even after an application panic, since the kernel will mark dirty pages as clean, even
294
- /// when they were never written to disk.
295
- pub const direct_io = config.process.direct_io;
296
- pub const direct_io_required = config.process.direct_io_required;
297
-
298
- // TODO Add in the Grid's IOPS and the upper-bound that the Superblock will use.
299
- pub const iops_read_max = journal_iops_read_max;
300
- pub const iops_write_max = journal_iops_write_max;
301
-
302
- /// The maximum number of concurrent WAL read I/O operations to allow at once.
303
- pub const journal_iops_read_max = config.process.journal_iops_read_max;
304
- /// The maximum number of concurrent WAL write I/O operations to allow at once.
305
- /// Ideally this is at least as high as pipeline_prepare_queue_max, but it is safe to be lower.
306
- pub const journal_iops_write_max = config.process.journal_iops_write_max;
307
-
308
- /// The number of redundant copies of the superblock in the superblock storage zone.
309
- /// This must be either { 4, 6, 8 }, i.e. an even number, for more efficient flexible quorums.
310
- ///
311
- /// The superblock contains local state for the replica and therefore cannot be replicated remotely.
312
- /// Loss of the superblock would represent loss of the replica and so it must be protected.
313
- /// Since each superblock copy also copies the superblock trailer (around 33 MiB), setting this
314
- /// beyond 4 copies (or decreasing block_size < 64 KiB) can result in a superblock zone > 264 MiB.
315
- ///
316
- /// This can mean checkpointing latencies in the rare extreme worst-case of at most 264ms, although
317
- /// this would require EWAH compression of our block free set to have zero effective compression.
318
- /// In practice, checkpointing latency should be an order of magnitude better due to compression,
319
- /// because our block free set will fill holes when allocating.
320
- ///
321
- /// The superblock only needs to be checkpointed every now and then, before the WAL wraps around,
322
- /// or when a view change needs to take place to elect a new primary.
323
- pub const superblock_copies = config.cluster.superblock_copies;
324
-
325
- comptime {
326
- assert(superblock_copies % 2 == 0);
327
- assert(superblock_copies >= 4);
328
- assert(superblock_copies <= 8);
329
- }
330
-
331
- /// The maximum size of a local data file.
332
- /// This should not be much larger than several TiB to limit:
333
- /// * blast radius and recovery time when a whole replica is lost,
334
- /// * replicated storage overhead, since all data files are mirrored,
335
- /// * the size of the superblock storage zone, and
336
- /// * the static memory allocation required for tracking LSM forest metadata in memory.
337
- pub const storage_size_max = config.cluster.storage_size_max;
338
-
339
- /// The unit of read/write access to LSM manifest and LSM table blocks in the block storage zone.
340
- ///
341
- /// - A lower block size increases the memory overhead of table metadata, due to smaller/more tables.
342
- /// - A higher block size increases space amplification due to partially-filled blocks.
343
- pub const block_size = config.cluster.block_size;
344
-
345
- comptime {
346
- assert(block_size % sector_size == 0);
347
- }
348
-
349
- /// The number of levels in an LSM tree.
350
- /// A higher number of levels increases read amplification, as well as total storage capacity.
351
- pub const lsm_levels = config.cluster.lsm_levels;
352
-
353
- comptime {
354
- // ManifestLog serializes the level as a u7.
355
- assert(lsm_levels > 0);
356
- assert(lsm_levels <= std.math.maxInt(u7));
357
- }
358
-
359
- /// The number of tables at level i (0 ≤ i < lsm_levels) is `pow(lsm_growth_factor, i+1)`.
360
- /// A higher growth factor increases write amplification (by increasing the number of tables in
361
- /// level B that overlap a table in level A in a compaction), but decreases read amplification (by
362
- /// reducing the height of the tree and thus the number of levels that must be probed). Since read
363
- /// amplification can be optimized more easily (with filters and caching), we target a growth
364
- /// factor of 8 for lower write amplification rather than the more typical growth factor of 10.
365
- pub const lsm_growth_factor = config.cluster.lsm_growth_factor;
366
-
367
- /// Size of nodes used by the LSM tree manifest implementation.
368
- /// TODO Double-check this with our "LSM Manifest" spreadsheet.
369
- pub const lsm_manifest_node_size = config.process.lsm_manifest_node_size;
370
-
371
- /// A multiple of batch inserts that a mutable table can definitely accommodate before flushing.
372
- /// For example, if a message_size_max batch can contain at most 8181 transfers then a multiple of 4
373
- /// means that the transfer tree's mutable table will be sized to 8191 * 4 = 32764 transfers.
374
- pub const lsm_batch_multiple = config.cluster.lsm_batch_multiple;
375
-
376
- comptime {
377
- // The LSM tree uses half-measures to balance compaction.
378
- assert(lsm_batch_multiple % 2 == 0);
379
- }
380
-
381
- pub const lsm_snapshots_max = config.cluster.lsm_snapshots_max;
382
-
383
- pub const lsm_value_to_key_layout_ratio_min = config.cluster.lsm_value_to_key_layout_ratio_min;
384
-
385
- /// The number of milliseconds between each replica tick, the basic unit of time in TigerBeetle.
386
- /// Used to regulate heartbeats, retries and timeouts, all specified as multiples of a tick.
387
- pub const tick_ms = config.process.tick_ms;
388
-
389
- /// The conservative round-trip time at startup when there is no network knowledge.
390
- /// Adjusted dynamically thereafter for RTT-sensitive timeouts according to network congestion.
391
- /// This should be set higher rather than lower to avoid flooding the network at startup.
392
- pub const rtt_ticks = config.process.rtt_ms / tick_ms;
393
-
394
- /// The multiple of round-trip time for RTT-sensitive timeouts.
395
- pub const rtt_multiple = 2;
396
-
397
- /// The min/max bounds of exponential backoff (and jitter) to add to RTT-sensitive timeouts.
398
- pub const backoff_min_ticks = config.process.backoff_min_ms / tick_ms;
399
- pub const backoff_max_ticks = config.process.backoff_max_ms / tick_ms;
400
-
401
- /// The maximum skew between two clocks to allow when considering them to be in agreement.
402
- /// The principle is that no two clocks tick exactly alike but some clocks more or less agree.
403
- /// The maximum skew across the cluster as a whole is this value times the total number of clocks.
404
- /// The cluster will be unavailable if the majority of clocks are all further than this value apart.
405
- /// Decreasing this reduces the probability of reaching agreement on synchronized time.
406
- /// Increasing this reduces the accuracy of synchronized time.
407
- pub const clock_offset_tolerance_max_ms = config.process.clock_offset_tolerance_max_ms;
408
-
409
- /// The amount of time before the clock's synchronized epoch is expired.
410
- /// If the epoch is expired before it can be replaced with a new synchronized epoch, then this most
411
- /// likely indicates either a network partition or else too many clock faults across the cluster.
412
- /// A new synchronized epoch will be installed as soon as these conditions resolve.
413
- pub const clock_epoch_max_ms = config.process.clock_epoch_max_ms;
414
-
415
- /// The amount of time to wait for enough accurate samples before synchronizing the clock.
416
- /// The more samples we can take per remote clock source, the more accurate our estimation becomes.
417
- /// This impacts cluster startup time as the primary must first wait for synchronization to complete.
418
- pub const clock_synchronization_window_min_ms = config.process.clock_synchronization_window_min_ms;
419
-
420
- /// The amount of time without agreement before the clock window is expired and a new window opened.
421
- /// This happens where some samples have been collected but not enough to reach agreement.
422
- /// The quality of samples degrades as they age so at some point we throw them away and start over.
423
- /// This eliminates the impact of gradual clock drift on our clock offset (clock skew) measurements.
424
- /// If a window expires because of this then it is likely that the clock epoch will also be expired.
425
- pub const clock_synchronization_window_max_ms = config.process.clock_synchronization_window_max_ms;
426
-
427
- /// Whether to perform intensive online verification.
428
- pub const verify = config.process.verify;