tigerbeetle-node 0.11.12 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/README.md +212 -196
  2. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  3. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  4. package/dist/bin/aarch64-macos/client.node +0 -0
  5. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  6. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  7. package/dist/bin/x86_64-macos/client.node +0 -0
  8. package/dist/index.js +33 -1
  9. package/dist/index.js.map +1 -1
  10. package/package-lock.json +66 -0
  11. package/package.json +8 -17
  12. package/src/index.ts +56 -1
  13. package/src/node.zig +10 -9
  14. package/dist/.client.node.sha256 +0 -1
  15. package/scripts/build_lib.sh +0 -61
  16. package/scripts/download_node_headers.sh +0 -32
  17. package/src/tigerbeetle/scripts/benchmark.bat +0 -48
  18. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  19. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  22. package/src/tigerbeetle/scripts/install.bat +0 -7
  23. package/src/tigerbeetle/scripts/install.sh +0 -21
  24. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  25. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  26. package/src/tigerbeetle/scripts/lint.zig +0 -199
  27. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  28. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -48
  29. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  30. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  31. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  32. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  33. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  34. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  35. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  36. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  37. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  38. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  39. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  40. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  41. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  42. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  43. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  44. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  45. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  46. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  47. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  48. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  49. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  50. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  51. package/src/tigerbeetle/src/benchmark.zig +0 -314
  52. package/src/tigerbeetle/src/config.zig +0 -234
  53. package/src/tigerbeetle/src/constants.zig +0 -436
  54. package/src/tigerbeetle/src/ewah.zig +0 -286
  55. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  56. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  57. package/src/tigerbeetle/src/fifo.zig +0 -120
  58. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  59. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  60. package/src/tigerbeetle/src/io/linux.zig +0 -1062
  61. package/src/tigerbeetle/src/io/test.zig +0 -643
  62. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  63. package/src/tigerbeetle/src/io.zig +0 -34
  64. package/src/tigerbeetle/src/iops.zig +0 -107
  65. package/src/tigerbeetle/src/lsm/README.md +0 -308
  66. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  67. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  68. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  69. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  70. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  71. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  72. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  73. package/src/tigerbeetle/src/lsm/forest.zig +0 -204
  74. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -401
  75. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  76. package/src/tigerbeetle/src/lsm/groove.zig +0 -972
  77. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  78. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  79. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  80. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -877
  81. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  82. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  83. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  84. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  85. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -378
  86. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1328
  87. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  88. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  89. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  90. package/src/tigerbeetle/src/lsm/table.zig +0 -1031
  91. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -203
  92. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  93. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -220
  94. package/src/tigerbeetle/src/lsm/test.zig +0 -438
  95. package/src/tigerbeetle/src/lsm/tree.zig +0 -1193
  96. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -474
  97. package/src/tigerbeetle/src/message_bus.zig +0 -1012
  98. package/src/tigerbeetle/src/message_pool.zig +0 -156
  99. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  100. package/src/tigerbeetle/src/simulator.zig +0 -569
  101. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -577
  102. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  103. package/src/tigerbeetle/src/state_machine.zig +0 -1881
  104. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  105. package/src/tigerbeetle/src/stdx.zig +0 -162
  106. package/src/tigerbeetle/src/storage.zig +0 -393
  107. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  108. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  109. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  110. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  111. package/src/tigerbeetle/src/testing/cluster.zig +0 -443
  112. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  113. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  114. package/src/tigerbeetle/src/testing/id.zig +0 -99
  115. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -364
  116. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  117. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  118. package/src/tigerbeetle/src/testing/state_machine.zig +0 -249
  119. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  120. package/src/tigerbeetle/src/testing/table.zig +0 -247
  121. package/src/tigerbeetle/src/testing/time.zig +0 -84
  122. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  123. package/src/tigerbeetle/src/time.zig +0 -112
  124. package/src/tigerbeetle/src/tracer.zig +0 -529
  125. package/src/tigerbeetle/src/unit_tests.zig +0 -42
  126. package/src/tigerbeetle/src/vopr.zig +0 -495
  127. package/src/tigerbeetle/src/vsr/README.md +0 -209
  128. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  129. package/src/tigerbeetle/src/vsr/clock.zig +0 -853
  130. package/src/tigerbeetle/src/vsr/journal.zig +0 -2413
  131. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  132. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  133. package/src/tigerbeetle/src/vsr/replica.zig +0 -6381
  134. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  135. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  136. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  137. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  138. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  139. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  140. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  141. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  142. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  143. package/src/tigerbeetle/src/vsr.zig +0 -1352
@@ -1,436 +0,0 @@
1
- //! Constants are the configuration that the code actually imports — they include:
2
- //! - all of the configuration values (flattened)
3
- //! - derived configuration values.
4
-
5
- const std = @import("std");
6
- const assert = std.debug.assert;
7
- const vsr = @import("vsr.zig");
8
- const tracer = @import("tracer.zig");
9
- const Config = @import("config.zig").Config;
10
- const config = @import("config.zig").configs.current;
11
-
12
- /// The maximum log level.
13
- /// One of: .err, .warn, .info, .debug
14
- pub const log_level: std.log.Level = config.process.log_level;
15
-
16
- pub const log = if (tracer_backend == .tracy)
17
- tracer.log_fn
18
- else
19
- std.log.defaultLog;
20
-
21
- // Which backend to use for ./tracer.zig.
22
- // Default is `.none`.
23
- pub const tracer_backend = config.process.tracer_backend;
24
-
25
- // Which mode to use for ./testing/hash_log.zig.
26
- pub const hash_log_mode = config.process.hash_log_mode;
27
-
28
- /// The maximum number of replicas allowed in a cluster.
29
- pub const replicas_max = 6;
30
-
31
- /// The maximum number of clients allowed per cluster, where each client has a unique 128-bit ID.
32
- /// This impacts the amount of memory allocated at initialization by the server.
33
- /// This determines the size of the VR client table used to cache replies to clients by client ID.
34
- /// Each client has one entry in the VR client table to store the latest `message_size_max` reply.
35
- pub const clients_max = config.cluster.clients_max;
36
-
37
- comptime {
38
- assert(clients_max >= Config.Cluster.clients_max_min);
39
- }
40
-
41
- /// The minimum number of nodes required to form a quorum for replication:
42
- /// Majority quorums are only required across view change and replication phases (not within).
43
- /// As per Flexible Paxos, provided `quorum_replication + quorum_view_change > replicas`:
44
- /// 1. you may increase `quorum_view_change` above a majority, so that
45
- /// 2. you can decrease `quorum_replication` below a majority, to optimize the common case.
46
- /// This improves latency by reducing the number of nodes required for synchronous replication.
47
- /// This reduces redundancy only in the short term; asynchronous replication will still continue.
48
- /// The size of the replication quorum is limited to the minimum of this value and actual majority.
49
- /// The size of the view change quorum will then be automatically inferred from quorum_replication.
50
- pub const quorum_replication_max = config.cluster.quorum_replication_max;
51
-
52
- /// The default server port to listen on if not specified in `--addresses`:
53
- pub const port = config.process.port;
54
-
55
- /// The default network interface address to listen on if not specified in `--addresses`:
56
- /// WARNING: Binding to all interfaces with "0.0.0.0" is dangerous and opens the server to anyone.
57
- /// Bind to the "127.0.0.1" loopback address to accept local connections as a safe default only.
58
- pub const address = config.process.address;
59
-
60
- comptime {
61
- // vsr.parse_address assumes that config.address/config.port are valid.
62
- _ = std.net.Address.parseIp4(address, 0) catch unreachable;
63
- _ = @as(u16, port);
64
- }
65
-
66
- /// The default maximum amount of memory to use.
67
- pub const memory_size_max_default = config.process.memory_size_max_default;
68
-
69
- /// At a high level, priority for object caching is (in descending order):
70
- ///
71
- /// 1. Accounts.
72
- /// - 2 lookups per created transfer
73
- /// - high temporal locality
74
- /// - positive expected result
75
- /// 2. Posted transfers.
76
- /// - high temporal locality
77
- /// - positive expected result
78
- /// 3. Transfers. Generally don't cache these because of:
79
- /// - low temporal locality
80
- /// - negative expected result
81
- ///
82
- /// The maximum number of accounts to store in memory:
83
- /// This impacts the amount of memory allocated at initialization by the server.
84
- pub const cache_accounts_max = config.process.cache_accounts_max;
85
-
86
- /// The maximum number of transfers to store in memory:
87
- /// This impacts the amount of memory allocated at initialization by the server.
88
- /// We allocate more capacity than the number of transfers for a safe hash table load factor.
89
- pub const cache_transfers_max = config.process.cache_transfers_max;
90
-
91
- /// The maximum number of two-phase transfers to store in memory:
92
- /// This impacts the amount of memory allocated at initialization by the server.
93
- pub const cache_transfers_posted_max = config.process.cache_transfers_posted_max;
94
-
95
- comptime {
96
- // SetAssociativeCache requires a power-of-two cardinality.
97
- assert(cache_accounts_max == 0 or std.math.isPowerOfTwo(cache_accounts_max));
98
- assert(cache_transfers_max == 0 or std.math.isPowerOfTwo(cache_transfers_max));
99
- assert(cache_transfers_posted_max == 0 or std.math.isPowerOfTwo(cache_transfers_posted_max));
100
- }
101
-
102
- /// The maximum number of batch entries in the journal file:
103
- /// A batch entry may contain many transfers, so this is not a limit on the number of transfers.
104
- /// We need this limit to allocate space for copies of batch headers at the start of the journal.
105
- /// These header copies enable us to disentangle corruption from crashes and recover accordingly.
106
- pub const journal_slot_count = config.cluster.journal_slot_count;
107
-
108
- /// The maximum size of the journal file:
109
- /// This is pre-allocated and zeroed for performance when initialized.
110
- /// Writes within this file never extend the filesystem inode size, reducing the cost of fdatasync().
111
- /// This enables static allocation of disk space so that appends cannot fail with ENOSPC.
112
- /// This also enables us to detect filesystem inode corruption that would change the journal size.
113
- // TODO remove this; just allocate a part of the total storage for the journal
114
- pub const journal_size_max = journal_size_headers + journal_size_prepares;
115
- pub const journal_size_headers = journal_slot_count * @sizeOf(vsr.Header);
116
- pub const journal_size_prepares = journal_slot_count * message_size_max;
117
-
118
- comptime {
119
- // For the given WAL (lsm_batch_multiple=4):
120
- //
121
- // A    B    C    D    E
122
- // |····|····|····|····|
123
- //
124
- // - ("|" delineates measures, where a measure is a multiple of prepare batches.)
125
- // - ("·" is a prepare in the WAL.)
126
- // - The Replica triggers a checkpoint at "E".
127
- // - The entries between "A" and "D" are on-disk in level 0.
128
- // - The entries between "D" and "E" are in-memory in the immutable table.
129
- // - So the checkpoint only includes "A…D".
130
- //
131
- // The journal must have at least two measures (batches) to ensure at least one is checkpointed.
132
- assert(journal_slot_count >= Config.Cluster.journal_slot_count_min);
133
- assert(journal_slot_count >= lsm_batch_multiple * 2);
134
- assert(journal_slot_count % lsm_batch_multiple == 0);
135
- assert(journal_slot_count > pipeline_prepare_queue_max);
136
-
137
- assert(journal_size_max == journal_size_headers + journal_size_prepares);
138
- }
139
-
140
- /// The maximum number of connections that can be held open by the server at any time:
141
- pub const connections_max = replicas_max + clients_max;
142
-
143
- /// The maximum size of a message in bytes:
144
- /// This is also the limit of all inflight data across multiple pipelined requests per connection.
145
- /// We may have one request of up to 2 MiB inflight or 2 pipelined requests of up to 1 MiB inflight.
146
- /// This impacts sequential disk write throughput, the larger the buffer the better.
147
- /// 2 MiB is 16,384 transfers, and a reasonable choice for sequential disk write throughput.
148
- /// However, this impacts bufferbloat and head-of-line blocking latency for pipelined requests.
149
- /// For a 1 Gbps NIC = 125 MiB/s throughput: 2 MiB / 125 * 1000ms = 16ms for the next request.
150
- /// This impacts the amount of memory allocated at initialization by the server.
151
- pub const message_size_max = config.cluster.message_size_max;
152
- pub const message_body_size_max = message_size_max - @sizeOf(vsr.Header);
153
-
154
- comptime {
155
- // The WAL format requires messages to be a multiple of the sector size.
156
- assert(message_size_max % sector_size == 0);
157
- assert(message_size_max >= @sizeOf(vsr.Header));
158
- assert(message_size_max >= sector_size);
159
- assert(message_size_max >= Config.Cluster.message_size_max_min(clients_max));
160
-
161
- // Ensure that DVC/SV messages can fit all necessary headers.
162
- assert(message_body_size_max >= view_change_headers_max * @sizeOf(vsr.Header));
163
- }
164
-
165
- /// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.
166
- /// This is immutable once assigned per cluster, as replicas need to know how many operations might
167
- /// possibly be uncommitted during a view change, and this must be constant for all replicas.
168
- pub const pipeline_prepare_queue_max = config.cluster.pipeline_prepare_queue_max;
169
-
170
- /// The maximum number of Viewstamped Replication request messages that can be queued at a primary,
171
- /// waiting to prepare.
172
- // TODO(Zig): After 0.10, change this to simply "clients_max -| pipeline_prepare_queue_max".
173
- // In Zig 0.9 compilation fails with "operation caused overflow" despite the saturating subtraction.
174
- // See: https://github.com/ziglang/zig/issues/10870
175
- pub const pipeline_request_queue_max =
176
- if (clients_max < pipeline_prepare_queue_max)
177
- 0
178
- else
179
- clients_max - pipeline_prepare_queue_max;
180
-
181
- comptime {
182
- // A prepare-queue capacity larger than clients_max is wasted.
183
- assert(pipeline_prepare_queue_max <= clients_max);
184
- // A total queue capacity larger than clients_max is wasted.
185
- assert(pipeline_prepare_queue_max + pipeline_request_queue_max <= clients_max);
186
- assert(pipeline_prepare_queue_max > 0);
187
- assert(pipeline_request_queue_max >= 0);
188
- }
189
-
190
- /// The number of prepare headers to include in the body of a DVC/SV.
191
- ///
192
- /// CRITICAL:
193
- /// We must provide enough headers to cover all uncommitted headers so that the new
194
- /// primary (if we are in a view change) can decide whether to discard uncommitted headers
195
- /// that cannot be repaired because they are gaps. See DVCQuorum for more detail.
196
- pub const view_change_headers_max = config.cluster.view_change_headers_max;
197
-
198
- comptime {
199
- assert(view_change_headers_max > 0);
200
- assert(view_change_headers_max >= pipeline_prepare_queue_max);
201
- assert(view_change_headers_max <= journal_slot_count);
202
- assert(view_change_headers_max <= @divFloor(message_body_size_max, @sizeOf(vsr.Header)));
203
- }
204
-
205
- /// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
206
- /// Exponential backoff and jitter are applied within this range.
207
- pub const connection_delay_min_ms = config.process.connection_delay_min_ms;
208
- pub const connection_delay_max_ms = config.process.connection_delay_max_ms;
209
-
210
- /// The maximum number of outgoing messages that may be queued on a replica connection.
211
- pub const connection_send_queue_max_replica = std.math.max(std.math.min(clients_max, 4), 2);
212
-
213
- /// The maximum number of outgoing messages that may be queued on a client connection.
214
- /// The client has one in-flight request, and occasionally a ping.
215
- pub const connection_send_queue_max_client = 2;
216
-
217
- /// The maximum number of outgoing requests that may be queued on a client (including the in-flight request).
218
- pub const client_request_queue_max = config.process.client_request_queue_max;
219
-
220
- /// The maximum number of connections in the kernel's complete connection queue pending an accept():
221
- /// If the backlog argument is greater than the value in `/proc/sys/net/core/somaxconn`, then it is
222
- /// silently truncated to that value. Since Linux 5.4, the default in this file is 4096.
223
- pub const tcp_backlog = config.process.tcp_backlog;
224
-
225
- /// The maximum size of a kernel socket receive buffer in bytes (or 0 to use the system default):
226
- /// This sets SO_RCVBUF as an alternative to the auto-tuning range in /proc/sys/net/ipv4/tcp_rmem.
227
- /// The value is limited by /proc/sys/net/core/rmem_max, unless the CAP_NET_ADMIN privilege exists.
228
- /// The kernel doubles this value to allow space for packet bookkeeping overhead.
229
- /// The receive buffer should ideally exceed the Bandwidth-Delay Product for maximum throughput.
230
- /// At the same time, be careful going beyond 4 MiB as the kernel may merge many small TCP packets,
231
- /// causing considerable latency spikes for large buffer sizes:
232
- /// https://blog.cloudflare.com/the-story-of-one-latency-spike/
233
- pub const tcp_rcvbuf = config.process.tcp_rcvbuf;
234
-
235
- /// The maximum size of a kernel socket send buffer in bytes (or 0 to use the system default):
236
- /// This sets SO_SNDBUF as an alternative to the auto-tuning range in /proc/sys/net/ipv4/tcp_wmem.
237
- /// The value is limited by /proc/sys/net/core/wmem_max, unless the CAP_NET_ADMIN privilege exists.
238
- /// The kernel doubles this value to allow space for packet bookkeeping overhead.
239
- pub const tcp_sndbuf_replica = connection_send_queue_max_replica * message_size_max;
240
- pub const tcp_sndbuf_client = connection_send_queue_max_client * message_size_max;
241
-
242
- comptime {
243
- // Avoid latency issues from setting sndbuf too high:
244
- assert(tcp_sndbuf_replica <= 16 * 1024 * 1024);
245
- assert(tcp_sndbuf_client <= 16 * 1024 * 1024);
246
- }
247
-
248
- /// Whether to enable TCP keepalive:
249
- pub const tcp_keepalive = config.process.tcp_keepalive;
250
-
251
- /// The time (in seconds) the connection needs to be idle before sending TCP keepalive probes:
252
- /// Probes are not sent when the send buffer has data or the congestion window size is zero,
253
- /// for these cases we also need tcp_user_timeout_ms below.
254
- pub const tcp_keepidle = config.process.tcp_keepidle;
255
-
256
- /// The time (in seconds) between individual keepalive probes:
257
- pub const tcp_keepintvl = config.process.tcp_keepintvl;
258
-
259
- /// The maximum number of keepalive probes to send before dropping the connection:
260
- pub const tcp_keepcnt = config.process.tcp_keepcnt;
261
-
262
- /// The time (in milliseconds) to time out an idle connection or unacknowledged send:
263
- /// This timer rides on the granularity of the keepalive or retransmission timers.
264
- /// For example, if keepalive will only send a probe after 10s then this becomes the lower bound
265
- /// for tcp_user_timeout_ms to fire, even if tcp_user_timeout_ms is 2s. Nevertheless, this would time out
266
- /// the connection at 10s rather than wait for tcp_keepcnt probes to be sent. At the same time, if
267
- /// tcp_user_timeout_ms is larger than the max keepalive time then tcp_keepcnt will be ignored and
268
- /// more keepalive probes will be sent until tcp_user_timeout_ms fires.
269
- /// For a thorough overview of how these settings interact:
270
- /// https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/
271
- pub const tcp_user_timeout_ms = (tcp_keepidle + tcp_keepintvl * tcp_keepcnt) * 1000;
272
-
273
- /// Whether to disable Nagle's algorithm to eliminate send buffering delays:
274
- pub const tcp_nodelay = config.process.tcp_nodelay;
275
-
276
- /// Size of a CPU cache line in bytes
277
- pub const cache_line_size = config.cluster.cache_line_size;
278
-
279
- /// The minimum size of an aligned kernel page and an Advanced Format disk sector:
280
- /// This is necessary for direct I/O without the kernel having to fix unaligned pages with a copy.
281
- /// The new Advanced Format sector size is backwards compatible with the old 512 byte sector size.
282
- /// This should therefore never be less than 4 KiB to be future-proof when server disks are swapped.
283
- pub const sector_size = 4096;
284
-
285
- /// Whether to perform direct I/O to the underlying disk device:
286
- /// This enables several performance optimizations:
287
- /// * A memory copy to the kernel's page cache can be eliminated for reduced CPU utilization.
288
- /// * I/O can be issued immediately to the disk device without buffering delay for improved latency.
289
- /// This also enables several safety features:
290
- /// * Disk data can be scrubbed to repair latent sector errors and checksum errors proactively.
291
- /// * Fsync failures can be recovered from correctly.
292
- /// WARNING: Disabling direct I/O is unsafe; the page cache cannot be trusted after an fsync error,
293
- /// even after an application panic, since the kernel will mark dirty pages as clean, even
294
- /// when they were never written to disk.
295
- pub const direct_io = config.process.direct_io;
296
- pub const direct_io_required = config.process.direct_io_required;
297
-
298
- // TODO Add in the Grid's IOPS and the upper-bound that the Superblock will use.
299
- pub const iops_read_max = journal_iops_read_max;
300
- pub const iops_write_max = journal_iops_write_max;
301
-
302
- /// The maximum number of concurrent WAL read I/O operations to allow at once.
303
- pub const journal_iops_read_max = config.process.journal_iops_read_max;
304
- /// The maximum number of concurrent WAL write I/O operations to allow at once.
305
- /// Ideally this is at least as high as pipeline_prepare_queue_max, but it is safe to be lower.
306
- pub const journal_iops_write_max = config.process.journal_iops_write_max;
307
-
308
- /// The number of redundant copies of the superblock in the superblock storage zone.
309
- /// This must be one of { 4, 6, 8 }, i.e. an even number, for more efficient flexible quorums.
310
- ///
311
- /// The superblock contains local state for the replica and therefore cannot be replicated remotely.
312
- /// Loss of the superblock would represent loss of the replica and so it must be protected.
313
- /// Since each superblock copy also copies the superblock trailer (around 33 MiB), setting this
314
- /// beyond 4 copies (or decreasing block_size < 64 KiB) can result in a superblock zone > 264 MiB.
315
- ///
316
- /// This can mean checkpointing latencies in the rare extreme worst-case of at most 264ms, although
317
- /// this would require EWAH compression of our block free set to have zero effective compression.
318
- /// In practice, checkpointing latency should be an order of magnitude better due to compression,
319
- /// because our block free set will fill holes when allocating.
320
- ///
321
- /// The superblock only needs to be checkpointed every now and then, before the WAL wraps around,
322
- /// or when a view change needs to take place to elect a new primary.
323
- pub const superblock_copies = config.cluster.superblock_copies;
324
-
325
- comptime {
326
- assert(superblock_copies % 2 == 0);
327
- assert(superblock_copies >= 4);
328
- assert(superblock_copies <= 8);
329
- }
330
-
331
- /// The maximum size of a local data file.
332
- /// This should not be much larger than several TiB to limit:
333
- /// * blast radius and recovery time when a whole replica is lost,
334
- /// * replicated storage overhead, since all data files are mirrored,
335
- /// * the size of the superblock storage zone, and
336
- /// * the static memory allocation required for tracking LSM forest metadata in memory.
337
- pub const storage_size_max = config.cluster.storage_size_max;
338
-
339
- /// The unit of read/write access to LSM manifest and LSM table blocks in the block storage zone.
340
- ///
341
- /// - A lower block size increases the memory overhead of table metadata, due to smaller/more tables.
342
- /// - A higher block size increases space amplification due to partially-filled blocks.
343
- pub const block_size = config.cluster.block_size;
344
-
345
- comptime {
346
- assert(block_size % sector_size == 0);
347
- assert(lsm_table_size_max % sector_size == 0);
348
- assert(lsm_table_size_max % block_size == 0);
349
- }
350
-
351
- /// The number of levels in an LSM tree.
352
- /// A higher number of levels increases read amplification, as well as total storage capacity.
353
- pub const lsm_levels = config.cluster.lsm_levels;
354
-
355
- comptime {
356
- // ManifestLog serializes the level as a u7.
357
- assert(lsm_levels > 0);
358
- assert(lsm_levels <= std.math.maxInt(u7));
359
- }
360
-
361
- /// The number of tables at level i (0 ≤ i < lsm_levels) is `pow(lsm_growth_factor, i+1)`.
362
- /// A higher growth factor increases write amplification (by increasing the number of tables in
363
- /// level B that overlap a table in level A in a compaction), but decreases read amplification (by
364
- /// reducing the height of the tree and thus the number of levels that must be probed). Since read
365
- /// amplification can be optimized more easily (with filters and caching), we target a growth
366
- /// factor of 8 for lower write amplification rather than the more typical growth factor of 10.
367
- pub const lsm_growth_factor = config.cluster.lsm_growth_factor;
368
-
369
- /// The maximum cumulative size of a table — computed as the sum of the size of the index block,
370
- /// filter blocks, and data blocks.
371
- pub const lsm_table_size_max = config.cluster.lsm_table_size_max;
372
-
373
- /// Size of nodes used by the LSM tree manifest implementation.
374
- /// TODO Double-check this with our "LSM Manifest" spreadsheet.
375
- pub const lsm_manifest_node_size = config.process.lsm_manifest_node_size;
376
-
377
- /// A multiple of batch inserts that a mutable table can definitely accommodate before flushing.
378
- /// For example, if a message_size_max batch can contain at most 8191 transfers then a multiple of 4
379
- /// means that the transfer tree's mutable table will be sized to 8191 * 4 = 32764 transfers.
380
- /// TODO Assert this relative to lsm_table_size_max.
381
- /// We want to ensure that a mutable table can be converted to an immutable table without overflow.
382
- pub const lsm_batch_multiple = config.cluster.lsm_batch_multiple;
383
-
384
- comptime {
385
- // The LSM tree uses half-measures to balance compaction.
386
- assert(lsm_batch_multiple % 2 == 0);
387
- }
388
-
389
- pub const lsm_snapshots_max = config.cluster.lsm_snapshots_max;
390
-
391
- pub const lsm_value_to_key_layout_ratio_min = config.cluster.lsm_value_to_key_layout_ratio_min;
392
-
393
- /// The number of milliseconds between each replica tick, the basic unit of time in TigerBeetle.
394
- /// Used to regulate heartbeats, retries and timeouts, all specified as multiples of a tick.
395
- pub const tick_ms = config.process.tick_ms;
396
-
397
- /// The conservative round-trip time at startup when there is no network knowledge.
398
- /// Adjusted dynamically thereafter for RTT-sensitive timeouts according to network congestion.
399
- /// This should be set higher rather than lower to avoid flooding the network at startup.
400
- pub const rtt_ticks = config.process.rtt_ms / tick_ms;
401
-
402
- /// The multiple of round-trip time for RTT-sensitive timeouts.
403
- pub const rtt_multiple = 2;
404
-
405
- /// The min/max bounds of exponential backoff (and jitter) to add to RTT-sensitive timeouts.
406
- pub const backoff_min_ticks = config.process.backoff_min_ms / tick_ms;
407
- pub const backoff_max_ticks = config.process.backoff_max_ms / tick_ms;
408
-
409
- /// The maximum skew between two clocks to allow when considering them to be in agreement.
410
- /// The principle is that no two clocks tick exactly alike but some clocks more or less agree.
411
- /// The maximum skew across the cluster as a whole is this value times the total number of clocks.
412
- /// The cluster will be unavailable if the majority of clocks are all further than this value apart.
413
- /// Decreasing this reduces the probability of reaching agreement on synchronized time.
414
- /// Increasing this reduces the accuracy of synchronized time.
415
- pub const clock_offset_tolerance_max_ms = config.process.clock_offset_tolerance_max_ms;
416
-
417
- /// The amount of time before the clock's synchronized epoch is expired.
418
- /// If the epoch is expired before it can be replaced with a new synchronized epoch, then this most
419
- /// likely indicates either a network partition or else too many clock faults across the cluster.
420
- /// A new synchronized epoch will be installed as soon as these conditions resolve.
421
- pub const clock_epoch_max_ms = config.process.clock_epoch_max_ms;
422
-
423
- /// The amount of time to wait for enough accurate samples before synchronizing the clock.
424
- /// The more samples we can take per remote clock source, the more accurate our estimation becomes.
425
- /// This impacts cluster startup time as the primary must first wait for synchronization to complete.
426
- pub const clock_synchronization_window_min_ms = config.process.clock_synchronization_window_min_ms;
427
-
428
- /// The amount of time without agreement before the clock window is expired and a new window opened.
429
- /// This happens where some samples have been collected but not enough to reach agreement.
430
- /// The quality of samples degrades as they age so at some point we throw them away and start over.
431
- /// This eliminates the impact of gradual clock drift on our clock offset (clock skew) measurements.
432
- /// If a window expires because of this then it is likely that the clock epoch will also be expired.
433
- pub const clock_synchronization_window_max_ms = config.process.clock_synchronization_window_max_ms;
434
-
435
- /// Whether to perform intensive online verification.
436
- pub const verify = config.process.verify;