tigerbeetle 0.0.36 → 0.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +1 -1
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,790 @@
|
|
|
1
|
+
//! Constants are the configuration that the code actually imports — they include:
|
|
2
|
+
//! - all of the configuration values (flattened)
|
|
3
|
+
//! - derived configuration values.
|
|
4
|
+
|
|
5
|
+
const std = @import("std");
|
|
6
|
+
const assert = std.debug.assert;
|
|
7
|
+
const vsr = @import("vsr.zig");
|
|
8
|
+
const Config = @import("config.zig").Config;
|
|
9
|
+
const stdx = @import("stdx");
|
|
10
|
+
|
|
11
|
+
const MiB = stdx.MiB;
|
|
12
|
+
|
|
13
|
+
pub const config = @import("config.zig").configs.current;
|
|
14
|
+
|
|
15
|
+
pub const semver = std.SemanticVersion{
|
|
16
|
+
.major = config.process.release.triple().major,
|
|
17
|
+
.minor = config.process.release.triple().minor,
|
|
18
|
+
.patch = config.process.release.triple().patch,
|
|
19
|
+
.pre = null,
|
|
20
|
+
.build = if (config.process.git_commit) |sha_full| sha_full[0..7] else null,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/// The maximum number of replicas allowed in a cluster.
|
|
24
|
+
pub const replicas_max = 6;
|
|
25
|
+
/// The maximum number of standbys allowed in a cluster.
|
|
26
|
+
pub const standbys_max = 6;
|
|
27
|
+
/// The maximum number of cluster members (either standbys or active replicas).
|
|
28
|
+
pub const members_max = replicas_max + standbys_max;
|
|
29
|
+
|
|
30
|
+
/// All operations <vsr_operations_reserved are reserved for the control protocol.
|
|
31
|
+
/// All operations ≥vsr_operations_reserved are available for the state machine.
|
|
32
|
+
pub const vsr_operations_reserved: u8 = 128;
|
|
33
|
+
|
|
34
|
+
comptime {
|
|
35
|
+
assert(vsr_operations_reserved <= std.math.maxInt(u8));
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/// The checkpoint interval is chosen to be the highest possible value that satisfies the
|
|
39
|
+
/// constraints described below.
|
|
40
|
+
pub const vsr_checkpoint_ops = journal_slot_count -
|
|
41
|
+
lsm_compaction_ops -
|
|
42
|
+
lsm_compaction_ops * stdx.div_ceil(pipeline_prepare_queue_max * 2, lsm_compaction_ops);
|
|
43
|
+
|
|
44
|
+
comptime {
|
|
45
|
+
// Invariant: to guarantee durability, a log entry from a previous checkpoint can be overwritten
|
|
46
|
+
// only when there is a quorum of replicas at the next checkpoint.
|
|
47
|
+
//
|
|
48
|
+
// This assert guarantees that when a prepare gets bumped from the log, there is a prepare
|
|
49
|
+
// _committed_ on top of the next checkpoint, which in turn guarantees the existence of a
|
|
50
|
+
// checkpoint quorum.
|
|
51
|
+
//
|
|
52
|
+
// More specifically, the checkpoint interval must be less than the WAL length by (at least) the
|
|
53
|
+
// sum of:
|
|
54
|
+
// - `lsm_compaction_ops`: Ensure that the final batch of entries immediately preceding a
|
|
55
|
+
// checkpoint trigger is not overwritten by the following checkpoint's entries. This final
|
|
56
|
+
// batch's updates were not persisted as part of the former checkpoint – they are only in
|
|
57
|
+
// memory until they are compacted by the *next* batch of commits (i.e. the first batch of
|
|
58
|
+
// the following checkpoint).
|
|
59
|
+
// - `2 * pipeline_prepare_queue_max` (rounded up to the nearest lsm_compaction_ops multiple):
|
|
60
|
+
// This margin ensures that the entries prepared immediately following a checkpoint's prepare
|
|
61
|
+
// max never overwrite an entry from the previous WAL wrap until a quorum of replicas has
|
|
62
|
+
// reached that checkpoint. The first pipeline_prepare_queue_max is the maximum number of
|
|
63
|
+
// entries a replica can prepare after a checkpoint trigger, so checkpointing doesn't stall
|
|
64
|
+
// normal processing (referred to as the checkpoint's prepare_max). The second
|
|
65
|
+
// pipeline_prepare_queue_max ensures entries prepared after a checkpoint's prepare_max don't
|
|
66
|
+
// overwrite entries from the previous WAL wrap. By the time we start preparing entries after
|
|
67
|
+
// the second pipeline_prepare_queue_max, a quorum of replicas is guaranteed to have already
|
|
68
|
+
// reached the former checkpoint.
|
|
69
|
+
assert(vsr_checkpoint_ops + lsm_compaction_ops + pipeline_prepare_queue_max * 2 <=
|
|
70
|
+
journal_slot_count);
|
|
71
|
+
assert(vsr_checkpoint_ops >= pipeline_prepare_queue_max);
|
|
72
|
+
assert(vsr_checkpoint_ops >= lsm_compaction_ops);
|
|
73
|
+
assert(vsr_checkpoint_ops % lsm_compaction_ops == 0);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/// The maximum number of clients allowed per cluster, where each client has a unique 128-bit ID.
|
|
77
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
78
|
+
/// This determines the size of the VR client table used to cache replies to clients by client ID.
|
|
79
|
+
/// Each client has one entry in the VR client table to store the latest `message_size_max` reply.
|
|
80
|
+
/// Client ID 0, which is used by the primary for pulse and upgrade requests, is not counted.
|
|
81
|
+
pub const clients_max = config.cluster.clients_max;
|
|
82
|
+
|
|
83
|
+
comptime {
|
|
84
|
+
assert(clients_max >= Config.Cluster.clients_max_min);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/// The maximum number of release versions (upgrade candidates) that can be advertised by a replica
|
|
88
|
+
/// in each ping message body.
|
|
89
|
+
pub const vsr_releases_max = config.cluster.vsr_releases_max;
|
|
90
|
+
|
|
91
|
+
/// The maximum cumulative size of a final TigerBeetle output binary - including potential past
|
|
92
|
+
/// releases and metadata.
|
|
93
|
+
pub fn multiversion_binary_platform_size_max(options: struct { macos: bool, debug: bool }) u64 {
|
|
94
|
+
// {Linux, Windows} get the base value. macOS gets 2x since it has universal binaries. All cases
|
|
95
|
+
// get a further 2x in debug.
|
|
96
|
+
var size_max = config.process.multiversion_binary_platform_size_max;
|
|
97
|
+
if (options.macos) size_max *= 2;
|
|
98
|
+
if (options.debug) size_max *= 2;
|
|
99
|
+
|
|
100
|
+
return size_max;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/// The maximum size, like above, but for any platform.
|
|
104
|
+
pub const multiversion_binary_size_max =
|
|
105
|
+
config.process.multiversion_binary_platform_size_max * 2 * 2;
|
|
106
|
+
comptime {
|
|
107
|
+
assert(multiversion_binary_platform_size_max(.{
|
|
108
|
+
.macos = true,
|
|
109
|
+
.debug = true,
|
|
110
|
+
}) <= multiversion_binary_size_max);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
pub const multiversion_poll_interval_ms = config.process.multiversion_poll_interval_ms;
|
|
114
|
+
|
|
115
|
+
comptime {
|
|
116
|
+
assert(vsr_releases_max >= 2);
|
|
117
|
+
assert(vsr_releases_max * @sizeOf(vsr.Release) <= message_body_size_max);
|
|
118
|
+
// The number of releases is encoded into ping headers as a u16.
|
|
119
|
+
assert(vsr_releases_max <= std.math.maxInt(u16));
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/// The maximum number of nodes required to form a quorum for replication.
|
|
123
|
+
/// Majority quorums are only required across view change and replication phases (not within).
|
|
124
|
+
/// As per Flexible Paxos, provided `quorum_replication + quorum_view_change > replicas`:
|
|
125
|
+
/// 1. you may increase `quorum_view_change` above a majority, so that
|
|
126
|
+
/// 2. you can decrease `quorum_replication` below a majority, to optimize the common case.
|
|
127
|
+
/// This improves latency by reducing the number of nodes required for synchronous replication.
|
|
128
|
+
/// This reduces redundancy only in the short term; asynchronous replication will still continue.
|
|
129
|
+
/// The size of the replication quorum is limited to the minimum of this value and ⌈replicas/2⌉.
|
|
130
|
+
/// The size of the view change quorum will then be automatically inferred from quorum_replication.
|
|
131
|
+
pub const quorum_replication_max = config.cluster.quorum_replication_max;
|
|
132
|
+
|
|
133
|
+
/// The default server port to listen on if not specified in `--addresses`:
|
|
134
|
+
pub const port = config.process.port;
|
|
135
|
+
|
|
136
|
+
/// The default network interface address to listen on if not specified in `--addresses`:
|
|
137
|
+
/// WARNING: Binding to all interfaces with "0.0.0.0" is dangerous and opens the server to anyone.
|
|
138
|
+
/// As a safe default, bind to the "127.0.0.1" loopback address to accept local connections only.
|
|
139
|
+
pub const address = config.process.address;
|
|
140
|
+
|
|
141
|
+
comptime {
|
|
142
|
+
// vsr.parse_address assumes that config.address/config.port are valid.
|
|
143
|
+
_ = std.net.Address.parseIp4(address, 0) catch unreachable;
|
|
144
|
+
_ = @as(u16, port);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/// The default maximum amount of memory to use.
|
|
148
|
+
pub const memory_size_max_default = config.process.memory_size_max_default;
|
|
149
|
+
|
|
150
|
+
/// At a high level, priority for object caching is (in descending order):
|
|
151
|
+
///
|
|
152
|
+
/// 1. Accounts.
|
|
153
|
+
/// - 2 lookups per created transfer
|
|
154
|
+
/// - high temporal locality
|
|
155
|
+
/// - positive expected result
|
|
156
|
+
/// 2. Posted transfers.
|
|
157
|
+
/// - high temporal locality
|
|
158
|
+
/// - positive expected result
|
|
159
|
+
/// 3. Transfers. Generally don't cache these because of:
|
|
160
|
+
/// - low temporal locality
|
|
161
|
+
/// - negative expected result
|
|
162
|
+
///
|
|
163
|
+
/// The default size of the accounts in-memory cache:
|
|
164
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
165
|
+
pub const cache_accounts_size_default = config.process.cache_accounts_size_default;
|
|
166
|
+
|
|
167
|
+
/// The default size of the transfers in-memory cache:
|
|
168
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
169
|
+
/// We allocate more capacity than the number of transfers for a safe hash table load factor.
|
|
170
|
+
pub const cache_transfers_size_default = config.process.cache_transfers_size_default;
|
|
171
|
+
|
|
172
|
+
/// The default size of the two-phase transfers in-memory cache:
|
|
173
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
174
|
+
pub const cache_transfers_pending_size_default =
|
|
175
|
+
config.process.cache_transfers_pending_size_default;
|
|
176
|
+
|
|
177
|
+
/// The size of the client replies zone.
|
|
178
|
+
pub const client_replies_size = clients_max * message_size_max;
|
|
179
|
+
|
|
180
|
+
comptime {
|
|
181
|
+
assert(client_replies_size > 0);
|
|
182
|
+
assert(client_replies_size % sector_size == 0);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
/// The maximum number of batch entries in the journal file:
|
|
186
|
+
/// A batch entry may contain many transfers, so this is not a limit on the number of transfers.
|
|
187
|
+
/// We need this limit to allocate space for copies of batch headers at the start of the journal.
|
|
188
|
+
/// These header copies enable us to disentangle corruption from crashes and recover accordingly.
|
|
189
|
+
pub const journal_slot_count = config.cluster.journal_slot_count;
|
|
190
|
+
|
|
191
|
+
/// The maximum size of the WAL zone:
|
|
192
|
+
/// This is pre-allocated and zeroed for performance when initialized.
|
|
193
|
+
/// Writes within this file never extend the filesystem inode size, reducing the cost of fdatasync().
|
|
194
|
+
/// This enables static allocation of disk space so that appends cannot fail with ENOSPC.
|
|
195
|
+
/// This also enables us to detect filesystem inode corruption that would change the journal size.
|
|
196
|
+
pub const journal_size = journal_size_headers + journal_size_prepares;
|
|
197
|
+
pub const journal_size_headers = journal_slot_count * @sizeOf(vsr.Header);
|
|
198
|
+
pub const journal_size_prepares = journal_slot_count * message_size_max;
|
|
199
|
+
|
|
200
|
+
comptime {
|
|
201
|
+
// For the given WAL (lsm_compaction_ops=4):
|
|
202
|
+
//
|
|
203
|
+
// A B C D E
|
|
204
|
+
// |····|····|····|····|
|
|
205
|
+
//
|
|
206
|
+
// - ("|" delineates bars, where a bar is a multiple of prepare batches.)
|
|
207
|
+
// - ("·" is a prepare in the WAL.)
|
|
208
|
+
// - The Replica triggers a checkpoint at "E".
|
|
209
|
+
// - The entries between "A" and "D" are on-disk in level 0.
|
|
210
|
+
// - The entries between "D" and "E" are in-memory in the immutable table.
|
|
211
|
+
// - So the checkpoint only includes "A…D".
|
|
212
|
+
//
|
|
213
|
+
// The journal must have at least two bars to ensure at least one is checkpointed.
|
|
214
|
+
assert(journal_slot_count >= Config.Cluster.journal_slot_count_min);
|
|
215
|
+
assert(journal_slot_count >= lsm_compaction_ops * 2);
|
|
216
|
+
assert(journal_slot_count % lsm_compaction_ops == 0);
|
|
217
|
+
// The journal must have at least two pipelines of messages to ensure that a new, fully-repaired
|
|
218
|
+
// primary has enough headers for a complete SV message, even if the view-change just truncated
|
|
219
|
+
// another pipeline of messages. (See op_repair_min()).
|
|
220
|
+
assert(journal_slot_count >= pipeline_prepare_queue_max * 2);
|
|
221
|
+
|
|
222
|
+
assert(journal_size == journal_size_headers + journal_size_prepares);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/// The maximum size of a message in bytes:
|
|
226
|
+
/// This is also the limit of all inflight data across multiple pipelined requests per connection.
|
|
227
|
+
/// We may have one request of up to 2 MiB inflight or 2 pipelined requests of up to 1 MiB inflight.
|
|
228
|
+
/// This impacts sequential disk write throughput: the larger the buffer, the better.
|
|
229
|
+
/// 2 MiB is 16,384 transfers, and a reasonable choice for sequential disk write throughput.
|
|
230
|
+
/// However, this impacts bufferbloat and head-of-line blocking latency for pipelined requests.
|
|
231
|
+
/// For a 1 Gbps NIC (~125 MiB/s throughput): 2 MiB / 125 MiB/s * 1000 ms/s = 16 ms for the next request.
|
|
232
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
233
|
+
pub const message_size_max: u32 = config.cluster.message_size_max;
|
|
234
|
+
pub const message_body_size_max = message_size_max - @sizeOf(vsr.Header);
|
|
235
|
+
|
|
236
|
+
comptime {
|
|
237
|
+
// The WAL format requires messages to be a multiple of the sector size.
|
|
238
|
+
assert(message_size_max % sector_size == 0);
|
|
239
|
+
assert(message_size_max >= @sizeOf(vsr.Header));
|
|
240
|
+
assert(message_size_max >= sector_size);
|
|
241
|
+
assert(message_size_max >= Config.Cluster.message_size_max_min(clients_max));
|
|
242
|
+
|
|
243
|
+
// Ensure that DVC/SV messages can fit all necessary headers.
|
|
244
|
+
assert(message_body_size_max >= view_headers_max * @sizeOf(vsr.Header));
|
|
245
|
+
|
|
246
|
+
assert(message_body_size_max >= @sizeOf(vsr.ReconfigurationRequest));
|
|
247
|
+
assert(message_body_size_max >= @sizeOf(vsr.BlockRequest));
|
|
248
|
+
assert(message_body_size_max >= @sizeOf(vsr.CheckpointState));
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.
|
|
252
|
+
/// This is immutable once assigned per cluster, as replicas need to know how many operations might
|
|
253
|
+
/// possibly be uncommitted during a view change, and this must be constant for all replicas.
|
|
254
|
+
pub const pipeline_prepare_queue_max: u32 = config.cluster.pipeline_prepare_queue_max;
|
|
255
|
+
|
|
256
|
+
/// The maximum number of Viewstamped Replication request messages that can be queued at a primary,
|
|
257
|
+
/// waiting to prepare. Each client has at most one request in flight, and a primary can send a
|
|
258
|
+
/// pulse or request upgrade.
|
|
259
|
+
pub const pipeline_request_queue_max: u32 = (clients_max + 1) -| pipeline_prepare_queue_max;
|
|
260
|
+
|
|
261
|
+
comptime {
|
|
262
|
+
// A prepare-queue capacity larger than (clients_max + 1) is wasted.
|
|
263
|
+
assert(pipeline_prepare_queue_max <= clients_max + 1);
|
|
264
|
+
// A total queue capacity larger than (clients_max + 1) is wasted.
|
|
265
|
+
assert(pipeline_prepare_queue_max + pipeline_request_queue_max <= clients_max + 1);
|
|
266
|
+
assert(pipeline_prepare_queue_max > 0);
|
|
267
|
+
assert(pipeline_request_queue_max >= 0);
|
|
268
|
+
|
|
269
|
+
// A DVC message uses the `header.context` (u128) field as a bitset to mark whether it has
|
|
270
|
+
// prepared the corresponding header's message.
|
|
271
|
+
assert(pipeline_prepare_queue_max + 1 <= @bitSizeOf(u128));
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/// Maximum number of headers from the WAL suffix to include in an SV message.
|
|
275
|
+
/// Must at least cover the full pipeline.
|
|
276
|
+
/// Increasing this reduces likelihood that backups will need to repair their suffix's headers.
|
|
277
|
+
///
|
|
278
|
+
/// CRITICAL:
|
|
279
|
+
/// - We must provide enough headers to cover all uncommitted headers so that the new
|
|
280
|
+
/// primary (if we are in a view change) can decide whether to discard uncommitted headers
|
|
281
|
+
/// that cannot be repaired because they are gaps. See DVCQuorum for more detail.
|
|
282
|
+
/// - +1 to leave room for commit_max, in case a backup converts the SV to a DVC.
|
|
283
|
+
pub const view_change_headers_suffix_max = config.cluster.view_change_headers_suffix_max;
|
|
284
|
+
|
|
285
|
+
/// The number of prepare headers to include in the body of a DVC/SV.
|
|
286
|
+
///
|
|
287
|
+
/// start_view:
|
|
288
|
+
///
|
|
289
|
+
/// - We must include all uncommitted headers.
|
|
290
|
+
/// - +1 We must include the highest cluster-committed header (in case the SV is converted to a DVC
|
|
291
|
+
/// by the backup). (This is part of view_change_headers_suffix_max).
|
|
292
|
+
/// - +2: We must provide the header corresponding to each checkpoint-trigger in the intact
|
|
293
|
+
/// suffix of our journal.
|
|
294
|
+
/// - These help a lagging replica catch up when its `op < commit_max`.
|
|
295
|
+
/// - There are at most two of these in the journal.
|
|
296
|
+
/// (There are 2 immediately after we checkpoint, until we prepare enough to overwrite one).
|
|
297
|
+
///
|
|
298
|
+
/// do_view_change:
|
|
299
|
+
///
|
|
300
|
+
/// - We must include all uncommitted headers.
|
|
301
|
+
/// - +1 We must include the highest cluster-committed header, so that the new primary still has a
|
|
302
|
+
/// head op if it truncates the entire pipeline.
|
|
303
|
+
pub const view_headers_max = view_change_headers_suffix_max + 2;
|
|
304
|
+
|
|
305
|
+
comptime {
|
|
306
|
+
assert(view_change_headers_suffix_max >= pipeline_prepare_queue_max + 1);
|
|
307
|
+
|
|
308
|
+
assert(view_headers_max > 0);
|
|
309
|
+
assert(view_headers_max >= pipeline_prepare_queue_max + 3);
|
|
310
|
+
assert(view_headers_max <= journal_slot_count);
|
|
311
|
+
assert(view_headers_max <= @divFloor(
|
|
312
|
+
message_body_size_max - @sizeOf(vsr.CheckpointState),
|
|
313
|
+
@sizeOf(vsr.Header),
|
|
314
|
+
));
|
|
315
|
+
assert(view_headers_max > view_change_headers_suffix_max);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
/// The maximum number of headers to include with a response to a command=request_headers message.
|
|
319
|
+
pub const request_headers_max = @min(
|
|
320
|
+
@divFloor(message_body_size_max, @sizeOf(vsr.Header)),
|
|
321
|
+
64,
|
|
322
|
+
);
|
|
323
|
+
|
|
324
|
+
comptime {
|
|
325
|
+
assert(request_headers_max > 0);
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
/// The maximum number of block addresses/checksums requested by a single command=request_blocks.
|
|
329
|
+
pub const grid_repair_request_max = config.process.grid_repair_request_max;
|
|
330
|
+
|
|
331
|
+
/// The number of grid reads allocated to handle incoming command=request_blocks messages.
|
|
332
|
+
pub const grid_repair_reads_max = config.process.grid_repair_reads_max;
|
|
333
|
+
|
|
334
|
+
/// Immediately after state sync we want access to all of the grid's write bandwidth to rapidly sync
|
|
335
|
+
/// table blocks.
|
|
336
|
+
pub const grid_repair_writes_max = grid_iops_write_max;
|
|
337
|
+
|
|
338
|
+
/// The default sizing of the grid cache. It's expected for operators to override this on the CLI.
|
|
339
|
+
pub const grid_cache_size_default = config.process.grid_cache_size_default;
|
|
340
|
+
|
|
341
|
+
/// The maximum capacity (in *single* blocks – not counting syncing tables) of the
|
|
342
|
+
/// GridBlocksMissing.
|
|
343
|
+
///
|
|
344
|
+
/// As this increases:
|
|
345
|
+
/// - GridBlocksMissing allocates more memory.
|
|
346
|
+
/// - The "period" of GridBlocksMissing's requests increases.
|
|
347
|
+
/// This makes the repair protocol more tolerant of network latency.
|
|
348
|
+
/// - (Repair protocol is used to repair manifest log blocks immediately after state sync).
|
|
349
|
+
pub const grid_missing_blocks_max = config.process.grid_missing_blocks_max;
|
|
350
|
+
|
|
351
|
+
/// The number of tables that can be synced simultaneously.
|
|
352
|
+
/// "Table" in this context is the number of table index blocks to hold in memory while syncing
|
|
353
|
+
/// their content.
|
|
354
|
+
///
|
|
355
|
+
/// As this increases:
|
|
356
|
+
/// - GridBlocksMissing allocates more memory (~2 blocks for each).
|
|
357
|
+
/// - Syncing is more efficient, as more blocks can be fetched concurrently.
|
|
358
|
+
pub const grid_missing_tables_max = config.process.grid_missing_tables_max;
|
|
359
|
+
|
|
360
|
+
comptime {
|
|
361
|
+
assert(grid_repair_request_max > 0);
|
|
362
|
+
assert(grid_repair_request_max <= @divFloor(message_body_size_max, @sizeOf(vsr.BlockRequest)));
|
|
363
|
+
assert(grid_repair_request_max <= grid_repair_reads_max);
|
|
364
|
+
|
|
365
|
+
assert(grid_repair_reads_max > 0);
|
|
366
|
+
assert(grid_repair_writes_max > 0);
|
|
367
|
+
assert(grid_repair_writes_max <=
|
|
368
|
+
grid_missing_blocks_max + grid_missing_tables_max * lsm_table_value_blocks_max);
|
|
369
|
+
|
|
370
|
+
assert(grid_missing_blocks_max > 0);
|
|
371
|
+
assert(grid_missing_tables_max > 0);
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
/// The maximum number of concurrent scrubber reads.
|
|
375
|
+
///
|
|
376
|
+
/// Unless the scrubber cycle is extremely short and the data file very large there is no need to
|
|
377
|
+
/// set this higher than 1.
|
|
378
|
+
pub const grid_scrubber_reads_max = config.process.grid_scrubber_reads_max;
|
|
379
|
+
|
|
380
|
+
/// `grid_scrubber_cycle_ms` is the (approximate, target) total milliseconds per scrub of each
|
|
381
|
+
/// replica's entire grid. Scrubbing work is spread evenly across this duration.
|
|
382
|
+
///
|
|
383
|
+
/// Napkin math for the "worst case" scrubber read overhead as a function of cycle duration
|
|
384
|
+
/// (assuming a fully-loaded data file – maximum size and 100% acquired):
|
|
385
|
+
///
|
|
386
|
+
/// storage_size_limit = 64TiB
|
|
387
|
+
/// grid_scrubber_cycle_seconds = 180 days * 24 hr/day * 60 min/hr * 60 s/min (2 cycles/year)
|
|
388
|
+
/// read_bytes_per_second = storage_size_limit / grid_scrubber_cycle_seconds ≈ 4.32 MiB/s
|
|
389
|
+
///
|
|
390
|
+
pub const grid_scrubber_cycle_ticks = config.process.grid_scrubber_cycle_ms / tick_ms;
|
|
391
|
+
|
|
392
|
+
/// Accelerate/throttle scrubber reads if they are less/more frequent than this range.
|
|
393
|
+
/// (This is to keep the timeouts from being too extreme when the grid is tiny or huge.)
|
|
394
|
+
pub const grid_scrubber_interval_ticks_min = config.process.grid_scrubber_interval_ms_min / tick_ms;
|
|
395
|
+
pub const grid_scrubber_interval_ticks_max = config.process.grid_scrubber_interval_ms_max / tick_ms;
|
|
396
|
+
|
|
397
|
+
comptime {
|
|
398
|
+
assert(grid_scrubber_reads_max > 0);
|
|
399
|
+
assert(grid_scrubber_reads_max <= grid_iops_read_max);
|
|
400
|
+
assert(grid_scrubber_cycle_ticks > 0);
|
|
401
|
+
assert(grid_scrubber_cycle_ticks > @divFloor(std.time.ms_per_min, tick_ms)); // Sanity-check.
|
|
402
|
+
assert(grid_scrubber_interval_ticks_min > 0);
|
|
403
|
+
assert(grid_scrubber_interval_ticks_min <= grid_scrubber_interval_ticks_max);
|
|
404
|
+
assert(grid_scrubber_interval_ticks_max > 0);
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
/// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
|
|
408
|
+
/// Exponential backoff and jitter are applied within this range.
|
|
409
|
+
pub const connection_delay_min_ms = config.process.connection_delay_min_ms;
|
|
410
|
+
pub const connection_delay_max_ms = config.process.connection_delay_max_ms;
|
|
411
|
+
|
|
412
|
+
/// The maximum number of outgoing messages that may be queued on a replica connection.
|
|
413
|
+
pub const connection_send_queue_max_replica = @max(@min(clients_max, 4), 2);
|
|
414
|
+
|
|
415
|
+
/// The maximum number of outgoing messages that may be queued on a client connection.
|
|
416
|
+
/// The client has one in-flight request, and occasionally a ping.
|
|
417
|
+
pub const connection_send_queue_max_client = 2;
|
|
418
|
+
|
|
419
|
+
/// The maximum number of outgoing requests that may be queued on a client (including the in-flight
|
|
420
|
+
/// request).
|
|
421
|
+
pub const client_request_queue_max = config.process.client_request_queue_max;
|
|
422
|
+
|
|
423
|
+
/// The maximum number of connections in the kernel's complete connection queue pending an accept():
|
|
424
|
+
/// If the backlog argument is greater than the value in `/proc/sys/net/core/somaxconn`, then it is
|
|
425
|
+
/// silently truncated to that value. Since Linux 5.4, the default in this file is 4096.
|
|
426
|
+
pub const tcp_backlog = config.process.tcp_backlog;
|
|
427
|
+
|
|
428
|
+
/// The maximum size of a kernel socket receive buffer in bytes (or 0 to use the system default):
|
|
429
|
+
/// This sets SO_RCVBUF as an alternative to the auto-tuning range in /proc/sys/net/ipv4/tcp_rmem.
|
|
430
|
+
/// The value is limited by /proc/sys/net/core/rmem_max, unless the CAP_NET_ADMIN privilege exists.
|
|
431
|
+
/// The kernel doubles this value to allow space for packet bookkeeping overhead.
|
|
432
|
+
/// The receive buffer should ideally exceed the Bandwidth-Delay Product for maximum throughput.
|
|
433
|
+
/// At the same time, be careful going beyond 4 MiB as the kernel may merge many small TCP packets,
|
|
434
|
+
/// causing considerable latency spikes for large buffer sizes:
|
|
435
|
+
/// https://blog.cloudflare.com/the-story-of-one-latency-spike/
|
|
436
|
+
pub const tcp_rcvbuf = config.process.tcp_rcvbuf;
|
|
437
|
+
|
|
438
|
+
/// The maximum size of a kernel socket send buffer in bytes (or 0 to use the system default):
|
|
439
|
+
/// This sets SO_SNDBUF as an alternative to the auto-tuning range in /proc/sys/net/ipv4/tcp_wmem.
|
|
440
|
+
/// The value is limited by /proc/sys/net/core/wmem_max, unless the CAP_NET_ADMIN privilege exists.
|
|
441
|
+
/// The kernel doubles this value to allow space for packet bookkeeping overhead.
|
|
442
|
+
pub const tcp_sndbuf_replica = connection_send_queue_max_replica * message_size_max;
|
|
443
|
+
pub const tcp_sndbuf_client = connection_send_queue_max_client * message_size_max;
|
|
444
|
+
|
|
445
|
+
comptime {
|
|
446
|
+
// Avoid latency issues from setting sndbuf too high:
|
|
447
|
+
assert(tcp_sndbuf_replica <= 16 * MiB);
|
|
448
|
+
assert(tcp_sndbuf_client <= 16 * MiB);
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
/// Whether to enable TCP keepalive:
|
|
452
|
+
pub const tcp_keepalive = config.process.tcp_keepalive;
|
|
453
|
+
|
|
454
|
+
/// The time (in seconds) the connection needs to be idle before sending TCP keepalive probes:
|
|
455
|
+
/// Probes are not sent when the send buffer has data or the congestion window size is zero,
|
|
456
|
+
/// for these cases we also need tcp_user_timeout_ms below.
|
|
457
|
+
pub const tcp_keepidle = config.process.tcp_keepidle;
|
|
458
|
+
|
|
459
|
+
/// The time (in seconds) between individual keepalive probes:
|
|
460
|
+
pub const tcp_keepintvl = config.process.tcp_keepintvl;
|
|
461
|
+
|
|
462
|
+
/// The maximum number of keepalive probes to send before dropping the connection:
|
|
463
|
+
pub const tcp_keepcnt = config.process.tcp_keepcnt;
|
|
464
|
+
|
|
465
|
+
/// The time (in milliseconds) to timeout an idle connection or unacknowledged send:
|
|
466
|
+
/// This timer rides on the granularity of the keepalive or retransmission timers.
|
|
467
|
+
/// For example, if keepalive will only send a probe after 10s then this becomes the lower bound
|
|
468
|
+
/// for tcp_user_timeout_ms to fire, even if tcp_user_timeout_ms is 2s. Nevertheless, this would
|
|
469
|
+
/// timeout the connection at 10s rather than wait for tcp_keepcnt probes to be sent. At the same
|
|
470
|
+
/// time, if tcp_user_timeout_ms is larger than the max keepalive time then tcp_keepcnt will be
|
|
471
|
+
/// ignored and more keepalive probes will be sent until tcp_user_timeout_ms fires.
|
|
472
|
+
/// For a thorough overview of how these settings interact:
|
|
473
|
+
/// https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/
|
|
474
|
+
pub const tcp_user_timeout_ms = (tcp_keepidle + tcp_keepintvl * tcp_keepcnt) * 1000;
|
|
475
|
+
|
|
476
|
+
/// Whether to disable Nagle's algorithm to eliminate send buffering delays:
|
|
477
|
+
pub const tcp_nodelay = config.process.tcp_nodelay;
|
|
478
|
+
|
|
479
|
+
/// Size of a CPU cache line in bytes
|
|
480
|
+
pub const cache_line_size = config.cluster.cache_line_size;
|
|
481
|
+
|
|
482
|
+
/// The minimum size of an aligned kernel page and an Advanced Format disk sector:
|
|
483
|
+
/// This is necessary for direct I/O without the kernel having to fix unaligned pages with a copy.
|
|
484
|
+
/// The new Advanced Format sector size is backwards compatible with the old 512 byte sector size.
|
|
485
|
+
/// This should therefore never be less than 4 KiB to be future-proof when server disks are swapped.
|
|
486
|
+
pub const sector_size = 4096;
|
|
487
|
+
|
|
488
|
+
/// Whether to perform direct I/O to the underlying disk device:
|
|
489
|
+
/// This enables several performance optimizations:
|
|
490
|
+
/// * A memory copy to the kernel's page cache can be eliminated for reduced CPU utilization.
|
|
491
|
+
/// * I/O can be issued immediately to the disk device without buffering delay for improved latency.
|
|
492
|
+
/// This also enables several safety features:
|
|
493
|
+
/// * Disk data can be scrubbed to repair latent sector errors and checksum errors proactively.
|
|
494
|
+
/// * Fsync failures can be recovered from correctly.
|
|
495
|
+
/// WARNING: Disabling direct I/O is unsafe; the page cache cannot be trusted after an fsync error,
|
|
496
|
+
/// even after an application panic, since the kernel will mark dirty pages as clean, even
|
|
497
|
+
/// when they were never written to disk.
|
|
498
|
+
pub const direct_io = config.process.direct_io;
|
|
499
|
+
|
|
500
|
+
pub const iops_read_max = journal_iops_read_max + client_replies_iops_read_max +
|
|
501
|
+
grid_iops_read_max + superblock_iops_read_max;
|
|
502
|
+
pub const iops_write_max = journal_iops_write_max + client_replies_iops_write_max +
|
|
503
|
+
grid_iops_write_max + superblock_iops_write_max;
|
|
504
|
+
|
|
505
|
+
/// Superblock has at most one write in flight.
|
|
506
|
+
const superblock_iops_read_max = 1;
|
|
507
|
+
const superblock_iops_write_max = 1;
|
|
508
|
+
|
|
509
|
+
/// The maximum number of concurrent WAL read I/O operations to allow at once.
|
|
510
|
+
pub const journal_iops_read_max = config.process.journal_iops_read_max;
|
|
511
|
+
/// The maximum number of concurrent WAL write I/O operations to allow at once.
|
|
512
|
+
/// Ideally this is at least as high as pipeline_prepare_queue_max, but it is safe to be lower.
|
|
513
|
+
pub const journal_iops_write_max = config.process.journal_iops_write_max;
|
|
514
|
+
|
|
515
|
+
/// The maximum number of concurrent reads to the client-replies zone.
|
|
516
|
+
/// Client replies are read when the client misses their original reply and retries a request.
|
|
517
|
+
pub const client_replies_iops_read_max = config.process.client_replies_iops_read_max;
|
|
518
|
+
/// The maximum number of concurrent writes to the client-replies zone.
|
|
519
|
+
/// Client replies are written after every commit.
|
|
520
|
+
pub const client_replies_iops_write_max = config.process.client_replies_iops_write_max;
|
|
521
|
+
/// The amount of time (in milliseconds) within which a client must receive a response from the
|
|
522
|
+
/// cluster, after which it emits a warning log (for alerting/metrics).
|
|
523
|
+
pub const client_request_completion_warn_ms = config.process.client_request_completion_warn_ms;
|
|
524
|
+
|
|
525
|
+
/// The maximum number of concurrent grid read I/O operations to allow at once.
|
|
526
|
+
pub const grid_iops_read_max = config.process.grid_iops_read_max;
|
|
527
|
+
/// The maximum number of concurrent grid write I/O operations to allow at once.
|
|
528
|
+
pub const grid_iops_write_max = config.process.grid_iops_write_max;
|
|
529
|
+
|
|
530
|
+
comptime {
|
|
531
|
+
assert(journal_iops_read_max > 0);
|
|
532
|
+
assert(journal_iops_write_max > 0);
|
|
533
|
+
assert(client_replies_iops_read_max > 0);
|
|
534
|
+
assert(client_replies_iops_write_max > 0);
|
|
535
|
+
assert(client_replies_iops_write_max <= clients_max);
|
|
536
|
+
assert(grid_iops_read_max > 0);
|
|
537
|
+
assert(grid_iops_write_max > 0);
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
/// The number of redundant copies of the superblock in the superblock storage zone.
|
|
541
|
+
/// This must be one of { 4, 6, 8 }, i.e. an even number, for more efficient flexible quorums.
|
|
542
|
+
///
|
|
543
|
+
/// The superblock contains local state for the replica and therefore cannot be replicated remotely.
|
|
544
|
+
/// Loss of the superblock would represent loss of the replica and so it must be protected.
|
|
545
|
+
///
|
|
546
|
+
/// This can mean checkpointing latencies in the rare extreme worst-case of at most 264ms, although
|
|
547
|
+
/// this would require EWAH compression of our block free set to have zero effective compression.
|
|
548
|
+
/// In practice, checkpointing latency should be an order of magnitude better due to compression,
|
|
549
|
+
/// because our block free set will fill holes when allocating.
|
|
550
|
+
///
|
|
551
|
+
/// The superblock only needs to be checkpointed every now and then, before the WAL wraps around,
|
|
552
|
+
/// or when a view change needs to take place to elect a new primary.
|
|
553
|
+
pub const superblock_copies = config.cluster.superblock_copies;
|
|
554
|
+
|
|
555
|
+
comptime {
|
|
556
|
+
assert(superblock_copies % 2 == 0);
|
|
557
|
+
assert(superblock_copies >= 4);
|
|
558
|
+
assert(superblock_copies <= 8);
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/// The default maximum size of a local data file. This can be overridden, up to
|
|
562
|
+
/// storage_size_limit_max, by a CLI flag.
|
|
563
|
+
pub const storage_size_limit_default = config.process.storage_size_limit_default;
|
|
564
|
+
|
|
565
|
+
/// The maximum size of a local data file.
|
|
566
|
+
/// This should not be much larger than several TiB to limit:
|
|
567
|
+
/// * blast radius and recovery time when a whole replica is lost,
|
|
568
|
+
/// * replicated storage overhead, since all data files are mirrored, and
|
|
569
|
+
/// * the static memory allocation required for tracking LSM forest metadata in memory.
|
|
570
|
+
///
|
|
571
|
+
/// This is a "firm" limit --- while it is a compile-time constant, it does not affect data file
|
|
572
|
+
/// layout and can be safely changed for an existing cluster.
|
|
573
|
+
pub const storage_size_limit_max = config.process.storage_size_limit_max;
|
|
574
|
+
|
|
575
|
+
comptime {
|
|
576
|
+
assert(storage_size_limit_max >= storage_size_limit_default);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
/// The unit of read/write access to LSM manifest and LSM table blocks in the block storage zone.
|
|
580
|
+
///
|
|
581
|
+
/// - A lower block size increases the memory overhead of table metadata, due to smaller/more
|
|
582
|
+
/// tables.
|
|
583
|
+
/// - A higher block size increases space amplification due to partially-filled blocks.
|
|
584
|
+
pub const block_size = config.cluster.block_size;
|
|
585
|
+
|
|
586
|
+
comptime {
|
|
587
|
+
assert(block_size % sector_size == 0);
|
|
588
|
+
assert(block_size > @sizeOf(vsr.Header));
|
|
589
|
+
// Blocks are sent over the network as messages during grid repair and state sync.
|
|
590
|
+
assert(block_size <= message_size_max);
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
/// The number of levels in an LSM tree.
|
|
594
|
+
/// A higher number of levels increases read amplification, as well as total storage capacity.
|
|
595
|
+
pub const lsm_levels = config.cluster.lsm_levels;
|
|
596
|
+
|
|
597
|
+
comptime {
|
|
598
|
+
// ManifestLog serializes the level as a u6.
|
|
599
|
+
assert(lsm_levels > 0);
|
|
600
|
+
assert(lsm_levels <= std.math.maxInt(u6));
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
/// The number of tables at level i (0 ≤ i < lsm_levels) is `pow(lsm_growth_factor, i+1)`.
|
|
604
|
+
/// A higher growth factor increases write amplification (by increasing the number of tables in
|
|
605
|
+
/// level B that overlap a table in level A in a compaction), but decreases read amplification (by
|
|
606
|
+
/// reducing the height of the tree and thus the number of levels that must be probed). Since read
|
|
607
|
+
/// amplification can be optimized more easily (with caching), we target a growth
|
|
608
|
+
/// factor of 8 for lower write amplification rather than the more typical growth factor of 10.
|
|
609
|
+
pub const lsm_growth_factor = config.cluster.lsm_growth_factor;
|
|
610
|
+
|
|
611
|
+
comptime {
|
|
612
|
+
assert(lsm_growth_factor > 1);
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
/// Size of nodes used by the LSM tree manifest implementation.
|
|
616
|
+
/// TODO Double-check this with our "LSM Manifest" spreadsheet.
|
|
617
|
+
pub const lsm_manifest_node_size = config.process.lsm_manifest_node_size;
|
|
618
|
+
|
|
619
|
+
/// The number of manifest blocks to compact *beyond the minimum*, per half-bar.
|
|
620
|
+
///
|
|
621
|
+
/// In the worst case, we still compact entries faster than we produce them (by a margin of
|
|
622
|
+
/// "extra" blocks). This is necessary to ensure that the manifest has a bounded number of entries.
|
|
623
|
+
/// (Or in other words, that Pace's recurrence relation converges.)
|
|
624
|
+
///
|
|
625
|
+
/// This specific choice of value is somewhat arbitrary, but yields a decent balance between
|
|
626
|
+
/// "compaction work performed" and "total manifest size".
|
|
627
|
+
///
|
|
628
|
+
/// As this value increases, the manifest must perform more compaction work, but the manifest
|
|
629
|
+
/// upper-bound shrinks (and therefore manifest recovery time decreases).
|
|
630
|
+
///
|
|
631
|
+
/// See ManifestLog.Pace for more detail.
|
|
632
|
+
pub const lsm_manifest_compact_extra_blocks = config.cluster.lsm_manifest_compact_extra_blocks;
|
|
633
|
+
|
|
634
|
+
comptime {
|
|
635
|
+
assert(lsm_manifest_compact_extra_blocks > 0);
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
/// Number of prepares accumulated in the in-memory table before flushing to disk.
|
|
639
|
+
///
|
|
640
|
+
/// This is a batch of batches. Each prepare can contain at most 8_190 transfers. With
|
|
641
|
+
/// lsm_compaction_ops=32, 32 prepares are processed to fill the in-memory table with 262_080
|
|
642
|
+
/// transfers. During processing of the next 32 prepares, this in-memory table is flushed to disk.
|
|
643
|
+
/// Simultaneously, compaction is run to free up enough space to flush the in-memory table from the
|
|
644
|
+
/// next batch of lsm_compaction_ops prepares.
|
|
645
|
+
///
|
|
646
|
+
/// Together with message_body_size_max, lsm_compaction_ops determines the size of a table on disk.
|
|
647
|
+
pub const lsm_compaction_ops = config.cluster.lsm_compaction_ops;
|
|
648
|
+
|
|
649
|
+
comptime {
|
|
650
|
+
// The LSM tree uses half-measures to balance compaction.
|
|
651
|
+
assert(lsm_compaction_ops % 2 == 0);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
// Limits for the number of value blocks that a single compaction can queue up for IO and for the
|
|
655
|
+
// number of IO operations themselves. The number of index blocks is always one per level.
|
|
656
|
+
// This is a comptime upper bound. The actual concurrency is also limited by the
|
|
657
|
+
// runtime-known number of free blocks.
|
|
658
|
+
//
|
|
659
|
+
// For simplicity for now, size IOPS to always be available.
|
|
660
|
+
pub const lsm_compaction_queue_read_max = 16;
|
|
661
|
+
pub const lsm_compaction_queue_write_max = 16;
|
|
662
|
+
pub const lsm_compaction_iops_read_max = lsm_compaction_queue_read_max + 2; // + two index blocks.
|
|
663
|
+
pub const lsm_compaction_iops_write_max = lsm_compaction_queue_write_max + 1; // + one index block.
|
|
664
|
+
|
|
665
|
+
pub const lsm_snapshots_max = config.cluster.lsm_snapshots_max;
|
|
666
|
+
|
|
667
|
+
/// The maximum number of blocks that can possibly be referenced by any table index block.
|
|
668
|
+
///
|
|
669
|
+
/// - This is a very conservative (upper-bound) calculation that doesn't rely on the StateMachine's
|
|
670
|
+
/// tree configuration. (To prevent Grid from depending on StateMachine).
|
|
671
|
+
/// - This counts value blocks, but does not count the index block itself.
|
|
672
|
+
pub const lsm_table_value_blocks_max = table_blocks_max: {
|
|
673
|
+
const checksum_size = @sizeOf(u256);
|
|
674
|
+
const address_size = @sizeOf(u64);
|
|
675
|
+
break :table_blocks_max @divFloor(
|
|
676
|
+
block_size - @sizeOf(vsr.Header),
|
|
677
|
+
(checksum_size + address_size),
|
|
678
|
+
);
|
|
679
|
+
};
|
|
680
|
+
|
|
681
|
+
/// The default size in bytes of the NodePool used for the LSM forest's manifests.
|
|
682
|
+
pub const lsm_manifest_memory_size_default = lsm_manifest_memory: {
|
|
683
|
+
// TODO Tune this better.
|
|
684
|
+
const lsm_forest_node_count: u32 = 8192;
|
|
685
|
+
break :lsm_manifest_memory lsm_forest_node_count * lsm_manifest_node_size;
|
|
686
|
+
};
|
|
687
|
+
|
|
688
|
+
/// The maximum size in bytes of the NodePool used for the LSM forest's manifests.
|
|
689
|
+
pub const lsm_manifest_memory_size_max =
|
|
690
|
+
@divFloor(std.math.maxInt(u32), lsm_manifest_memory_size_multiplier) *
|
|
691
|
+
lsm_manifest_memory_size_multiplier;
|
|
692
|
+
|
|
693
|
+
/// The minimum size in bytes of the NodePool used for the LSM forest's manifests.
|
|
694
|
+
pub const lsm_manifest_memory_size_min = lsm_manifest_memory_size_multiplier;
|
|
695
|
+
|
|
696
|
+
/// The lsm memory size must be a multiple of this value.
|
|
697
|
+
///
|
|
698
|
+
/// While technically this could be equal to lsm_manifest_node_size, we set it
|
|
699
|
+
/// to 1MiB so it is a more obvious increment for users.
|
|
700
|
+
pub const lsm_manifest_memory_size_multiplier = lsm_manifest_memory_multiplier: {
|
|
701
|
+
const lsm_manifest_memory_multiplier = 64 * lsm_manifest_node_size;
|
|
702
|
+
assert(lsm_manifest_memory_multiplier == MiB);
|
|
703
|
+
break :lsm_manifest_memory_multiplier lsm_manifest_memory_multiplier;
|
|
704
|
+
};
|
|
705
|
+
|
|
706
|
+
/// The LSM will attempt to coalesce a table if it is less full than this threshold.
|
|
707
|
+
pub const lsm_table_coalescing_threshold_percent =
|
|
708
|
+
config.cluster.lsm_table_coalescing_threshold_percent;
|
|
709
|
+
|
|
710
|
+
comptime {
|
|
711
|
+
assert(lsm_table_coalescing_threshold_percent > 0); // Ensure that coalescing is possible.
|
|
712
|
+
assert(lsm_table_coalescing_threshold_percent < 100); // Don't coalesce full tables.
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
/// The number of milliseconds between each replica tick, the basic unit of time in TigerBeetle.
|
|
716
|
+
/// Used to regulate heartbeats, retries and timeouts, all specified as multiples of a tick.
|
|
717
|
+
pub const tick_ms = config.process.tick_ms;
|
|
718
|
+
|
|
719
|
+
/// The conservative round-trip time at startup when there is no network knowledge.
|
|
720
|
+
/// Adjusted dynamically thereafter for RTT-sensitive timeouts according to network congestion.
|
|
721
|
+
/// This should be set higher rather than lower to avoid flooding the network at startup.
|
|
722
|
+
pub const rtt_ticks = config.process.rtt_ms / tick_ms;
|
|
723
|
+
|
|
724
|
+
/// Maximum RTT, to prevent too-long timeouts.
|
|
725
|
+
pub const rtt_max_ticks = config.process.rtt_max_ms / tick_ms;
|
|
726
|
+
|
|
727
|
+
/// The multiple of round-trip time for RTT-sensitive timeouts.
|
|
728
|
+
pub const rtt_multiple = 2;
|
|
729
|
+
|
|
730
|
+
/// The min/max bounds of exponential backoff (and jitter) to add to RTT-sensitive timeouts.
|
|
731
|
+
pub const backoff_min_ticks = config.process.backoff_min_ms / tick_ms;
|
|
732
|
+
pub const backoff_max_ticks = config.process.backoff_max_ms / tick_ms;
|
|
733
|
+
|
|
734
|
+
/// The maximum skew between two clocks to allow when considering them to be in agreement.
|
|
735
|
+
/// The principle is that no two clocks tick exactly alike but some clocks more or less agree.
|
|
736
|
+
/// The maximum skew across the cluster as a whole is this value times the total number of clocks.
|
|
737
|
+
/// The cluster will be unavailable if the majority of clocks are all further than this value apart.
|
|
738
|
+
/// Decreasing this reduces the probability of reaching agreement on synchronized time.
|
|
739
|
+
/// Increasing this reduces the accuracy of synchronized time.
|
|
740
|
+
pub const clock_offset_tolerance_max_ms = config.process.clock_offset_tolerance_max_ms;
|
|
741
|
+
|
|
742
|
+
/// The amount of time before the clock's synchronized epoch is expired.
|
|
743
|
+
/// If the epoch is expired before it can be replaced with a new synchronized epoch, then this most
|
|
744
|
+
/// likely indicates either a network partition or else too many clock faults across the cluster.
|
|
745
|
+
/// A new synchronized epoch will be installed as soon as these conditions resolve.
|
|
746
|
+
pub const clock_epoch_max_ms = config.process.clock_epoch_max_ms;
|
|
747
|
+
|
|
748
|
+
/// The amount of time to wait for enough accurate samples before synchronizing the clock.
|
|
749
|
+
/// The more samples we can take per remote clock source, the more accurate our estimation becomes.
|
|
750
|
+
/// This impacts cluster startup time as the primary must first wait for synchronization to
|
|
751
|
+
/// complete.
|
|
752
|
+
pub const clock_synchronization_window_min_ms = config.process.clock_synchronization_window_min_ms;
|
|
753
|
+
|
|
754
|
+
/// The amount of time without agreement before the clock window is expired and a new window opened.
|
|
755
|
+
/// This happens where some samples have been collected but not enough to reach agreement.
|
|
756
|
+
/// The quality of samples degrades as they age so at some point we throw them away and start over.
|
|
757
|
+
/// This eliminates the impact of gradual clock drift on our clock offset (clock skew) measurements.
|
|
758
|
+
/// If a window expires because of this then it is likely that the clock epoch will also be expired.
|
|
759
|
+
pub const clock_synchronization_window_max_ms = config.process.clock_synchronization_window_max_ms;
|
|
760
|
+
|
|
761
|
+
/// TigerBeetle uses asserts proactively, unless they severely degrade performance. For production,
|
|
762
|
+
/// a 5% slowdown might be deemed critical; tests tolerate slowdowns up to 5x. Tests should be
|
|
763
|
+
/// reasonably fast to make deterministic simulation effective. `constants.verify` disambiguates the
|
|
764
|
+
/// two cases.
|
|
765
|
+
///
|
|
766
|
+
/// In the control plane (eg, vsr proper) assert unconditionally. Due to batching, control plane
|
|
767
|
+
/// overhead is negligible. It is acceptable to spend O(N) time to verify O(1) computation.
|
|
768
|
+
///
|
|
769
|
+
/// In the data plane (eg, lsm tree), finer grained judgement is required. Do an unconditional O(1)
|
|
770
|
+
/// assert before an O(N) loop (e.g., a bounds check). Inside the loop, it might or might not be
|
|
771
|
+
/// feasible to add an extra assert per iteration. In the latter case, guard the assert with `if
|
|
772
|
+
/// (constants.verify)`, but prefer an unconditional assert unless benchmarks prove it to be costly.
|
|
773
|
+
///
|
|
774
|
+
/// In the data plane, never use O(N) asserts for O(1) computations --- due to randomized testing
|
|
775
|
+
/// the overall coverage is proportional to the number of tests run. Slow thorough assertions
|
|
776
|
+
/// decrease the overall test coverage.
|
|
777
|
+
///
|
|
778
|
+
/// Specific data structures might use a comptime parameter, to enable extra costly verification
|
|
779
|
+
/// only during unit tests of the data structure.
|
|
780
|
+
pub const verify = config.process.verify;
|
|
781
|
+
|
|
782
|
+
/// The maximum number of bytes to use for compaction blocks.
|
|
783
|
+
pub const compaction_block_memory_size_max = std.math.maxInt(u32) * block_size;
|
|
784
|
+
|
|
785
|
+
/// Maximum number of tree scans that can be performed by a single query.
|
|
786
|
+
/// NOTE: Each condition in a query is a scan, for example `WHERE a=0 AND b=1` needs 2 scans.
|
|
787
|
+
pub const lsm_scans_max = config.cluster.lsm_scans_max;
|
|
788
|
+
|
|
789
|
+
/// Processing more than this number of messages in a single event loop turn issues a warning.
|
|
790
|
+
pub const bus_message_burst_warn_min = 8;
|