tigerbeetle 0.0.34 → 0.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/client.rb +1 -1
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,1727 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const math = std.math;
|
|
3
|
+
const assert = std.debug.assert;
|
|
4
|
+
const maybe = stdx.maybe;
|
|
5
|
+
const log = std.log.scoped(.vsr);
|
|
6
|
+
|
|
7
|
+
// vsr.zig is the root of a Zig package; it re-exports all public APIs.
|
|
8
|
+
//
|
|
9
|
+
// Note that we don't promise any stability of these interfaces yet.
|
|
10
|
+
pub const cdc = @import("cdc/runner.zig");
|
|
11
|
+
pub const constants = @import("constants.zig");
|
|
12
|
+
pub const io = @import("io.zig");
|
|
13
|
+
pub const queue = @import("queue.zig");
|
|
14
|
+
pub const stack = @import("stack.zig");
|
|
15
|
+
pub const message_buffer = @import("message_buffer.zig");
|
|
16
|
+
pub const message_bus = @import("message_bus.zig");
|
|
17
|
+
pub const message_pool = @import("message_pool.zig");
|
|
18
|
+
pub const state_machine = @import("state_machine.zig");
|
|
19
|
+
pub const storage = @import("storage.zig");
|
|
20
|
+
pub const tb_client = @import("clients/c/tb_client.zig");
|
|
21
|
+
pub const tigerbeetle = @import("tigerbeetle.zig");
|
|
22
|
+
pub const time = @import("time.zig");
|
|
23
|
+
pub const trace = @import("trace.zig");
|
|
24
|
+
pub const stdx = @import("stdx");
|
|
25
|
+
pub const grid = @import("vsr/grid.zig");
|
|
26
|
+
pub const superblock = @import("vsr/superblock.zig");
|
|
27
|
+
pub const aof = @import("aof.zig");
|
|
28
|
+
pub const repl = @import("repl.zig");
|
|
29
|
+
pub const lsm = .{
|
|
30
|
+
.tree = @import("lsm/tree.zig"),
|
|
31
|
+
.groove = @import("lsm/groove.zig"),
|
|
32
|
+
.forest = @import("lsm/forest.zig"),
|
|
33
|
+
.schema = @import("lsm/schema.zig"),
|
|
34
|
+
.composite_key = @import("lsm/composite_key.zig"),
|
|
35
|
+
.TimestampRange = @import("lsm/timestamp_range.zig").TimestampRange,
|
|
36
|
+
};
|
|
37
|
+
pub const testing = .{
|
|
38
|
+
.cluster = @import("testing/cluster.zig"),
|
|
39
|
+
.random_int_exponential = @import("testing/fuzz.zig").random_int_exponential,
|
|
40
|
+
.IdPermutation = @import("testing/id.zig").IdPermutation,
|
|
41
|
+
.parse_seed = @import("testing/fuzz.zig").parse_seed,
|
|
42
|
+
};
|
|
43
|
+
pub const ewah = @import("ewah.zig").ewah;
|
|
44
|
+
pub const checkpoint_trailer = @import("vsr/checkpoint_trailer.zig");
|
|
45
|
+
|
|
46
|
+
pub const multi_batch = @import("vsr/multi_batch.zig");
|
|
47
|
+
|
|
48
|
+
pub const ReplicaType = @import("vsr/replica.zig").ReplicaType;
|
|
49
|
+
pub const ReplicaEvent = @import("vsr/replica.zig").ReplicaEvent;
|
|
50
|
+
pub const ReplicaReformatType = @import("vsr/replica_reformat.zig").ReplicaReformatType;
|
|
51
|
+
pub const format = @import("vsr/replica_format.zig").format;
|
|
52
|
+
pub const Status = @import("vsr/replica.zig").Status;
|
|
53
|
+
pub const SyncStage = @import("vsr/sync.zig").Stage;
|
|
54
|
+
pub const SyncTarget = @import("vsr/sync.zig").Target;
|
|
55
|
+
pub const ClientType = @import("vsr/client.zig").ClientType;
|
|
56
|
+
pub const Clock = @import("vsr/clock.zig").Clock;
|
|
57
|
+
pub const GridType = @import("vsr/grid.zig").GridType;
|
|
58
|
+
pub const JournalType = @import("vsr/journal.zig").JournalType;
|
|
59
|
+
pub const ClientSessions = @import("vsr/client_sessions.zig").ClientSessions;
|
|
60
|
+
pub const ClientRepliesType = @import("vsr/client_replies.zig").ClientRepliesType;
|
|
61
|
+
pub const SlotRange = @import("vsr/journal.zig").SlotRange;
|
|
62
|
+
pub const SuperBlockType = superblock.SuperBlockType;
|
|
63
|
+
pub const SuperBlockManifestReferences = superblock.ManifestReferences;
|
|
64
|
+
pub const SuperBlockTrailerReference = superblock.TrailerReference;
|
|
65
|
+
pub const VSRState = superblock.SuperBlockHeader.VSRState;
|
|
66
|
+
pub const CheckpointState = superblock.SuperBlockHeader.CheckpointState;
|
|
67
|
+
pub const checksum = @import("vsr/checksum.zig").checksum;
|
|
68
|
+
pub const ChecksumStream = @import("vsr/checksum.zig").ChecksumStream;
|
|
69
|
+
pub const Header = @import("vsr/message_header.zig").Header;
|
|
70
|
+
pub const FreeSet = @import("vsr/free_set.zig").FreeSet;
|
|
71
|
+
pub const CheckpointTrailerType = @import("vsr/checkpoint_trailer.zig").CheckpointTrailerType;
|
|
72
|
+
pub const GridScrubberType = @import("vsr/grid_scrubber.zig").GridScrubberType;
|
|
73
|
+
pub const Routing = @import("vsr/routing.zig");
|
|
74
|
+
pub const FaultDetector = @import("vsr/fault_detector.zig");
|
|
75
|
+
pub const CountingAllocator = @import("counting_allocator.zig");
|
|
76
|
+
|
|
77
|
+
/// The version of our Viewstamped Replication protocol in use, including customizations.
|
|
78
|
+
/// For backwards compatibility through breaking changes (e.g. upgrading checksums/ciphers).
|
|
79
|
+
pub const Version: u16 = 0;
|
|
80
|
+
|
|
81
|
+
pub const multiversion = @import("multiversion.zig");
|
|
82
|
+
pub const ReleaseList = multiversion.ReleaseList;
|
|
83
|
+
pub const Release = multiversion.Release;
|
|
84
|
+
pub const ReleaseTriple = multiversion.ReleaseTriple;
|
|
85
|
+
|
|
86
|
+
/// Kind of process: either a `replica` or a `client`.
pub const ProcessType = enum { replica, client };
|
|
87
|
+
/// Identity of a peer, as far as it is currently known.
///
/// NOTE(review): judging by the comment inside `transition`, a `Peer` describes the remote end
/// of a connection — confirm against the callers.
pub const Peer = union(enum) {
    /// Nothing is known about the peer yet.
    unknown,
    /// The peer is a replica; the payload is a `u8` (presumably the replica index — confirm).
    replica: u8,
    /// The peer is a client; the payload is a `u128` (presumably the client id — confirm).
    client: u128,
    /// The peer is probably a client with this `u128` id, but this is not yet certain.
    client_likely: u128,

    /// Given the currently recorded identity `old` and a newly observed identity `new`,
    /// returns one of:
    ///
    /// - `.retain`: keep `old` as it is,
    /// - `.update`: replace `old` with `new`,
    /// - `.reject`: `new` contradicts `old`.
    pub fn transition(old: Peer, new: Peer) enum { retain, update, reject } {
        return switch (old) {
            // Anything is more specific than knowing nothing.
            .unknown => .update,

            .client_likely => |likely_id| switch (new) {
                // A repeated `client_likely` is retained even when the ids differ: receiving
                // requests from two different clients on the same connection implies that we
                // are talking to a replica. However, as we don't know which one, we retain this
                // as a connection to a client, for simplicity.
                .client_likely, .unknown => .retain,
                // A confirmed client must match the id we already suspected.
                .client => |client_id| if (likely_id == client_id) .update else .reject,
                .replica => .update,
            },

            .replica => |replica_old| switch (new) {
                .replica => |replica_new| if (replica_old == replica_new) .retain else .reject,
                .client => .reject,
                .client_likely, .unknown => .retain,
            },

            .client => |client_old| switch (new) {
                .client => |client_new| if (client_old == client_new) .retain else .reject,
                .client_likely => |likely_new| if (client_old == likely_new) .retain else .reject,
                .replica => .reject,
                .unknown => .retain,
            },
        };
    }
};
|
|
123
|
+
|
|
124
|
+
/// The zones of the storage layout, listed in the order in which they are laid out:
/// `start()` of a zone is the sum of the sizes of all zones declared before it.
pub const Zone = enum {
    superblock,
    wal_headers,
    wal_prepares,
    client_replies,
    // Padding inserted between `client_replies` and `grid` so that grid blocks are aligned to the
    // block size rather than only to the sector size. Block-aligned blocks are more likely to be
    // aligned to the underlying physical sector size as well. The padding is zeroed during
    // format and is not used for anything else.
    grid_padding,
    grid,

    const size_superblock = superblock.superblock_zone_size;
    const size_wal_headers = constants.journal_size_headers;
    const size_wal_prepares = constants.journal_size_prepares;
    const size_client_replies = constants.client_replies_size;

    // Number of bytes needed to round the end of `client_replies` up to a block boundary.
    const size_grid_padding = padding: {
        const unaligned = size_superblock + size_wal_headers + size_wal_prepares +
            size_client_replies;
        const aligned = std.mem.alignForward(usize, unaligned, constants.block_size);
        break :padding aligned - unaligned;
    };

    comptime {
        // Every fixed-size zone is a whole number of sectors...
        for (.{
            size_superblock,
            size_wal_headers,
            size_wal_prepares,
            size_client_replies,
            size_grid_padding,
        }) |fixed_size| {
            assert(fixed_size % constants.sector_size == 0);
        }

        // ...so every zone begins on a sector boundary...
        for (std.enums.values(Zone)) |zone| {
            assert(zone.start() % constants.sector_size == 0);
        }

        // ...and, thanks to the padding, the grid begins on a block boundary.
        assert(Zone.start(.grid) % constants.block_size == 0);
    }

    /// Translates `offset_logical`, an offset relative to the start of `zone`, into an offset
    /// relative to the start of storage. For zones with a fixed size, asserts that the logical
    /// offset lies inside the zone.
    pub fn offset(zone: Zone, offset_logical: u64) u64 {
        if (zone.size()) |limit| {
            assert(offset_logical < limit);
        }

        return zone.start() + offset_logical;
    }

    /// Returns the offset at which `zone` begins, i.e. the combined size of all preceding zones.
    pub fn start(zone: Zone) u64 {
        // Accumulated at comptime while the loop below is unrolled over the zones in order.
        comptime var preceding = 0;
        inline for (comptime std.enums.values(Zone)) |candidate| {
            if (candidate == zone) return preceding;
            preceding += comptime size(candidate) orelse 0;
        }
        unreachable;
    }

    /// Returns the size of `zone` in bytes, or `null` for `.grid`, which has no fixed size here.
    pub fn size(zone: Zone) ?u64 {
        return switch (zone) {
            .grid => null,
            .superblock => size_superblock,
            .wal_headers => size_wal_headers,
            .wal_prepares => size_wal_prepares,
            .client_replies => size_client_replies,
            .grid_padding => size_grid_padding,
        };
    }

    /// Asserts that a read or write is correctly aligned for Direct I/O; a misaligned request
    /// would make the underlying syscall return EINVAL.
    ///
    /// Only the start of the read or write is checked: the physical sector size may be smaller
    /// than our logical sector size, so a partial IO can legitimately leave us unaligned.
    pub fn verify_iop(zone: Zone, buffer: []const u8, offset_in_zone: u64) void {
        // The whole buffer must fit inside a fixed-size zone.
        if (zone.size()) |limit| {
            assert(offset_in_zone + buffer.len <= limit);
        }

        // The buffer itself: sector-aligned address, whole sectors, non-empty.
        assert(@intFromPtr(buffer.ptr) % constants.sector_size == 0);
        assert(buffer.len % constants.sector_size == 0);
        assert(buffer.len > 0);

        // The position in storage: sector-aligned, and block-aligned within the grid.
        const offset_in_storage = zone.offset(offset_in_zone);
        assert(offset_in_storage % constants.sector_size == 0);
        assert(zone != .grid or offset_in_storage % constants.block_size == 0);
    }
};
|
|
214
|
+
|
|
215
|
+
/// Reference to a single block in the grid.
|
|
216
|
+
///
|
|
217
|
+
/// Blocks are always referred to by a pair of an address and a checksum to protect from misdirected
|
|
218
|
+
/// reads and writes: checksum inside the block itself doesn't help if the disk accidentally reads a
|
|
219
|
+
/// wrong block.
|
|
220
|
+
///
|
|
221
|
+
/// Block addresses start from one, such that zeroed-out memory can not be confused with a valid
|
|
222
|
+
/// address.
|
|
223
|
+
pub const BlockReference = struct {
|
|
224
|
+
/// Checksum that accompanies the address, guarding against misdirected reads and writes.
checksum: u128,
|
|
225
|
+
/// Address of the block; valid addresses start from one, so zero is never a valid address.
address: u64,
|
|
226
|
+
};
|
|
227
|
+
|
|
228
|
+
/// Viewstamped Replication protocol commands:
|
|
229
|
+
pub const Command = enum(u8) {
    // Before making a backwards-incompatible change here, check release.zig for
    // `release_triple_client_min`.

    reserved = 0,

    ping = 1,
    pong = 2,

    ping_client = 3,
    pong_client = 4,

    request = 5,
    prepare = 6,
    prepare_ok = 7,
    reply = 8,
    commit = 9,

    start_view_change = 10,
    do_view_change = 11,

    request_start_view = 13,
    request_headers = 14,
    request_prepare = 15,
    request_reply = 16,
    headers = 17,

    eviction = 18,

    request_blocks = 19,
    block = 20,

    start_view = 24,

    // Ordinals of commands that were removed from the protocol. They stay listed here so that
    // they can never be re-used.
    deprecated_12 = 12, // start_view without checkpoint
    deprecated_21 = 21, // request_sync_checkpoint
    deprecated_22 = 22, // sync_checkpoint
    deprecated_23 = 23, // start_view with an older version of CheckpointState

    comptime {
        // Every ordinal is smaller than the number of commands. Since enum values are distinct,
        // this means the ordinals are exactly 0..count-1, with no gaps.
        const commands = std.enums.values(Command);
        for (commands) |command| {
            assert(@intFromEnum(command) < commands.len);
        }
    }
};
|
|
275
|
+
|
|
276
|
+
/// This type exists to avoid making the Header type dependent on the state
|
|
277
|
+
/// machine used, which would cause awkward circular type dependencies.
|
|
278
|
+
pub const Operation = enum(u8) {
|
|
279
|
+
// Looking to make backwards incompatible changes here? Make sure to check release.zig for
|
|
280
|
+
// `release_triple_client_min`.
|
|
281
|
+
|
|
282
|
+
/// Operations reserved by VR protocol (for all state machines):
|
|
283
|
+
/// The value 0 is reserved to prevent a spurious zero from being interpreted as an operation.
|
|
284
|
+
reserved = 0,
|
|
285
|
+
/// The value 1 is reserved to initialize the cluster.
|
|
286
|
+
root = 1,
|
|
287
|
+
/// The value 2 is reserved to register a client session with the cluster.
|
|
288
|
+
register = 2,
|
|
289
|
+
/// The value 3 is reserved for reconfiguration request.
|
|
290
|
+
reconfigure = 3,
|
|
291
|
+
/// The value 4 is reserved for pulse request.
|
|
292
|
+
pulse = 4,
|
|
293
|
+
/// The value 5 is reserved for release-upgrade requests.
|
|
294
|
+
upgrade = 5,
|
|
295
|
+
/// The value 6 is reserved for noop requests.
|
|
296
|
+
noop = 6,
|
|
297
|
+
|
|
298
|
+
/// Operations <vsr_operations_reserved are reserved for the control plane.
|
|
299
|
+
/// Operations ≥vsr_operations_reserved are available for the state machine.
|
|
300
|
+
_,
|
|
301
|
+
|
|
302
|
+
/// Converts a state machine operation into the `Operation` with the same integer value.
/// `StateMachineOperation` is validated at comptime by `check_state_machine_operations`.
pub fn from(comptime StateMachineOperation: type, operation: StateMachineOperation) Operation {
    comptime check_state_machine_operations(StateMachineOperation);
    const raw = @intFromEnum(operation);
    return @enumFromInt(raw);
}
|
|
306
|
+
|
|
307
|
+
/// Converts `operation` into the `StateMachineOperation` with the same integer value.
/// Asserts that `operation` is valid for `StateMachineOperation` and is not VSR-reserved.
pub fn to(comptime StateMachineOperation: type, operation: Operation) StateMachineOperation {
    comptime check_state_machine_operations(StateMachineOperation);
    assert(operation.valid(StateMachineOperation));
    assert(!operation.vsr_reserved());
    const raw = @intFromEnum(operation);
    return @enumFromInt(raw);
}
|
|
313
|
+
|
|
314
|
+
/// Converts `self` through `StateMachineOperation.from_vsr`; a `null` result from `from_vsr`
/// is treated as unreachable.
pub fn cast(self: Operation, comptime StateMachineOperation: type) StateMachineOperation {
    comptime check_state_machine_operations(StateMachineOperation);
    const converted = StateMachineOperation.from_vsr(self);
    return converted orelse unreachable;
}
|
|
318
|
+
|
|
319
|
+
/// Returns whether the integer value of `self` matches a named value of either `Operation`
/// or `StateMachineOperation`.
pub fn valid(self: Operation, comptime StateMachineOperation: type) bool {
    comptime check_state_machine_operations(StateMachineOperation);

    const raw = @intFromEnum(self);
    inline for (.{ Operation, StateMachineOperation }) |Enum| {
        inline for (comptime std.enums.values(Enum)) |known| {
            if (raw == @intFromEnum(known)) return true;
        }
    }

    return false;
}
|
|
333
|
+
|
|
334
|
+
/// Returns true when the tag of `self` is below `constants.vsr_operations_reserved`.
pub fn vsr_reserved(self: Operation) bool {
    const tag = @intFromEnum(self);
    return tag < constants.vsr_operations_reserved;
}
|
|
337
|
+
|
|
338
|
+
/// Returns the field name whose tag equals the tag of `self`, searching `Operation` first
/// and `StateMachineOperation` second. Asserts that `self` is valid.
pub fn tag_name(self: Operation, comptime StateMachineOperation: type) []const u8 {
    assert(self.valid(StateMachineOperation));

    const tag = @intFromEnum(self);
    inline for (.{ Operation, StateMachineOperation }) |Enum| {
        inline for (@typeInfo(Enum).@"enum".fields) |field| {
            const candidate = @field(Enum, field.name);
            if (tag == @intFromEnum(candidate)) return field.name;
        }
    }
    // `valid()` was asserted above, so one of the fields must have matched.
    unreachable;
}
|
|
350
|
+
|
|
351
|
+
/// Compile-time validation of a state machine operation enum:
/// * it is an exhaustive enum with the same tag type as `Operation`,
/// * none of its tags fall below `constants.vsr_operations_reserved`,
/// * `from_vsr` maps every declared `Operation` to null (`.pulse` may map either way).
fn check_state_machine_operations(comptime StateMachineOperation: type) void {
    comptime {
        const info = @typeInfo(StateMachineOperation);
        assert(info == .@"enum");
        assert(info.@"enum".is_exhaustive);
        assert(info.@"enum".tag_type == @typeInfo(Operation).@"enum".tag_type);

        for (info.@"enum".fields) |field| {
            const tag = @intFromEnum(@field(StateMachineOperation, field.name));
            if (tag < constants.vsr_operations_reserved) {
                @compileError("StateMachine Operation is reserved");
            }
        }

        for (@typeInfo(Operation).@"enum".fields) |field| {
            const vsr_operation = @field(Operation, field.name);
            const converted = StateMachineOperation.from_vsr(vsr_operation);
            if (vsr_operation == .pulse) {
                // The StateMachine Operation is allowed to turn
                // a `vsr.Operation.pulse` into a valid operation.
                maybe(converted == null);
            } else {
                assert(converted == null);
            }
        }
    }
}
|
|
374
|
+
};
|
|
375
|
+
|
|
376
|
+
pub const RegisterRequest = extern struct {
    /// For command=request this is 0.
    /// For command=prepare this is > 0 and at most message_body_size_max.
    /// (The limit does *not* include the `@sizeOf(Header)`.)
    batch_size_limit: u32,
    reserved: [252]u8 = [_]u8{0} ** 252,

    comptime {
        const size = @sizeOf(RegisterRequest);
        assert(size == 256);
        assert(size <= constants.message_body_size_max);
        assert(stdx.no_padding(RegisterRequest));
    }
};
|
|
389
|
+
|
|
390
|
+
pub const RegisterResult = extern struct {
    batch_size_limit: u32,
    reserved: [60]u8 = [_]u8{0} ** 60,

    comptime {
        const size = @sizeOf(RegisterResult);
        assert(size == 64);
        assert(size <= constants.message_body_size_max);
        assert(stdx.no_padding(RegisterResult));
    }
};
|
|
400
|
+
|
|
401
|
+
pub const BlockRequest = extern struct {
    block_checksum: u128,
    block_address: u64,
    reserved: [8]u8 = [_]u8{0} ** 8,

    comptime {
        const size = @sizeOf(BlockRequest);
        assert(size == 32);
        assert(size <= constants.message_body_size_max);
        assert(stdx.no_padding(BlockRequest));
    }
};
|
|
412
|
+
|
|
413
|
+
/// Body of the builtin operation=.reconfigure request.
pub const ReconfigurationRequest = extern struct {
    /// The proposed member list.
    ///
    /// It must be a permutation of the existing member list, otherwise the request is rejected.
    /// This keeps the failure modes of physically adding a machine to the cluster separate from
    /// those of logically changing which machines participate in quorums.
    members: Members,
    /// The proposed epoch.
    ///
    /// It must be exactly the current epoch + 1, as a guard against operator errors.
    /// vsr.Header already has an `epoch` field, but clients update that one automatically;
    /// reconfiguration needs a value that the operator confirmed manually, so it is not reused.
    epoch: u32,
    /// The proposed replica count.
    ///
    /// Currently required to equal the old count.
    replica_count: u8,
    /// The proposed standby count.
    ///
    /// Currently required to equal the old count.
    standby_count: u8,
    reserved: [54]u8 = [_]u8{0} ** 54,
    /// Outcome of the request. The client sends zero; the primary fills it in when it accepts
    /// a reconfiguration request.
    result: ReconfigurationResult,

    comptime {
        assert(@sizeOf(ReconfigurationRequest) == 256);
        assert(stdx.no_padding(ReconfigurationRequest));
    }

    /// Checks `request` against the `current` cluster configuration and returns the matching
    /// `ReconfigurationResult`. Checks run in a fixed order; the first failing check wins.
    pub fn validate(
        request: *const ReconfigurationRequest,
        current: struct {
            members: *const Members,
            epoch: u32,
            replica_count: u8,
            standby_count: u8,
        },
    ) ReconfigurationResult {
        assert(member_count(current.members) == current.replica_count + current.standby_count);

        // Counts must be within bounds.
        if (request.replica_count == 0) return .replica_count_zero;
        if (request.replica_count > constants.replicas_max) return .replica_count_max_exceeded;
        if (request.standby_count > constants.standbys_max) return .standby_count_max_exceeded;

        // The member list must be well-formed and agree with the counts.
        if (!valid_members(&request.members)) return .members_invalid;
        const request_member_count = member_count(&request.members);
        if (request_member_count != request.replica_count + request.standby_count) {
            return .members_count_invalid;
        }

        // Fields that the client must leave zeroed.
        if (!std.mem.allEqual(u8, &request.reserved, 0)) return .reserved_field;
        if (request.result != .reserved) return .result_must_be_reserved;

        // Changing the counts is not supported.
        if (request.replica_count != current.replica_count) return .different_replica_count;
        if (request.standby_count != current.standby_count) return .different_standby_count;

        const members_unchanged = std.meta.eql(request.members, current.members.*);

        if (request.epoch < current.epoch) return .epoch_in_the_past;
        if (request.epoch == current.epoch) {
            if (members_unchanged) return .configuration_applied;
            return .configuration_conflict;
        }
        if (request.epoch - current.epoch > 1) return .epoch_in_the_future;

        assert(request.epoch == current.epoch + 1);

        assert(valid_members(current.members));
        assert(valid_members(&request.members));
        assert(member_count(current.members) == member_count(&request.members));
        // Both lists are duplicate-free and equally long (asserted above), so showing that
        // every current member also appears in the request is sufficient.
        for (current.members) |member_current| {
            if (member_current == 0) break;

            var found = false;
            for (request.members) |member| {
                if (member == member_current) {
                    found = true;
                    break;
                }
            }
            if (!found) return .different_member_set;
        }

        if (members_unchanged) return .configuration_is_no_op;

        return .ok;
    }
};
|
|
502
|
+
|
|
503
|
+
pub const ReconfigurationResult = enum(u32) {
    reserved = 0,
    /// The reconfiguration request is valid.
    /// The cluster is guaranteed to move to the new epoch with the requested configuration.
    ok = 1,

    /// replica_count must be at least 1.
    replica_count_zero = 2,
    replica_count_max_exceeded = 3,
    standby_count_max_exceeded = 4,

    /// The Members array is syntactically invalid: it has duplicate entries or internal zero
    /// entries.
    members_invalid = 5,
    /// The number of non-zero entries in the Members array differs from
    /// replica_count + standby_count.
    members_count_invalid = 6,

    /// A reserved field is non-zero.
    reserved_field = 7,
    /// result must be set to zero (.reserved).
    result_must_be_reserved = 8,

    /// epoch is in the past (smaller than the current epoch).
    epoch_in_the_past = 9,
    /// epoch is too far in the future (larger than current epoch + 1).
    epoch_in_the_future = 10,

    /// The request changes the number of replicas, which is not currently supported.
    different_replica_count = 11,
    /// The request changes the number of standbys, which is not currently supported.
    different_standby_count = 12,
    /// members must be a permutation of the current set of cluster members.
    different_member_set = 13,

    /// epoch equals the current epoch and the configuration is the same.
    /// This is a duplicate request.
    configuration_applied = 14,
    /// epoch equals the current epoch but the configuration is different.
    /// A conflicting reconfiguration request was accepted.
    configuration_conflict = 15,
    /// The request is valid, but there is no need to advance the epoch, because the
    /// configuration exactly matches the current one.
    configuration_is_no_op = 16,

    comptime {
        // Tags must be dense and declared in ascending order, starting at zero.
        var expected: u32 = 0;
        for (std.enums.values(ReconfigurationResult)) |result| {
            assert(@intFromEnum(result) == expected);
            expected += 1;
        }
    }
};
|
|
553
|
+
|
|
554
|
+
test "ReconfigurationRequest" {
    const ResultSet = std.EnumSet(ReconfigurationResult);

    // Holds the "current" cluster configuration and records which results were exercised.
    const Test = struct {
        members: Members = to_members(.{ 1, 2, 3, 4 }),
        epoch: u32 = 1,
        replica_count: u8 = 3,
        standby_count: u8 = 1,

        tested: ResultSet = .{},

        fn check(
            t: *@This(),
            request: ReconfigurationRequest,
            expected: ReconfigurationResult,
        ) !void {
            const actual = request.validate(.{
                .members = &t.members,
                .epoch = t.epoch,
                .replica_count = t.replica_count,
                .standby_count = t.standby_count,
            });

            try std.testing.expectEqual(expected, actual);
            t.tested.insert(expected);
        }

        // Copies the given ids into the front of a zero-filled members array.
        fn to_members(m: anytype) Members {
            var members: [constants.members_max]u128 = @splat(0);
            inline for (m, 0..) |id, slot| members[slot] = id;
            return members;
        }
    };

    var t: Test = .{};

    // A valid request; every case below is derived from it.
    const r: ReconfigurationRequest = .{
        .members = Test.to_members(.{ 4, 1, 2, 3 }),
        .epoch = 2,
        .replica_count = 3,
        .standby_count = 1,
        .result = .reserved,
    };

    const Case = struct {
        request: ReconfigurationRequest,
        expected: ReconfigurationResult,
    };
    const cases = [_]Case{
        .{ .request = r, .expected = .ok },
        .{
            .request = stdx.update(r, .{ .replica_count = 0 }),
            .expected = .replica_count_zero,
        },
        .{
            .request = stdx.update(r, .{ .replica_count = 255 }),
            .expected = .replica_count_max_exceeded,
        },
        .{
            .request = stdx.update(r, .{ .standby_count = constants.standbys_max + 1 }),
            .expected = .standby_count_max_exceeded,
        },
        .{
            .request = stdx.update(r, .{ .members = Test.to_members(.{ 4, 1, 4, 3 }) }),
            .expected = .members_invalid,
        },
        .{
            .request = stdx.update(r, .{ .members = Test.to_members(.{ 4, 1, 0, 2, 3 }) }),
            .expected = .members_invalid,
        },
        .{
            .request = stdx.update(
                r,
                .{ .epoch = 0, .members = Test.to_members(.{ 4, 1, 0, 2, 3 }) },
            ),
            .expected = .members_invalid,
        },
        .{
            .request = stdx.update(
                r,
                .{ .epoch = 1, .members = Test.to_members(.{ 4, 1, 0, 2, 3 }) },
            ),
            .expected = .members_invalid,
        },
        .{
            .request = stdx.update(r, .{ .replica_count = 4 }),
            .expected = .members_count_invalid,
        },
        .{
            .request = stdx.update(r, .{ .reserved = [_]u8{1} ** 54 }),
            .expected = .reserved_field,
        },
        .{
            .request = stdx.update(r, .{ .result = .ok }),
            .expected = .result_must_be_reserved,
        },
        .{
            .request = stdx.update(r, .{ .epoch = 0 }),
            .expected = .epoch_in_the_past,
        },
        .{
            .request = stdx.update(r, .{ .epoch = 3 }),
            .expected = .epoch_in_the_future,
        },
        .{
            .request = stdx.update(
                r,
                .{ .members = Test.to_members(.{ 1, 2, 3 }), .replica_count = 2 },
            ),
            .expected = .different_replica_count,
        },
        .{
            .request = stdx.update(
                r,
                .{ .members = Test.to_members(.{ 1, 2, 3, 4, 5 }), .standby_count = 2 },
            ),
            .expected = .different_standby_count,
        },
        .{
            .request = stdx.update(r, .{ .members = Test.to_members(.{ 8, 1, 2, 3 }) }),
            .expected = .different_member_set,
        },
        .{
            .request = stdx.update(
                r,
                .{ .epoch = 1, .members = Test.to_members(.{ 1, 2, 3, 4 }) },
            ),
            .expected = .configuration_applied,
        },
        .{
            .request = stdx.update(r, .{ .epoch = 1 }),
            .expected = .configuration_conflict,
        },
        .{
            .request = stdx.update(r, .{ .members = Test.to_members(.{ 1, 2, 3, 4 }) }),
            .expected = .configuration_is_no_op,
        },
    };
    for (cases) |case| try t.check(case.request, case.expected);

    // Every result except `.reserved` must have been produced by the cases above.
    const results_total = ResultSet.initFull().count();
    assert(t.tested.count() < results_total);
    t.tested.insert(.reserved);
    assert(t.tested.count() == results_total);

    // Repeat the epoch comparisons with the current epoch at the top of the u32 range.
    const epoch_max: u32 = std.math.maxInt(u32);
    t.epoch = epoch_max;
    try t.check(r, .epoch_in_the_past);
    try t.check(stdx.update(r, .{ .epoch = epoch_max }), .configuration_conflict);
    try t.check(
        stdx.update(r, .{
            .epoch = epoch_max,
            .members = Test.to_members(.{ 1, 2, 3, 4 }),
        }),
        .configuration_applied,
    );
}
|
|
663
|
+
|
|
664
|
+
pub const UpgradeRequest = extern struct {
    release: Release,
    reserved: [12]u8 = [_]u8{0} ** 12,

    comptime {
        const size = @sizeOf(UpgradeRequest);
        assert(size == 16);
        assert(size <= constants.message_body_size_max);
        assert(stdx.no_padding(UpgradeRequest));
    }
};
|
|
674
|
+
|
|
675
|
+
/// Process-global set of fatal conditions. Each one has its own exit status, which makes
/// accidents easier to investigate.
pub const FatalReason = enum(u8) {
    cli = 1,
    no_space_left = 2,
    manifest_node_pool_exhausted = 3,
    storage_size_exceeds_limit = 4,
    storage_size_would_exceed_limit = 5,
    forest_tables_count_would_exceed_limit = 6,
    unknown_vsr_command = 7,

    /// The exit status is the enum tag itself.
    pub fn exit_status(reason: FatalReason) u8 {
        const status: u8 = @intFromEnum(reason);
        return status;
    }
};
|
|
690
|
+
|
|
691
|
+
/// Logs the formatted message as an error, then terminates the process with the non-zero
/// exit status of `reason`.
///
/// Use fatal for environmental errors where stopping is the intended end response.
/// For example, on running out of disk space, call `fatal` rather than threading
/// error.NoSpaceLeft up the stack. Propagating fatal errors upwards needlessly increases
/// dimensionality (unusual defers might run) without improving the experience --- the leaf of
/// the call stack has the most context for printing an error message.
///
/// Do not use fatal for situations which are necessarily bugs in some replica process (not
/// necessarily this process); use assert or panic instead.
pub fn fatal(reason: FatalReason, comptime fmt: []const u8, args: anytype) noreturn {
    log.err(fmt, args);

    const exit_code: u8 = reason.exit_status();
    assert(exit_code != 0);

    std.process.exit(exit_code);
}
|
|
707
|
+
|
|
708
|
+
pub const Timeout = struct {
    name: []const u8,
    id: u128,
    after: u64,
    after_dynamic: ?u64 = null, // null iff !ticking
    attempts: u8 = 0,
    rtt: u64 = constants.rtt_ticks,
    rtt_multiple: u8 = constants.rtt_multiple,
    ticks: u64 = 0,
    ticking: bool = false,

    /// Bumps the attempts counter and re-arms the timeout with exponential backoff and jitter.
    /// The attempts counter is allowed to wrap from time to time.
    /// Its overflow period is deliberately short so that related bugs surface sooner.
    /// The counter does not saturate, since that would make round-robin retries get stuck.
    pub fn backoff(self: *Timeout, prng: *stdx.PRNG) void {
        assert(self.ticking);

        self.attempts +%= 1;
        self.ticks = 0;

        log.debug("{}: {s} backing off", .{ self.id, self.name });
        self.set_after_for_rtt_and_attempts(prng);
    }

    /// Whoever acts on fired() must also stop/start the timeout, otherwise later ticks around
    /// the event loop may trigger a thundering herd of messages.
    pub fn fired(self: *const Timeout) bool {
        if (!self.ticking) return false;

        const deadline = self.after_dynamic.?;
        if (self.ticks < deadline) return false;

        log.debug("{}: {s} fired", .{ self.id, self.name });
        if (self.ticks > deadline) {
            log.err("{}: {s} is firing every tick", .{ self.id, self.name });
            @panic("timeout was not reset correctly");
        }
        return true;
    }

    /// Zeroes the attempts and ticks counters of a ticking timeout.
    pub fn reset(self: *Timeout) void {
        self.ticks = 0;
        self.attempts = 0;
        assert(self.ticking);
        // TODO Use self.prng to adjust for rtt and attempts.
        log.debug("{}: {s} reset", .{ self.id, self.name });
    }

    /// Recomputes `after_dynamic` from `rtt`, `rtt_multiple` and `attempts`,
    /// adding exponential backoff and jitter.
    /// To prevent backward jumps, call this only once the timeout has been stopped or reset.
    fn set_after_for_rtt_and_attempts(self: *Timeout, prng: *stdx.PRNG) void {
        // Were `after` to drop below `ticks` here, `fired()` would panic:
        assert(self.ticks == 0);
        assert(self.rtt > 0);

        const rtt_component = self.rtt * self.rtt_multiple;
        const backoff_component = exponential_backoff_with_jitter(
            prng,
            constants.backoff_min_ticks,
            constants.backoff_max_ticks,
            self.attempts,
        );
        const after = rtt_component + backoff_component;

        // TODO Clamp `after` to min/max tick bounds for timeout.

        const after_previous = self.after_dynamic.?;
        log.debug("{}: {s} after={}..{} (rtt={} min={} max={} attempts={})", .{
            self.id,
            self.name,
            after_previous,
            after,
            self.rtt,
            constants.backoff_min_ticks,
            constants.backoff_max_ticks,
            self.attempts,
        });

        self.after_dynamic = after;
        assert(self.after_dynamic.? > 0);
    }

    /// Converts a round-trip time in nanoseconds to ticks (at least 1, at most
    /// `constants.rtt_max_ticks`) and stores it in `rtt` if it differs from the current value.
    pub fn set_rtt_ns(self: *Timeout, rtt_ns: u64) void {
        assert(self.rtt > 0);

        const milliseconds = @divFloor(rtt_ns, std.time.ns_per_ms);
        const ticks_unclamped = @max(1, @divFloor(milliseconds, constants.tick_ms));
        const ticks_clamped = @min(ticks_unclamped, constants.rtt_max_ticks);

        if (self.rtt == ticks_clamped) return;

        log.debug("{}: {s} rtt={}..{}", .{
            self.id,
            self.name,
            self.rtt,
            ticks_clamped,
        });

        self.rtt = ticks_clamped;
    }

    /// Arms the timeout: clears the counters, sets `after_dynamic` to `after`, starts ticking.
    pub fn start(self: *Timeout) void {
        self.ticking = true;
        self.ticks = 0;
        self.attempts = 0;
        self.after_dynamic = self.after;
        // TODO Use self.prng to adjust for rtt and attempts.
        log.debug("{}: {s} started", .{ self.id, self.name });
    }

    /// Disarms the timeout: clears the counters and `after_dynamic`, stops ticking.
    pub fn stop(self: *Timeout) void {
        self.ticking = false;
        self.ticks = 0;
        self.attempts = 0;
        self.after_dynamic = null;
        log.debug("{}: {s} stopped", .{ self.id, self.name });
    }

    /// Advances the tick counter by one; does nothing unless the timeout is ticking.
    pub fn tick(self: *Timeout) void {
        if (!self.ticking) return;
        self.ticks += 1;
    }
};
|
|
828
|
+
|
|
829
|
+
/// Computes an exponential backoff with jitter, so that thundering herds do not cause
/// cascading failure. The result lies within `min..=max`.
pub fn exponential_backoff_with_jitter(
    prng: *stdx.PRNG,
    min: u64,
    max: u64,
    attempt: u64,
) u64 {
    assert(max > min);

    // The exponent must saturate. `@truncate(u6, attempt)` would be wrong here,
    // because it merely discards the high bits.
    const attempt_saturated = @min(std.math.maxInt(u6), attempt);
    const exponent: u6 = @intCast(attempt_saturated);

    // Shifting a "1" to the left yields the powers of two:
    // 1<<0 = 1, 1<<1 = 2, 1<<2 = 4, 1<<3 = 8
    const power = std.math.shlExact(u128, 1, exponent) catch unreachable; // Do not truncate.

    // With min == 0 the product below would always be 0, so substitute `@max(1, min)`.
    // This was an actual bug we encountered.
    const min_non_zero = @max(1, min);
    assert(power > 0);
    assert(min_non_zero > 0);

    // The capped exponential component is `min(range, min * 2 ^ attempt)`:
    const range = max - min;
    const backoff = @min(range, min_non_zero * power);
    const jitter = prng.int_inclusive(u64, backoff);

    const result: u64 = @intCast(min + jitter);
    assert(min <= result);
    assert(result <= max);

    return result;
}
|
|
862
|
+
|
|
863
|
+
test "exponential_backoff_with_jitter" {
    var prng = stdx.PRNG.from_seed_testing();

    const attempts = 1000;
    const max: u64 = std.math.maxInt(u64);
    const min = max - attempts;

    // Walk the last `attempts` values below maxInt(u64), using each as the attempt number.
    for (0..attempts) |offset| {
        const attempt: u64 = min + @as(u64, offset);
        const ticks = exponential_backoff_with_jitter(&prng, min, max, attempt);
        try std.testing.expect(ticks >= min);
        try std.testing.expect(ticks <= max);
    }
}
|
|
877
|
+
|
|
878
|
+
/// Parses a comma-separated list of the remote or local addresses of each of the 2f + 1
/// replicas into `out_buffer` and returns the filled prefix of that buffer.
/// Unlike the VRR paper, the list is not sorted; the order is left explicitly to the user.
/// This has several advantages:
/// * The operator may deploy a cluster with proximity in mind since replication follows order.
/// * A replica's IP address may be changed without reconfiguration.
/// It does require that the user specify the same order to all replicas.
/// The returned slice points into `out_buffer`, whose memory belongs to the caller.
pub fn parse_addresses(
    raw: []const u8,
    out_buffer: []std.net.Address,
) ![]std.net.Address {
    const count = 1 + std.mem.count(u8, raw, ",");
    if (out_buffer.len < count) return error.AddressLimitExceeded;

    var parsed: usize = 0;
    var pieces = std.mem.splitScalar(u8, raw, ',');
    while (pieces.next()) |piece| {
        assert(parsed < out_buffer.len);
        // An empty piece means a leading, trailing or doubled comma (or an empty input).
        if (piece.len == 0) return error.AddressHasTrailingComma;
        out_buffer[parsed] = try parse_address_and_port(.{
            .string = piece,
            .port_default = constants.port,
        });
        parsed += 1;
    }
    assert(parsed == count);

    return out_buffer[0..count];
}
|
|
906
|
+
|
|
907
|
+
/// Parses one of: `address:port`, a bare address (uses `port_default`), or a bare port
/// (uses `constants.address`).
pub fn parse_address_and_port(
    options: struct {
        string: []const u8,
        port_default: u16,
    },
) !std.net.Address {
    assert(options.string.len > 0);
    assert(options.port_default > 0);

    const text = options.string;

    const split = std.mem.lastIndexOfAny(u8, text, ":.]") orelse {
        // None of ':', '.', ']' is present: treat the whole string as a port number.
        const port = std.fmt.parseUnsigned(u16, text, 10) catch |err| switch (err) {
            error.Overflow => return error.PortOverflow,
            error.InvalidCharacter => return error.AddressInvalid,
        };
        return std.net.Address.parseIp4(constants.address, port) catch unreachable;
    };

    // The last separator is not a colon: there is no port suffix.
    if (text[split] != ':') return parse_address(text, options.port_default);

    // The port is parsed before the address, so port errors take precedence.
    const port = std.fmt.parseUnsigned(u16, text[split + 1 ..], 10) catch |err| switch (err) {
        error.Overflow => return error.PortOverflow,
        error.InvalidCharacter => return error.PortInvalid,
    };
    return parse_address(text[0..split], port);
}
|
|
942
|
+
|
|
943
|
+
/// Parses `string` as an IPv6 address when it is wrapped in square brackets, and as an IPv4
/// address otherwise. Any parse failure is reported as `error.AddressInvalid`.
fn parse_address(string: []const u8, port: u16) !std.net.Address {
    if (string.len == 0) return error.AddressInvalid;

    const last = string[string.len - 1];
    if (last == ':') return error.AddressHasMoreThanOneColon;

    const bracketed = string[0] == '[' and last == ']';
    if (!bracketed) {
        return std.net.Address.parseIp4(string, port) catch return error.AddressInvalid;
    }

    const inner = string[1 .. string.len - 1];
    return std.net.Address.parseIp6(inner, port) catch return error.AddressInvalid;
}
|
|
955
|
+
|
|
956
|
+
test parse_addresses {
    // fe80::1ff:fe23:4567:890a, shared by both IPv6 vectors.
    const ip6_bytes = [16]u8{
        0xfe, 0x80,
        0,    0,
        0,    0,
        0,    0,
        0x01, 0xff,
        0xfe, 0x23,
        0x45, 0x67,
        0x89, 0x0a,
    };

    const vectors_positive = &[_]struct {
        raw: []const u8,
        addresses: []const std.net.Address,
    }{
        .{
            // Test the minimum/maximum address/port.
            .raw = "1.2.3.4:567,0.0.0.0:0,255.255.255.255:65535",
            .addresses = &[3]std.net.Address{
                std.net.Address.initIp4([_]u8{ 1, 2, 3, 4 }, 567),
                std.net.Address.initIp4([_]u8{ 0, 0, 0, 0 }, 0),
                std.net.Address.initIp4([_]u8{ 255, 255, 255, 255 }, 65535),
            },
        },
        .{
            // Addresses are not reordered.
            .raw = "3.4.5.6:7777,200.3.4.5:6666,1.2.3.4:5555",
            .addresses = &[3]std.net.Address{
                std.net.Address.initIp4([_]u8{ 3, 4, 5, 6 }, 7777),
                std.net.Address.initIp4([_]u8{ 200, 3, 4, 5 }, 6666),
                std.net.Address.initIp4([_]u8{ 1, 2, 3, 4 }, 5555),
            },
        },
        .{
            // Test default address and port.
            .raw = "1.2.3.4:5,4321,2.3.4.5",
            .addresses = &[3]std.net.Address{
                std.net.Address.initIp4([_]u8{ 1, 2, 3, 4 }, 5),
                try std.net.Address.parseIp4(constants.address, 4321),
                std.net.Address.initIp4([_]u8{ 2, 3, 4, 5 }, constants.port),
            },
        },
        .{
            // Test addresses less than address_limit.
            .raw = "1.2.3.4:5,4321",
            .addresses = &[2]std.net.Address{
                std.net.Address.initIp4([_]u8{ 1, 2, 3, 4 }, 5),
                try std.net.Address.parseIp4(constants.address, 4321),
            },
        },
        .{
            // Test IPv6 address with default port.
            .raw = "[fe80::1ff:fe23:4567:890a]",
            .addresses = &[_]std.net.Address{
                std.net.Address.initIp6(ip6_bytes, constants.port, 0, 0),
            },
        },
        .{
            // Test IPv6 address with port.
            .raw = "[fe80::1ff:fe23:4567:890a]:1234",
            .addresses = &[_]std.net.Address{
                std.net.Address.initIp6(ip6_bytes, 1234, 0, 0),
            },
        },
    };

    const vectors_negative = &[_]struct {
        raw: []const u8,
        err: anyerror![]std.net.Address,
    }{
        .{ .raw = "", .err = error.AddressHasTrailingComma },
        .{ .raw = ".", .err = error.AddressInvalid },
        .{ .raw = ":", .err = error.PortInvalid },
        .{ .raw = ":92", .err = error.AddressInvalid },
        .{ .raw = "1.2.3.4:5,2.3.4.5:6,4.5.6.7:8", .err = error.AddressLimitExceeded },
        .{ .raw = "1.2.3.4:7777,", .err = error.AddressHasTrailingComma },
        .{ .raw = "1.2.3.4:7777,2.3.4.5::8888", .err = error.AddressHasMoreThanOneColon },
        .{ .raw = "1.2.3.4:5,A", .err = error.AddressInvalid }, // default port
        .{ .raw = "1.2.3.4:5,2.a.4.5", .err = error.AddressInvalid }, // default port
        .{ .raw = "1.2.3.4:5,2.a.4.5:6", .err = error.AddressInvalid }, // specified port
        .{ .raw = "1.2.3.4:5,2.3.4.5:", .err = error.PortInvalid },
        .{ .raw = "1.2.3.4:5,2.3.4.5:A", .err = error.PortInvalid },
        .{ .raw = "1.2.3.4:5,65536", .err = error.PortOverflow }, // default address
        .{ .raw = "1.2.3.4:5,2.3.4.5:65536", .err = error.PortOverflow },
    };

    var buffer: [3]std.net.Address = undefined;
    for (vectors_positive) |vector| {
        const addresses_actual = try parse_addresses(vector.raw, &buffer);

        // expectEqual takes (expected, actual); the order matters for the failure message.
        try std.testing.expectEqual(vector.addresses.len, addresses_actual.len);
        for (vector.addresses, addresses_actual) |address_expect, address_actual| {
            try std.testing.expectEqual(address_expect.in.sa.family, address_actual.in.sa.family);
            try std.testing.expectEqual(address_expect.in.sa.port, address_actual.in.sa.port);
            try std.testing.expectEqual(address_expect.in.sa.addr, address_actual.in.sa.addr);
            try std.testing.expectEqual(address_expect.in.sa.zero, address_actual.in.sa.zero);
            // NOTE(review): the `.in` view above presumably does not span every byte of an
            // IPv6 socket address, so also compare the complete addresses.
            try std.testing.expect(address_expect.eql(address_actual));
        }
    }

    // Negative vectors are parsed into a two-element buffer so that three addresses overflow.
    for (vectors_negative) |vector| {
        try std.testing.expectEqual(
            vector.err,
            parse_addresses(vector.raw, buffer[0..2]),
        );
    }
}
|
|
1081
|
+
|
|
1082
|
+
test "parse_addresses: fuzz" {
    // Feeds random strings built from an address-like alphabet into `parse_addresses`.
    // Each input must either be rejected with an error or produce between 1 and 3 addresses.
    const test_count = 1024;
    const input_size_max = 32;
    const alphabet = " \t\n,:[]0123456789abcdefgABCDEFGXx";

    var prng = stdx.PRNG.from_seed_testing();

    var input_buffer: [input_size_max]u8 = @splat(0);
    var buffer: [3]std.net.Address = undefined;

    var iteration: usize = 0;
    while (iteration < test_count) : (iteration += 1) {
        // Pick a length first, then fill exactly that many bytes from the alphabet.
        const input_size = prng.int_inclusive(usize, input_size_max);
        const input = input_buffer[0..input_size];
        for (input) |*byte| byte.* = alphabet[prng.index(alphabet)];

        // Parse errors are an acceptable outcome; only successful parses are checked.
        const addresses = parse_addresses(input, &buffer) catch continue;
        assert(addresses.len > 0);
        assert(addresses.len <= 3);
    }
}
|
|
1103
|
+
|
|
1104
|
+
/// Rounds `offset` down to a multiple of `constants.sector_size`.
pub fn sector_floor(offset: u64) u64 {
    // `catch unreachable`: the division is not expected to fail
    // (presumably `sector_size` is a nonzero constant — confirm in `constants`).
    const sector_index = math.divFloor(u64, offset, constants.sector_size) catch unreachable;
    return constants.sector_size * sector_index;
}
|
|
1108
|
+
|
|
1109
|
+
/// Rounds `offset` up to a multiple of `constants.sector_size`.
pub fn sector_ceil(offset: u64) u64 {
    // `catch unreachable`: the division is not expected to fail
    // (presumably `sector_size` is a nonzero constant — confirm in `constants`).
    const sector_count = math.divCeil(u64, offset, constants.sector_size) catch unreachable;
    return constants.sector_size * sector_count;
}
|
|
1113
|
+
|
|
1114
|
+
/// Computes the quorum sizes (in number of replicas) for a cluster of `replica_count` replicas.
///
/// Asserts that `replica_count > 0` and that every returned quorum is at most `replica_count`.
pub fn quorums(replica_count: u8) struct {
    replication: u8,
    view_change: u8,
    nack_prepare: u8,
    majority: u8,
    upgrade: u8,
} {
    assert(replica_count > 0);
    assert(constants.quorum_replication_max >= 2);

    // Half of the cluster (rounded up), capped at `quorum_replication_max`.
    // For replica_count=2, set quorum_replication=2 even though =1 would intersect.
    // This improves durability of small clusters.
    const quorum_replication = switch (replica_count) {
        2 => 2,
        else => @min(
            constants.quorum_replication_max,
            stdx.div_ceil(replica_count, 2),
        ),
    };
    assert(quorum_replication <= replica_count);
    assert(quorum_replication >= 2 or quorum_replication == replica_count);

    // The smallest quorum that is guaranteed to intersect every replication quorum.
    // For replica_count=2, set quorum_view_change=2 even though =1 would intersect.
    // This avoids special cases for a single-replica view-change in Replica.
    const quorum_view_change = switch (replica_count) {
        2 => 2,
        else => replica_count - quorum_replication + 1,
    };
    // The view change quorum may be more expensive to make the replication quorum cheaper.
    // The insight is that the replication phase is by far more common than the view change.
    // This trade-off allows us to optimize for the common case.
    // See the comments in `constants.zig` for further explanation.
    assert(quorum_view_change <= replica_count);
    assert(quorum_view_change >= 2 or quorum_view_change == replica_count);
    assert(quorum_view_change >= @divFloor(replica_count, 2) + 1);
    assert(quorum_view_change + quorum_replication > replica_count);

    // We need to have enough nacks to guarantee that `quorum_replication` was not reached,
    // because if the replication quorum was reached, then it may have been committed.
    const quorum_nack_prepare = replica_count - quorum_replication + 1;
    assert(quorum_nack_prepare + quorum_replication > replica_count);

    // Strictly more than half of the replicas.
    const replica_count_even = @mod(replica_count, 2) == 0;
    const quorum_majority = stdx.div_ceil(replica_count, 2) + @intFromBool(replica_count_even);
    assert(quorum_majority <= replica_count);
    assert(quorum_majority > @divFloor(replica_count, 2));

    // A majority quorum (i.e. `max(quorum_replication, quorum_view_change)`) is required
    // to ensure that the upgraded cluster can both commit and view-change.
    //
    // However, we further require that all replicas can upgrade. In most cases, not upgrading
    // all replicas together would be a mistake (leading to replicas lagging and needing to
    // state sync). If an upgrade is needed while the cluster is compromised, then it should be
    // a hotfix upgrade (i.e. to a build tagged with the same release).
    const quorum_upgrade = replica_count;
    assert(quorum_upgrade <= replica_count);
    assert(quorum_upgrade >= quorum_replication);
    assert(quorum_upgrade >= quorum_view_change);

    return .{
        .replication = quorum_replication,
        .view_change = quorum_view_change,
        .nack_prepare = quorum_nack_prepare,
        .majority = quorum_majority,
        .upgrade = quorum_upgrade,
    };
}
|
|
1176
|
+
|
|
1177
|
+
test "quorums" {
    // The expectation tables below are only valid for quorum_replication_max == 3.
    if (constants.quorum_replication_max != 3) return error.SkipZigTest;

    // Entry k of each table is the expected quorum for a cluster of k+1 replicas.
    const expect_replication = [_]u8{ 1, 2, 2, 2, 3, 3, 3, 3 };
    const expect_view_change = [_]u8{ 1, 2, 2, 3, 3, 4, 5, 6 };
    const expect_nack_prepare = [_]u8{ 1, 1, 2, 3, 3, 4, 5, 6 };
    const expect_majority = [_]u8{ 1, 2, 2, 3, 3, 4, 4, 5 };
    const expect_upgrade = [_]u8{ 1, 2, 3, 4, 5, 6, 7, 8 };

    for (
        expect_replication,
        expect_view_change,
        expect_nack_prepare,
        expect_majority,
        expect_upgrade,
        1..,
    ) |replication, view_change, nack_prepare, majority, upgrade, replica_count| {
        const actual = quorums(@intCast(replica_count));
        try std.testing.expectEqual(actual.replication, replication);
        try std.testing.expectEqual(actual.view_change, view_change);
        try std.testing.expectEqual(actual.nack_prepare, nack_prepare);
        try std.testing.expectEqual(actual.majority, majority);
        try std.testing.expectEqual(actual.upgrade, upgrade);

        // The nack quorum only differs from the view-change quorum when R=2.
        if (replica_count == 2) {
            try std.testing.expectEqual(actual.nack_prepare, 1);
        } else {
            try std.testing.expectEqual(actual.nack_prepare, actual.view_change);
        }
    }
}
|
|
1203
|
+
|
|
1204
|
+
/// Set of replica_ids of cluster members, where order of ids determines replica indexes.
|
|
1205
|
+
///
|
|
1206
|
+
/// First replica_count elements are active replicas,
|
|
1207
|
+
/// then standby_count standbys, the rest are zeros.
|
|
1208
|
+
/// Order determines ring topology for replication.
|
|
1209
|
+
/// Stored as a fixed-size array with `constants.members_max` slots of `u128` ids.
pub const Members = [constants.members_max]u128;
|
|
1210
|
+
|
|
1211
|
+
/// Deterministically assigns replica_ids for the initial configuration.
///
/// Eventually, we want to identify replicas using random u128 ids to prevent operator errors.
/// However, that requires an unergonomic two-step process for spinning a new cluster up.
/// To avoid needlessly compromising the experience until reconfiguration is fully implemented,
/// derive replica ids for the initial cluster deterministically.
///
/// Each id is the checksum of the tuple (cluster config checksum, cluster, replica index).
pub fn root_members(cluster: u128) Members {
    // Every field is `align(1)`, so the struct is exactly 16 + 16 + 1 bytes (asserted below).
    const IdSeed = extern struct {
        cluster_config_checksum: u128 align(1),
        cluster: u128 align(1),
        replica: u8 align(1),
    };
    comptime assert(@sizeOf(IdSeed) == 33);

    var members: Members = @splat(0);
    for (&members, 0..) |*member, replica| {
        const seed = IdSeed{
            .cluster_config_checksum = constants.config.cluster.checksum(),
            .cluster = cluster,
            .replica = @intCast(replica),
        };
        member.* = checksum(std.mem.asBytes(&seed));
    }

    assert(valid_members(&members));
    return members;
}
|
|
1239
|
+
|
|
1240
|
+
/// Check that:
/// - all non-zero elements are different
/// - all zero elements are trailing
pub fn valid_members(members: *const Members) bool {
    // Compare every element against each element that precedes it.
    for (members, 0..) |candidate, candidate_index| {
        for (members[0..candidate_index]) |earlier| {
            if (earlier == 0) {
                // A non-zero id after a zero means the zeros are not trailing.
                if (candidate != 0) return false;
            } else if (earlier == candidate) {
                // Duplicate non-zero id.
                return false;
            }
        }
    }
    return true;
}
|
|
1252
|
+
|
|
1253
|
+
/// Returns the index of the first zero element of `members`, or `constants.members_max` if
/// there is none.
fn member_count(members: *const Members) u8 {
    const first_zero = std.mem.indexOfScalar(u128, members, 0) orelse
        return constants.members_max;
    return @intCast(first_zero);
}
|
|
1259
|
+
|
|
1260
|
+
/// Returns the index of `replica_id` within `members`, or null if it is not present.
///
/// Asserts that `replica_id` is non-zero and that `members` satisfies `valid_members`.
pub fn member_index(members: *const Members, replica_id: u128) ?u8 {
    assert(replica_id != 0);
    assert(valid_members(members));

    const replica_index = std.mem.indexOfScalar(u128, members, replica_id) orelse return null;
    return @intCast(replica_index);
}
|
|
1267
|
+
|
|
1268
|
+
pub const Headers = struct {
    /// Bounded array of at most `constants.view_headers_max` prepare headers.
    pub const Array = stdx.BoundedArrayType(Header.Prepare, constants.view_headers_max);
    /// The SuperBlock's persisted VSR headers.
    /// One of the following:
    ///
    /// - SV headers (consecutive chain)
    /// - DVC headers (disjoint chain)
    pub const ViewChangeSlice = ViewChangeHeadersSlice;
    pub const ViewChangeArray = ViewChangeHeadersArray;

    /// Builds the "blank" header for `op`: a reserved-operation prepare in which every
    /// explicitly set field other than `op` is zero.
    fn dvc_blank(op: u64) Header.Prepare {
        return .{
            .command = .prepare,
            .release = Release.zero,
            .operation = .reserved,
            .op = op,
            .cluster = 0,
            .view = 0,
            .request_checksum = 0,
            .checkpoint_id = 0,
            .parent = 0,
            .client = 0,
            .commit = 0,
            .timestamp = 0,
            .request = 0,
        };
    }

    /// Classifies `header` as `.blank` (equal to `dvc_blank(header.op)`) or `.valid`.
    /// A header that is not blank is asserted to be a well-formed, non-reserved prepare.
    pub fn dvc_header_type(header: *const Header.Prepare) enum { blank, valid } {
        const blank = Headers.dvc_blank(header.op);
        if (std.meta.eql(header.*, blank)) {
            return .blank;
        }

        assert(header.valid_checksum());
        assert(header.command == .prepare);
        assert(header.operation != .reserved);
        assert(header.invalid() == null);
        return .valid;
    }
};
|
|
1306
|
+
|
|
1307
|
+
/// Identifies which kind of message a set of view-change headers belongs to:
/// `do_view_change` (DVC) or `start_view` (SV).
pub const ViewChangeCommand = enum { do_view_change, start_view };
|
|
1308
|
+
|
|
1309
|
+
const ViewChangeHeadersSlice = struct {
    command: ViewChangeCommand,
    /// Headers are ordered from high-to-low op.
    slice: []const Header.Prepare,

    /// Wraps `slice` as the headers of a `command` message and runs `verify()` on the result.
    pub fn init(
        command: ViewChangeCommand,
        slice: []const Header.Prepare,
    ) ViewChangeHeadersSlice {
        const result: ViewChangeHeadersSlice = .{ .command = command, .slice = slice };
        result.verify();
        return result;
    }

    /// Asserts the structural invariants of the headers: non-empty, bounded by
    /// `constants.view_headers_max`, strictly descending ops, and a consistent hash chain
    /// between adjacent valid headers.
    pub fn verify(headers: ViewChangeHeadersSlice) void {
        assert(headers.slice.len > 0);
        assert(headers.slice.len <= constants.view_headers_max);

        const head = &headers.slice[0];
        // A DVC's head op is never a gap or faulty.
        // A SV never includes gaps or faulty headers.
        assert(Headers.dvc_header_type(head) == .valid);

        // `child` is the most recently visited valid (non-blank) header.
        var child = head;
        for (headers.slice[1..], 1..) |*header, index| {
            assert(header.command == .prepare);
            maybe(header.operation == .reserved);
            assert(header.op < child.op);

            // DVC: Ops are consecutive (with explicit blank headers).
            // SV: The first "pipeline + 1" ops of the SV are consecutive.
            const consecutive = switch (headers.command) {
                .do_view_change => true,
                .start_view => index < constants.pipeline_prepare_queue_max + 1,
            };
            if (consecutive) {
                assert(header.op == head.op - index);
            }

            if (Headers.dvc_header_type(header) == .blank) {
                // We can't verify that SV headers contain no gaps headers here:
                // superblock.checkpoint could make .do_view_change headers durable instead of
                // .start_view headers when view == log_view (see `commit_checkpoint_superblock`
                // in `replica.zig`). When these headers are loaded from the superblock on
                // startup, they are considered to be .start_view headers (see `view_headers` in
                // `superblock.zig`).
                maybe(headers.command == .do_view_change);
                maybe(headers.command == .start_view);
                continue; // Don't update "child".
            }

            // Valid header: check it against the nearest valid header above it.
            assert(header.view <= child.view);
            assert(header.timestamp < child.timestamp);
            if (header.op + 1 == child.op) {
                assert(header.checksum == child.parent);
            }
            child = header;
        }
    }

    const ViewRange = struct {
        min: u32, // inclusive
        max: u32, // inclusive

        /// Whether `view` lies within the inclusive range [min, max].
        pub fn contains(range: ViewRange, view: u32) bool {
            return view >= range.min and view <= range.max;
        }
    };

    /// Returns the range of possible views (of prepare, not commit) for a message that is part of
    /// the same log_view as these headers.
    ///
    /// - When these are DVC headers for a log_view=V, we must be in view_change status working to
    ///   transition to a view beyond V. So we will never prepare anything else as part of view V.
    /// - When these are SV headers for a log_view=V, we can continue to add to them (by preparing
    ///   more ops), but those ops will always be part of the log_view. If they were prepared during
    ///   a view prior to the log_view, they would already be part of the headers.
    pub fn view_for_op(headers: ViewChangeHeadersSlice, op: u64, log_view: u32) ViewRange {
        const header_newest = &headers.slice[0];
        // The last valid header in the slice (the slice is ordered high-to-low op).
        const header_oldest = oldest: {
            var index_oldest: ?usize = null;
            for (headers.slice, 0..) |*header, index| {
                switch (Headers.dvc_header_type(header)) {
                    .blank => assert(index > 0),
                    .valid => index_oldest = index,
                }
            }
            break :oldest &headers.slice[index_oldest.?];
        };
        assert(header_newest.view <= log_view);
        assert(header_newest.view >= header_oldest.view);
        assert(header_newest.op >= header_oldest.op);

        // Ops outside of the range covered by the headers.
        if (op < header_oldest.op) return .{ .min = 0, .max = header_oldest.view };
        if (op > header_newest.op) return .{ .min = log_view, .max = log_view };

        // An op with a valid header has an exactly known view.
        for (headers.slice) |*header| {
            const header_valid = Headers.dvc_header_type(header) == .valid;
            if (header_valid and header.op == op) {
                return .{ .min = header.view, .max = header.view };
            }
        }

        // Otherwise the op falls in a gap: bracket it by the neighboring valid headers.
        var header_above = &headers.slice[0];
        assert(Headers.dvc_header_type(header_above) == .valid);

        for (headers.slice[1..]) |*header_below| {
            if (Headers.dvc_header_type(header_below) != .valid) continue;

            if (header_below.op < op and op < header_above.op) {
                return .{ .min = header_below.view, .max = header_above.view };
            }
            header_above = header_below;
        }
        unreachable;
    }
};
|
|
1431
|
+
|
|
1432
|
+
test "Headers.ViewChangeSlice.view_for_op" {
    // DVC headers from high-to-low op: valid op=9 (view=10), blank ops 8 and 7,
    // valid op=6 (view=7), blank op=5.
    var headers_array = [_]Header.Prepare{
        std.mem.zeroInit(Header.Prepare, .{
            .checksum = undefined,
            .client = 6,
            .request = 7,
            .command = .prepare,
            .release = Release.minimum,
            .operation = @as(Operation, @enumFromInt(constants.vsr_operations_reserved + 8)),
            .op = 9,
            .view = 10,
            .timestamp = 11,
        }),
        Headers.dvc_blank(8),
        Headers.dvc_blank(7),
        std.mem.zeroInit(Header.Prepare, .{
            .checksum = undefined,
            .client = 3,
            .request = 4,
            .command = .prepare,
            .release = Release.minimum,
            .operation = @as(Operation, @enumFromInt(constants.vsr_operations_reserved + 5)),
            .op = 6,
            .view = 7,
            .timestamp = 8,
        }),
        Headers.dvc_blank(5),
    };

    // Only the two non-blank headers were created with an undefined checksum.
    for ([_]usize{ 0, 3 }) |index_valid| headers_array[index_valid].set_checksum();

    const headers = Headers.ViewChangeSlice.init(.do_view_change, &headers_array);

    const log_view = 12;
    const cases = [_]struct { op: u64, min: u32, max: u32 }{
        .{ .op = 11, .min = 12, .max = 12 },
        .{ .op = 10, .min = 12, .max = 12 },
        .{ .op = 9, .min = 10, .max = 10 },
        .{ .op = 8, .min = 7, .max = 10 },
        .{ .op = 7, .min = 7, .max = 10 },
        .{ .op = 6, .min = 7, .max = 7 },
        .{ .op = 5, .min = 0, .max = 7 },
        .{ .op = 0, .min = 0, .max = 7 },
    };
    for (cases) |case| {
        const range = headers.view_for_op(case.op, log_view);
        try std.testing.expect(std.meta.eql(range, .{ .min = case.min, .max = case.max }));
    }
}
|
|
1474
|
+
|
|
1475
|
+
/// The headers of a SV or DVC message.
|
|
1476
|
+
const ViewChangeHeadersArray = struct {
    command: ViewChangeCommand,
    array: Headers.Array,

    /// Returns `start_view` headers containing only `Header.Prepare.root(cluster)`.
    pub fn root(cluster: u128) ViewChangeHeadersArray {
        return ViewChangeHeadersArray.init(.start_view, &.{
            Header.Prepare.root(cluster),
        });
    }

    /// Copies `slice` into a new array and verifies it.
    /// `slice` is expected to fit in `Headers.Array` (a failing copy is `unreachable`).
    pub fn init(
        command: ViewChangeCommand,
        slice: []const Header.Prepare,
    ) ViewChangeHeadersArray {
        const result: ViewChangeHeadersArray = .{
            .command = command,
            .array = Headers.Array.from_slice(slice) catch unreachable,
        };
        result.verify();
        return result;
    }

    /// Runs `ViewChangeHeadersSlice.verify` over the current contents.
    pub fn verify(headers: *const ViewChangeHeadersArray) void {
        const view: ViewChangeHeadersSlice = .{
            .command = headers.command,
            .slice = headers.array.const_slice(),
        };
        view.verify();
    }

    /// Overwrites the command and the headers with `command` and `slice`, then verifies.
    pub fn replace(
        headers: *ViewChangeHeadersArray,
        command: ViewChangeCommand,
        slice: []const Header.Prepare,
    ) void {
        headers.command = command;
        headers.array.clear();
        for (slice) |*header| {
            headers.array.push(header.*);
        }
        headers.verify();
    }

    /// Appends a copy of `header` without verifying the result.
    pub fn append(headers: *ViewChangeHeadersArray, header: *const Header.Prepare) void {
        // We don't do comprehensive validation here — assume that verify() will be called
        // after any series of appends.
        headers.array.push(header.*);
    }

    /// Appends a blank header for `op`. Only allowed for non-empty `do_view_change` headers.
    pub fn append_blank(headers: *ViewChangeHeadersArray, op: u64) void {
        assert(headers.command == .do_view_change);
        assert(headers.array.count() > 0);

        const blank = Headers.dvc_blank(op);
        headers.array.push(blank);
    }
};
|
|
1528
|
+
|
|
1529
|
+
/// For a replica with journal_slot_count=10, lsm_compaction_ops=2, pipeline_prepare_queue_max=2,
|
|
1530
|
+
/// and checkpoint_interval=4, which can be computed as follows:
|
|
1531
|
+
/// journal_slot_count - (lsm_compaction_ops + 2 * pipeline_prepare_queue_max) = 4
|
|
1532
|
+
///
|
|
1533
|
+
/// checkpoint() call 0 1 2 3 4
|
|
1534
|
+
/// op_checkpoint 0 3 7 11 15
|
|
1535
|
+
/// op_checkpoint_next 3 7 11 15 19
|
|
1536
|
+
/// op_checkpoint_next_trigger 5 9 13 17 21
|
|
1537
|
+
///
|
|
1538
|
+
/// commit log (ops) │ write-ahead log (slots)
|
|
1539
|
+
/// 0 4 8 2 6 0 4 │ 0 - - - 4 - - - - 9
|
|
1540
|
+
/// 0 ───✓·% │[ 0 1 2 ✓] 4 % R R R R
|
|
1541
|
+
/// 1 ───────✓·% │ 0 1 2 3[ 4 5 6 ✓] 8 %
|
|
1542
|
+
/// 2 ───────────✓·% │ 10 ✓] 12 % 4 5 6 7[ 8 %
|
|
1543
|
+
/// 3 ───────────────✓·% │ 10 11[12 13 14 ✓] 16 % 8 9
|
|
1544
|
+
/// 4 ───────────────────✓·% │ 20 % 12 13 14 15[16 17 18 19]
|
|
1545
|
+
///
|
|
1546
|
+
/// Legend:
|
|
1547
|
+
///
|
|
1548
|
+
/// ─/✓ op on disk at checkpoint
|
|
1549
|
+
/// ·/% op in memory at checkpoint
|
|
1550
|
+
/// ✓ op_checkpoint
|
|
1551
|
+
/// % op_checkpoint's trigger
|
|
1552
|
+
/// R slot reserved in WAL
|
|
1553
|
+
/// [ ] range of ops from a checkpoint
|
|
1554
|
+
pub const Checkpoint = struct {
    comptime {
        assert(constants.journal_slot_count > constants.lsm_compaction_ops);
        assert(constants.journal_slot_count % constants.lsm_compaction_ops == 0);
    }

    /// Returns the op of the checkpoint that follows `checkpoint`.
    pub fn checkpoint_after(checkpoint: u64) u64 {
        assert(valid(checkpoint));

        // Worked example with vsr_checkpoint_ops=6:
        //   First wrap: op_checkpoint_next = 6-1 = 5
        //     (-1: vsr_checkpoint_ops is a count, result is an inclusive index.)
        //   Second wrap: op_checkpoint_next = 5+6 = 11
        //   Third wrap: op_checkpoint_next = 11+6 = 17
        const result: u64 = if (checkpoint == 0)
            constants.vsr_checkpoint_ops - 1
        else
            checkpoint + constants.vsr_checkpoint_ops;

        assert((result + 1) % constants.lsm_compaction_ops == 0);
        assert(valid(result));

        return result;
    }

    /// Returns the op that triggers `checkpoint`, or null for the root checkpoint (op 0).
    pub fn trigger_for_checkpoint(checkpoint: u64) ?u64 {
        assert(valid(checkpoint));

        if (checkpoint == 0) return null;
        return checkpoint + constants.lsm_compaction_ops;
    }

    /// Returns the trigger of `checkpoint` plus two pipelines' worth of ops, or null when the
    /// checkpoint has no trigger.
    pub fn prepare_max_for_checkpoint(checkpoint: u64) ?u64 {
        assert(valid(checkpoint));

        const trigger = trigger_for_checkpoint(checkpoint) orelse return null;
        return trigger + (2 * constants.pipeline_prepare_queue_max);
    }

    /// Returns whether `commit` is more than one pipeline past the trigger of `checkpoint`.
    /// A checkpoint without a trigger (op 0) is always durable.
    pub fn durable(checkpoint: u64, commit: u64) bool {
        assert(valid(checkpoint));

        const trigger = trigger_for_checkpoint(checkpoint) orelse return true;
        return commit > (trigger + constants.pipeline_prepare_queue_max);
    }

    /// Returns whether `op` may be a checkpoint: either 0, or the last op of a
    /// `lsm_compaction_ops`-sized bar.
    pub fn valid(op: u64) bool {
        // Divide by `lsm_compaction_ops` instead of `vsr_checkpoint_ops`:
        // although today in practice checkpoints are evenly spaced, the LSM layer doesn't assume
        // that. LSM allows any bar boundary to become a checkpoint which happens, e.g., in the tree
        // fuzzer.
        if (op == 0) return true;
        return (op + 1) % constants.lsm_compaction_ops == 0;
    }
};
|
|
1619
|
+
|
|
1620
|
+
test "Checkpoint ops diagram" {
    // Renders, for the first `journal_slot_count * 10` ops, which ops are checkpoints,
    // checkpoint triggers and prepare-max ops, and compares the rendering with a snapshot.
    const Snap = stdx.Snap;
    const snap = Snap.snap_fn("src");

    var string = std.ArrayList(u8).init(std.testing.allocator);
    defer string.deinit();

    // Header: the constants that the diagram depends on.
    try string.writer().print(
        \\journal_slot_count={[journal_slot_count]}
        \\lsm_compaction_ops={[lsm_compaction_ops]}
        \\pipeline_prepare_queue_max={[pipeline_prepare_queue_max]}
        \\vsr_checkpoint_ops={[vsr_checkpoint_ops]}
        \\
        \\
    , .{
        .journal_slot_count = constants.journal_slot_count,
        .lsm_compaction_ops = constants.lsm_compaction_ops,
        .pipeline_prepare_queue_max = constants.pipeline_prepare_queue_max,
        .vsr_checkpoint_ops = constants.vsr_checkpoint_ops,
    });

    var checkpoint_prev: u64 = 0;
    var checkpoint_next: u64 = 0;
    var checkpoint_count: u32 = 0;
    for (0..constants.journal_slot_count * 10) |op| {
        const last_beat = (op + 1) % constants.lsm_compaction_ops == 0;
        const last_slot = (op + 1) % constants.journal_slot_count == 0;

        const op_type: enum {
            normal,
            checkpoint,
            checkpoint_trigger,
            checkpoint_prepare_max,
        } = op_type: {
            if (op == checkpoint_next) break :op_type .checkpoint;
            if (checkpoint_prev != 0) {
                if (op == Checkpoint.trigger_for_checkpoint(checkpoint_prev).?) {
                    break :op_type .checkpoint_trigger;
                }

                if (op == Checkpoint.prepare_max_for_checkpoint(checkpoint_prev).?) {
                    break :op_type .checkpoint_prepare_max;
                }
            }
            break :op_type .normal;
        };

        // Marker for tidy.zig to ignore the long lines.
        if (op % constants.journal_slot_count == 0) try string.appendSlice("OPS: ");

        // Each op is printed as <prefix><op padded to 3 with '_'><suffix>; the bracket style
        // alternates between successive checkpoints so that overlapping ranges stay readable.
        try string.writer().print("{s}{:_>3}{s}", .{
            switch (op_type) {
                .normal => " ",
                .checkpoint => if (checkpoint_count % 2 == 0) "[" else "{",
                .checkpoint_trigger => "<",
                .checkpoint_prepare_max => " ",
            },
            op,
            switch (op_type) {
                .normal => if (last_slot) "" else " ",
                .checkpoint => if (last_slot) "" else " ",
                .checkpoint_trigger => ">",
                .checkpoint_prepare_max => if (checkpoint_count % 2 == 0) "]" else "}",
            },
        });

        if (last_slot) try string.append('\n');
        if (!last_slot and last_beat) try string.append(' ');

        if (op_type == .checkpoint) {
            checkpoint_prev = checkpoint_next;
            checkpoint_next = Checkpoint.checkpoint_after(checkpoint_prev);
        }
        checkpoint_count += @intFromBool(op == checkpoint_prev);
    }

    try snap(@src(),
        \\journal_slot_count=32
        \\lsm_compaction_ops=4
        \\pipeline_prepare_queue_max=4
        \\vsr_checkpoint_ops=20
        \\
        \\OPS: [__0 __1 __2 __3 __4 __5 __6 __7 __8 __9 _10 _11 _12 _13 _14 _15 _16 _17 _18 {_19 _20 _21 _22 <_23> _24 _25 _26 _27 _28 _29 _30 _31]
        \\OPS: _32 _33 _34 _35 _36 _37 _38 [_39 _40 _41 _42 <_43> _44 _45 _46 _47 _48 _49 _50 _51} _52 _53 _54 _55 _56 _57 _58 {_59 _60 _61 _62 <_63>
        \\OPS: _64 _65 _66 _67 _68 _69 _70 _71] _72 _73 _74 _75 _76 _77 _78 [_79 _80 _81 _82 <_83> _84 _85 _86 _87 _88 _89 _90 _91} _92 _93 _94 _95
        \\OPS: _96 _97 _98 {_99 100 101 102 <103> 104 105 106 107 108 109 110 111] 112 113 114 115 116 117 118 [119 120 121 122 <123> 124 125 126 127
        \\OPS: 128 129 130 131} 132 133 134 135 136 137 138 {139 140 141 142 <143> 144 145 146 147 148 149 150 151] 152 153 154 155 156 157 158 [159
        \\OPS: 160 161 162 <163> 164 165 166 167 168 169 170 171} 172 173 174 175 176 177 178 {179 180 181 182 <183> 184 185 186 187 188 189 190 191]
        \\OPS: 192 193 194 195 196 197 198 [199 200 201 202 <203> 204 205 206 207 208 209 210 211} 212 213 214 215 216 217 218 {219 220 221 222 <223>
        \\OPS: 224 225 226 227 228 229 230 231] 232 233 234 235 236 237 238 [239 240 241 242 <243> 244 245 246 247 248 249 250 251} 252 253 254 255
        \\OPS: 256 257 258 {259 260 261 262 <263> 264 265 266 267 268 269 270 271] 272 273 274 275 276 277 278 [279 280 281 282 <283> 284 285 286 287
        \\OPS: 288 289 290 291} 292 293 294 295 296 297 298 {299 300 301 302 <303> 304 305 306 307 308 309 310 311] 312 313 314 315 316 317 318 [319
        \\
    ).diff(string.items);
}
|
|
1718
|
+
|
|
1719
|
+
pub const Snapshot = struct {
    /// A table with TableInfo.snapshot_min=S was written during some commit with op<S.
    /// A block with snapshot_min=S is definitely readable at op=S.
    ///
    /// Currently returns `op + 1`.
    pub fn readable_at_commit(op: u64) u64 {
        // TODO: This is going to become more complicated when snapshot numbers match the op
        // acquiring the snapshot.
        const snapshot = op + 1;
        return snapshot;
    }
};
|