tigerbeetle 0.0.36 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,2928 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const maybe = stdx.maybe;
|
|
4
|
+
const log = std.log.scoped(.test_replica);
|
|
5
|
+
const expectEqual = std.testing.expectEqual;
|
|
6
|
+
const expect = std.testing.expect;
|
|
7
|
+
const allocator = std.testing.allocator;
|
|
8
|
+
|
|
9
|
+
const stdx = @import("stdx");
|
|
10
|
+
const constants = @import("../constants.zig");
|
|
11
|
+
const vsr = @import("../vsr.zig");
|
|
12
|
+
const fuzz = @import("../testing/fuzz.zig");
|
|
13
|
+
const Process = @import("../testing/cluster/message_bus.zig").Process;
|
|
14
|
+
const Message = @import("../message_pool.zig").MessagePool.Message;
|
|
15
|
+
const MessageBuffer = @import("../message_buffer.zig").MessageBuffer;
|
|
16
|
+
const marks = @import("../testing/marks.zig");
|
|
17
|
+
const StateMachineType = @import("../testing/state_machine.zig").StateMachineType;
|
|
18
|
+
const Cluster = @import("../testing/cluster.zig").ClusterType(StateMachineType);
|
|
19
|
+
const Release = @import("../testing/cluster.zig").Release;
|
|
20
|
+
const LinkFilter = @import("../testing/cluster/network.zig").LinkFilter;
|
|
21
|
+
const Network = @import("../testing/cluster/network.zig").Network;
|
|
22
|
+
const Ratio = stdx.PRNG.Ratio;
|
|
23
|
+
|
|
24
|
+
// Shorthand for the configured journal slot count.
const slot_count = constants.journal_slot_count;

// Three consecutive checkpoints; each is computed from the one before it, starting from op 0.
const checkpoint_1 = vsr.Checkpoint.checkpoint_after(0);
const checkpoint_2 = vsr.Checkpoint.checkpoint_after(checkpoint_1);
const checkpoint_3 = vsr.Checkpoint.checkpoint_after(checkpoint_2);

// The trigger associated with each of the checkpoints above.
const checkpoint_1_trigger = vsr.Checkpoint.trigger_for_checkpoint(checkpoint_1).?;
const checkpoint_2_trigger = vsr.Checkpoint.trigger_for_checkpoint(checkpoint_2).?;
const checkpoint_3_trigger = vsr.Checkpoint.trigger_for_checkpoint(checkpoint_3).?;

// The prepare_max associated with each checkpoint.
const checkpoint_1_prepare_max = vsr.Checkpoint.prepare_max_for_checkpoint(checkpoint_1).?;
const checkpoint_2_prepare_max = vsr.Checkpoint.prepare_max_for_checkpoint(checkpoint_2).?;
// Not used by any test yet:
// const checkpoint_3_prepare_max = vsr.Checkpoint.prepare_max_for_checkpoint(checkpoint_3).?;

// Checkpoint trigger plus the maximum length of the pipeline's prepare queue.
const checkpoint_1_prepare_ok_max = checkpoint_1_trigger + constants.pipeline_prepare_queue_max;
const checkpoint_2_prepare_ok_max = checkpoint_2_trigger + constants.pipeline_prepare_queue_max;

const MiB = stdx.MiB;

const log_level: std.log.Level = .err;
|
|
41
|
+
|
|
42
|
+
// Three releases (0.0.10, 0.0.20, 0.0.30). Each of them declares 0.0.10 as its
// minimum client release.
const releases = blk: {
    const client_min = vsr.Release.from(.{ .major = 0, .minor = 0, .patch = 10 });
    break :blk [_]Release{
        .{
            .release = vsr.Release.from(.{ .major = 0, .minor = 0, .patch = 10 }),
            .release_client_min = client_min,
        },
        .{
            .release = vsr.Release.from(.{ .major = 0, .minor = 0, .patch = 20 }),
            .release_client_min = client_min,
        },
        .{
            .release = vsr.Release.from(.{ .major = 0, .minor = 0, .patch = 30 }),
            .release_client_min = client_min,
        },
    };
};
|
|
56
|
+
|
|
57
|
+
// TODO Detect when cluster has stabilized and stop run() early, rather than just running for a
|
|
58
|
+
// fixed number of ticks.
|
|
59
|
+
|
|
60
|
+
comptime {
    // Op numbers and slot indices in the tests are hard-coded against exactly this configuration,
    // so refuse to compile under any other.
    assert(constants.lsm_compaction_ops == 4);
    assert(constants.journal_slot_count == 32);
}
|
|
65
|
+
|
|
66
|
+
test "Cluster: smoke" {
    // A single-replica cluster serves requests up to the second checkpoint trigger.
    const ctx = try TestContext.init(.{ .replica_count = 1 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});
    try client_set.request(checkpoint_2_trigger, checkpoint_2_trigger);

    const committed = ctx.replica(.R_).commit();
    try expectEqual(committed, checkpoint_2_trigger);
}
|
|
74
|
+
|
|
75
|
+
test "Cluster: recovery: WAL prepare corruption (R=3, corrupt right of head)" {
    const ctx = try TestContext.init(.{ .replica_count = 3 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});

    // Take the whole cluster down, then damage a prepare in R0's WAL.
    ctx.replica(.R_).stop();
    ctx.replica(.R0).corrupt(.{ .wal_prepare = 2 });

    // R0 comes back in status=recovering_head. With only R0 and R1 running,
    // the cluster cannot commit: the 4 requests yield 0 replies.
    try ctx.replica(.R0).open();
    try expectEqual(ctx.replica(.R0).status(), .recovering_head);
    try ctx.replica(.R1).open();
    try client_set.request(4, 0);

    // Bringing back the last replica lets the cluster recover and commit.
    try ctx.replica(.R2).open();
    try client_set.request(4, 4);
    try expectEqual(ctx.replica(.R_).commit(), 4);
}
|
|
93
|
+
|
|
94
|
+
test "Cluster: recovery: WAL prepare corruption (R=3, corrupt left of head, 3/3 corrupt)" {
    // Because the corrupt entry lies outside of the pipeline, the replicas know that
    // it must have been committed.
    const ctx = try TestContext.init(.{ .replica_count = 3 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});
    try client_set.request(2, 2);

    // Restart every replica with the same WAL prepare corrupted.
    const everyone = ctx.replica(.R_);
    everyone.stop();
    everyone.corrupt(.{ .wal_prepare = 1 });
    try everyone.open();
    ctx.run();

    // No WAL still holds that prepare, so the cluster can never recover:
    // every replica is stuck trying to repair the header break.
    try expectEqual(ctx.replica(.R_).status(), .view_change);
    try expectEqual(ctx.replica(.R_).commit(), 0);
}
|
|
112
|
+
|
|
113
|
+
test "Cluster: recovery: WAL prepare corruption (R=3, corrupt root)" {
    // Corrupting the root prepare is recoverable.
    const ctx = try TestContext.init(.{ .replica_count = 3 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});

    // Restart R0 with its root prepare (slot 0) corrupted.
    ctx.replica(.R0).stop();
    ctx.replica(.R0).corrupt(.{ .wal_prepare = 0 });
    try ctx.replica(.R0).open();

    try client_set.request(1, 1);
    try expectEqual(ctx.replica(.R_).commit(), 1);

    // Afterwards, slot 0 of R0's WAL prepares must no longer be reported as faulty.
    const r0 = ctx.replica(.R0);
    const r0_storage = &ctx.cluster.storages[r0.replicas.get(0)];
    const root_faulty = r0_storage.area_faulty(.{ .wal_prepares = .{ .slot = 0 } });
    try expect(!root_faulty);
}
|
|
130
|
+
|
|
131
|
+
test "Cluster: recovery: WAL prepare corruption (R=3, corrupt checkpoint…head)" {
    const ctx = try TestContext.init(.{ .replica_count = 3 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});

    // Commit far enough to trigger the first checkpoint, then take R0 down.
    try client_set.request(checkpoint_1_trigger, checkpoint_1_trigger);
    ctx.replica(.R0).stop();

    // Corrupt every slot from op_checkpoint (27) through the end of the WAL.
    const corrupt_slot_first: usize = slot_count - constants.lsm_compaction_ops - 1;
    for (corrupt_slot_first..slot_count) |corrupt_slot| {
        ctx.replica(.R0).corrupt(.{ .wal_prepare = corrupt_slot });
    }

    try ctx.replica(.R0).open();
    try expectEqual(ctx.replica(.R0).status(), .recovering_head);

    // R0 returns to normal status while the cluster keeps committing...
    try client_set.request(slot_count, slot_count);
    try expectEqual(ctx.replica(.R0).status(), .normal);

    // ...and the cluster still commits after R1 is stopped.
    ctx.replica(.R1).stop();
    try client_set.request(slot_count + 1, slot_count + 1);
}
|
|
153
|
+
|
|
154
|
+
test "Cluster: recovery: WAL prepare corruption (R=1, corrupt between checkpoint and head)" {
    // R=1 can never recover if a WAL-prepare is corrupt.
    const t = try TestContext.init(.{ .replica_count = 1 });
    defer t.deinit();

    var c = t.clients(.{});
    try c.request(2, 2);
    t.replica(.R0).stop();
    t.replica(.R0).corrupt(.{ .wal_prepare = 1 });

    // Opening must fail with exactly error.WALCorrupt. Use a real test expectation rather than
    // `unreachable`, so that an unexpected success (or a different error) is reported as a
    // test failure in every build mode instead of being unchecked in unsafe release builds.
    try std.testing.expectError(error.WALCorrupt, t.replica(.R0).open());
}
|
|
170
|
+
|
|
171
|
+
test "Cluster: recovery: WAL header corruption (R=1)" {
    // With R=1, a corrupt WAL header is repaired locally.
    const ctx = try TestContext.init(.{ .replica_count = 1 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});
    try client_set.request(2, 2);

    // Restart the only replica with one WAL header corrupted.
    const r0 = ctx.replica(.R0);
    r0.stop();
    r0.corrupt(.{ .wal_header = 1 });
    try r0.open();

    // The replica keeps serving requests afterwards.
    try client_set.request(3, 3);
}
|
|
183
|
+
|
|
184
|
+
test "Cluster: recovery: WAL torn prepare, standby with intact prepare (R=1 S=1)" {
    // On recovery, R=1 discovers that its last prepare was a torn write and truncates it.
    // The standby, however, did receive that prepare.
    //
    // R=1 deals with this by incrementing its view during recovery, so that the standby can
    // discard the truncated prepare.
    const ctx = try TestContext.init(.{
        .replica_count = 1,
        .standby_count = 1,
    });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});
    try client_set.request(2, 2);

    ctx.replica(.R0).stop();
    ctx.replica(.R0).corrupt(.{ .wal_header = 2 });
    try ctx.replica(.R0).open();

    // Both the replica and the standby commit the next request.
    try client_set.request(3, 3);
    try expectEqual(ctx.replica(.R0).commit(), 3);
    try expectEqual(ctx.replica(.S0).commit(), 3);
}
|
|
205
|
+
|
|
206
|
+
test "Cluster: recovery: grid corruption (disjoint)" {
    const ctx = try TestContext.init(.{ .replica_count = 3 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});

    // Checkpoint first, so that recovery really does go through the grid.
    // Every replica has to be at the same commit; otherwise grid repair could fail and
    // fall back to state sync.
    try client_set.request(checkpoint_1_trigger, checkpoint_1_trigger);
    try expectEqual(ctx.replica(.R_).op_checkpoint(), checkpoint_1);
    try expectEqual(ctx.replica(.R_).commit(), checkpoint_1_trigger);

    ctx.replica(.R_).stop();

    // Corrupt the entire grid:
    // - manifest blocks get repaired while each replica opens its forest;
    // - table index/filter/value blocks get repaired as the replica commits/compacts.
    const victims = [_]TestReplicas{
        ctx.replica(.R0),
        ctx.replica(.R1),
        ctx.replica(.R2),
    };
    for (victims, 0..) |victim, victim_index| {
        const address_max = ctx.block_address_max();
        // Addresses start at 1; each replica starts one address further along.
        var address_intact: u64 = 1 + victim_index;
        while (address_intact <= address_max) : (address_intact += 3) {
            // Every third address is left alone on this replica, so each block
            // stays intact on exactly one replica.
            for ([_]u64{ 1, 2 }) |offset| {
                victim.corrupt(.{ .grid_block = address_intact + offset });
            }
        }
    }

    try ctx.replica(.R_).open();
    ctx.run();

    // All replicas are back to normal, at the same commit and checkpoint as before.
    try expectEqual(ctx.replica(.R_).status(), .normal);
    try expectEqual(ctx.replica(.R_).commit(), checkpoint_1_trigger);
    try expectEqual(ctx.replica(.R_).op_checkpoint(), checkpoint_1);

    // The cluster can go on to reach the second checkpoint.
    try client_set.request(checkpoint_2_trigger, checkpoint_2_trigger);
    try expectEqual(ctx.replica(.R_).op_checkpoint(), checkpoint_2);
    try expectEqual(ctx.replica(.R_).commit(), checkpoint_2_trigger);
}
|
|
250
|
+
|
|
251
|
+
test "Cluster: recovery: recovering_head, outdated start view" {
    // Scenario:
    // 1. Wait until B1 has ok'd op=3.
    // 2. Restart B1 with op=3 corrupted, putting it into .recovering_head with op=2.
    // 3. Deliver an outdated .start_view (op=2) to B1, trying to make it forget op=3.
    const ctx = try TestContext.init(.{
        .replica_count = 3,
    });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});
    var primary = ctx.replica(.A0);
    var backup_1 = ctx.replica(.B1);
    var backup_2 = ctx.replica(.B2);

    try client_set.request(2, 2);

    // First restart: B1 loses op=2 and reopens in recovering_head with head op=1.
    backup_1.stop();
    backup_1.corrupt(.{ .wal_prepare = 2 });

    try backup_1.open();
    try expectEqual(backup_1.status(), .recovering_head);
    try expectEqual(backup_1.op_head(), 1);

    // Record the incoming start_view from A0 that brings B1 back to normal (head op=2).
    backup_1.record(.A0, .incoming, .start_view);
    ctx.run();
    try expectEqual(backup_1.status(), .normal);
    try expectEqual(backup_1.op_head(), 2);

    // Partition B2 away, then commit op=3.
    backup_2.drop_all(.R_, .bidirectional);

    try client_set.request(3, 3);

    // Second restart: B1 loses op=3 and reopens in recovering_head with head op=2.
    backup_1.stop();
    backup_1.corrupt(.{ .wal_prepare = 3 });

    try backup_1.open();
    try expectEqual(backup_1.status(), .recovering_head);
    try expectEqual(backup_1.op_head(), 2);

    // With A0 stopped, replay the recorded (now outdated) start_view to B1.
    const mark = marks.check("ignoring (recovering_head, nonce mismatch)");
    primary.stop();
    backup_1.replay_recorded();
    ctx.run();

    // B1 must not act on the stale message.
    try expectEqual(backup_1.status(), .recovering_head);
    try expectEqual(backup_1.op_head(), 2);

    // If B1 had wrongly accepted op=2 as its head, healing B2's partition at this point
    // would result in data loss.
    backup_2.pass_all(.R_, .bidirectional);
    ctx.run();
    try primary.open();
    try client_set.request(4, 4);
    try mark.expect_hit();
}
|
|
305
|
+
|
|
306
|
+
test "Cluster: recovery: recovering head: idle cluster" {
    const ctx = try TestContext.init(.{ .replica_count = 3 });
    defer ctx.deinit();

    var client_set = ctx.clients(.{});
    var backup = ctx.replica(.B1);

    try client_set.request(2, 2);

    // Restart B1 with both the prepare and the header of slot 3 corrupted.
    backup.stop();
    backup.corrupt(.{ .wal_prepare = 3 });
    backup.corrupt(.{ .wal_header = 3 });

    try backup.open();
    try expectEqual(backup.status(), .recovering_head);
    try expectEqual(backup.op_head(), 2);

    // No further client requests are sent; just let the cluster run.
    ctx.run();

    // B1 gets back to normal status with its head unchanged.
    try expectEqual(backup.status(), .normal);
    try expectEqual(backup.op_head(), 2);
}
|
|
328
|
+
|
|
329
|
+
test "Cluster: recovery: reformat unrecoverable replica" {
    const op_max_cases = [_]u64{
        // The cluster has not left the first checkpoint:
        // loading a SV is enough for the recovering replica to start repairing.
        5,
        // The cluster has moved past the initial checkpoint:
        // the recovering replica has to state sync via SV.
        checkpoint_2,
    };

    for (op_max_cases) |op_max| {
        const ctx = try TestContext.init(.{ .replica_count = 3 });
        defer ctx.deinit();

        var client_set = ctx.clients(.{});
        var backup = ctx.replica(.B1);

        try client_set.request(op_max, op_max);

        // Bring B1 back through a reformat.
        backup.stop();
        try backup.open_reformat();
        ctx.run();
        try expectEqual(backup.health(), .up);

        try expectEqual(backup.status(), .normal);
        // The reformatted replica pulses noop requests, hence the extra pipeline's worth of ops.
        const op_head_expected = op_max + constants.pipeline_prepare_queue_max;
        try expectEqual(backup.op_head(), op_head_expected);
    }
}
|
|
356
|
+
|
|
357
|
+
test "Cluster: recovery: reformat unrecoverable replica: too many faults" {
    // Both backups (B1 and B2) are stopped and reopened via `open_reformat()`, and A0 is
    // restarted. The test asserts that neither reformatting replica leaves the `.reformatting`
    // health state, i.e. the cluster does not recover from this many simultaneous faults.
    const t = try TestContext.init(.{ .replica_count = 3 });
    defer t.deinit();

    var c = t.clients(.{});
    var a0 = t.replica(.A0);
    var b1 = t.replica(.B1);
    var b2 = t.replica(.B2);

    try c.request(3, 3);

    b1.stop();
    b2.stop();

    // Restart A0 to force it out of normal mode.
    // Otherwise it would just share a SV, repairing the recovering replicas.
    a0.stop();
    try a0.open();

    // Reformat B1 alone first: it must stay stuck in `.reformatting`.
    try b1.open_reformat();
    t.run();
    try expectEqual(b1.health(), .reformatting);

    // Reformatting B2 as well must not unblock B1.
    try b2.open_reformat();
    t.run();
    try expectEqual(b1.health(), .reformatting);

    t.run();

    // There were too many faults, so the cluster (safely) remains unavailable.
    // Check *both* reformatting replicas (the second assertion previously re-checked B1).
    try expectEqual(b1.health(), .reformatting);
    try expectEqual(b2.health(), .reformatting);
}
|
|
390
|
+
|
|
391
|
+
test "Cluster: network: partition 2-1 (isolate backup, symmetric)" {
|
|
392
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
393
|
+
defer t.deinit();
|
|
394
|
+
|
|
395
|
+
var c = t.clients(.{});
|
|
396
|
+
try c.request(2, 2);
|
|
397
|
+
t.replica(.B2).drop_all(.__, .bidirectional);
|
|
398
|
+
try c.request(3, 3);
|
|
399
|
+
try expectEqual(t.replica(.A0).commit(), 3);
|
|
400
|
+
try expectEqual(t.replica(.B1).commit(), 3);
|
|
401
|
+
try expectEqual(t.replica(.B2).commit(), 2);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
test "Cluster: network: partition 2-1 (isolate backup, asymmetric, send-only)" {
|
|
405
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
406
|
+
defer t.deinit();
|
|
407
|
+
|
|
408
|
+
var c = t.clients(.{});
|
|
409
|
+
try c.request(2, 2);
|
|
410
|
+
t.replica(.B2).drop_all(.__, .incoming);
|
|
411
|
+
try c.request(3, 3);
|
|
412
|
+
try expectEqual(t.replica(.A0).commit(), 3);
|
|
413
|
+
try expectEqual(t.replica(.B1).commit(), 3);
|
|
414
|
+
try expectEqual(t.replica(.B2).commit(), 2);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
test "Cluster: network: partition 2-1 (isolate backup, asymmetric, receive-only)" {
|
|
418
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
419
|
+
defer t.deinit();
|
|
420
|
+
|
|
421
|
+
var c = t.clients(.{});
|
|
422
|
+
try c.request(2, 2);
|
|
423
|
+
t.replica(.B2).drop_all(.__, .outgoing);
|
|
424
|
+
try c.request(3, 3);
|
|
425
|
+
try expectEqual(t.replica(.A0).commit(), 3);
|
|
426
|
+
try expectEqual(t.replica(.B1).commit(), 3);
|
|
427
|
+
// B2 may commit some ops, but at some point it will likely fall behind.
|
|
428
|
+
// Prepares may be reordered by the network, and if B1 receives X+1 then X,
|
|
429
|
+
// it will not forward X on, as it is a "repair".
|
|
430
|
+
// And B2 is partitioned, so it cannot repair its hash chain.
|
|
431
|
+
try expect(t.replica(.B2).commit() >= 2);
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
test "Cluster: network: partition 1-2 (isolate primary, symmetric)" {
|
|
435
|
+
// The primary cannot communicate with either backup, but the backups can communicate with one
|
|
436
|
+
// another. The backups will perform a view-change since they don't receive heartbeats.
|
|
437
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
438
|
+
defer t.deinit();
|
|
439
|
+
|
|
440
|
+
var c = t.clients(.{});
|
|
441
|
+
try c.request(2, 2);
|
|
442
|
+
|
|
443
|
+
const p = t.replica(.A0);
|
|
444
|
+
p.drop_all(.B1, .bidirectional);
|
|
445
|
+
p.drop_all(.B2, .bidirectional);
|
|
446
|
+
try c.request(3, 3);
|
|
447
|
+
try expectEqual(p.commit(), 2);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
test "Cluster: network: partition 1-2 (isolate primary, asymmetric, send-only)" {
|
|
451
|
+
// The primary can send to the backups, but not receive.
|
|
452
|
+
// After a short interval of not receiving messages (specifically prepare_ok's) it will abdicate
|
|
453
|
+
// by pausing heartbeats, allowing the next replica to take over as primary.
|
|
454
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
455
|
+
defer t.deinit();
|
|
456
|
+
|
|
457
|
+
var c = t.clients(.{});
|
|
458
|
+
try c.request(1, 1);
|
|
459
|
+
t.replica(.A0).drop_all(.B1, .incoming);
|
|
460
|
+
t.replica(.A0).drop_all(.B2, .incoming);
|
|
461
|
+
const mark = marks.check("send_commit: primary abdicating");
|
|
462
|
+
try c.request(2, 2);
|
|
463
|
+
try mark.expect_hit();
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
test "Cluster: network: partition 1-2 (isolate primary, asymmetric, receive-only)" {
|
|
467
|
+
// The primary can receive from the backups, but not send to them.
|
|
468
|
+
// The backups will perform a view-change since they don't receive heartbeats.
|
|
469
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
470
|
+
defer t.deinit();
|
|
471
|
+
|
|
472
|
+
var c = t.clients(.{});
|
|
473
|
+
try c.request(1, 1);
|
|
474
|
+
t.replica(.A0).drop_all(.B1, .outgoing);
|
|
475
|
+
t.replica(.A0).drop_all(.B2, .outgoing);
|
|
476
|
+
try c.request(2, 2);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
test "Cluster: network: partition primary-all (isolate primary, asymmetric, send-only)" {
|
|
480
|
+
// The primary can send to the backups and clients, but not receive.
|
|
481
|
+
// Since primary can't see requests, it doesn't know that it needs to abdicate.
|
|
482
|
+
// The rest of the cluster needs to view-change anyway.
|
|
483
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
484
|
+
defer t.deinit();
|
|
485
|
+
|
|
486
|
+
var c = t.clients(.{});
|
|
487
|
+
try c.request(1, 1);
|
|
488
|
+
t.replica(.A0).drop(.__, .incoming, .request);
|
|
489
|
+
// Since the primary doesn't receive requests, it can't ever abdicate.
|
|
490
|
+
const mark = marks.check("send_commit: primary abdicating");
|
|
491
|
+
// TODO:
|
|
492
|
+
// try c.request(2, 2);
|
|
493
|
+
try c.request(2, 1);
|
|
494
|
+
try mark.expect_not_hit();
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
test "Cluster: network: partition client-primary (symmetric)" {
|
|
498
|
+
// Clients cannot communicate with the primary, but they still request/reply via a backup.
|
|
499
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
500
|
+
defer t.deinit();
|
|
501
|
+
|
|
502
|
+
var c = t.clients(.{});
|
|
503
|
+
|
|
504
|
+
t.replica(.A0).drop_all(.C_, .bidirectional);
|
|
505
|
+
try c.request(1, 1);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
test "Cluster: network: partition client-primary (asymmetric, drop requests)" {
|
|
509
|
+
// Primary cannot receive messages from the clients.
|
|
510
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
511
|
+
defer t.deinit();
|
|
512
|
+
|
|
513
|
+
var c = t.clients(.{});
|
|
514
|
+
|
|
515
|
+
t.replica(.A0).drop_all(.C_, .incoming);
|
|
516
|
+
try c.request(1, 1);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
test "Cluster: network: partition client-primary (asymmetric, drop replies)" {
|
|
520
|
+
// Clients cannot receive replies from the primary, but they receive replies from a backup.
|
|
521
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
522
|
+
defer t.deinit();
|
|
523
|
+
|
|
524
|
+
var c = t.clients(.{});
|
|
525
|
+
|
|
526
|
+
t.replica(.A0).drop_all(.C_, .outgoing);
|
|
527
|
+
try c.request(1, 1);
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
test "Cluster: network: partition flexible quorum" {
|
|
531
|
+
// Two out of four replicas should be able to carry on as long as the pair includes the primary.
|
|
532
|
+
const t = try TestContext.init(.{ .replica_count = 4 });
|
|
533
|
+
defer t.deinit();
|
|
534
|
+
|
|
535
|
+
var c = t.clients(.{});
|
|
536
|
+
|
|
537
|
+
t.run();
|
|
538
|
+
t.replica(.B2).stop();
|
|
539
|
+
t.replica(.B3).stop();
|
|
540
|
+
for (0..3) |_| t.run(); // Give enough time for the clocks to desync.
|
|
541
|
+
|
|
542
|
+
try c.request(4, 4);
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
test "Cluster: network: primary no clock sync" {
|
|
546
|
+
// When primary can't accept requests because the clock is not synchronized, it must proactively
|
|
547
|
+
// abdicate (the rest of the cluster doesn't know that there are dropped requests).
|
|
548
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
549
|
+
defer t.deinit();
|
|
550
|
+
|
|
551
|
+
var c = t.clients(.{});
|
|
552
|
+
try c.request(3, 3);
|
|
553
|
+
const a0 = t.replica(.A0);
|
|
554
|
+
try expectEqual(a0.role(), .primary);
|
|
555
|
+
try expectEqual(a0.commit(), 3);
|
|
556
|
+
|
|
557
|
+
a0.drop(.R_, .incoming, .pong);
|
|
558
|
+
for (0..3) |_| t.run(); // Give enough time for the clocks to desync.
|
|
559
|
+
|
|
560
|
+
try expectEqual(a0.role(), .primary);
|
|
561
|
+
const mark = marks.check("send_commit: primary abdicating");
|
|
562
|
+
try c.request(5, 5);
|
|
563
|
+
try mark.expect_hit();
|
|
564
|
+
try expectEqual(a0.role(), .backup);
|
|
565
|
+
try expectEqual(t.replica(.R_).commit(), 5);
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
test "Cluster: repair: partition 2-1, then backup fast-forward 1 checkpoint" {
|
|
569
|
+
// A backup that has fallen behind by one checkpoint can catch up, without using state sync.
|
|
570
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
571
|
+
defer t.deinit();
|
|
572
|
+
|
|
573
|
+
var c = t.clients(.{});
|
|
574
|
+
try c.request(3, 3);
|
|
575
|
+
try expectEqual(t.replica(.R_).commit(), 3);
|
|
576
|
+
|
|
577
|
+
var r_lag = t.replica(.B2);
|
|
578
|
+
r_lag.stop();
|
|
579
|
+
|
|
580
|
+
// Commit enough ops to checkpoint once, and then nearly wrap around, leaving enough slack
|
|
581
|
+
// that the lagging backup can repair (without state sync).
|
|
582
|
+
const commit = 3 + slot_count - constants.pipeline_prepare_queue_max;
|
|
583
|
+
try c.request(commit, commit);
|
|
584
|
+
try expectEqual(t.replica(.A0).op_checkpoint(), checkpoint_1);
|
|
585
|
+
try expectEqual(t.replica(.B1).op_checkpoint(), checkpoint_1);
|
|
586
|
+
|
|
587
|
+
try r_lag.open();
|
|
588
|
+
try expectEqual(r_lag.status(), .normal);
|
|
589
|
+
try expectEqual(r_lag.op_checkpoint(), 0);
|
|
590
|
+
|
|
591
|
+
// Allow repair, but check that state sync doesn't run.
|
|
592
|
+
const mark = marks.check("sync started");
|
|
593
|
+
t.run();
|
|
594
|
+
try mark.expect_not_hit();
|
|
595
|
+
|
|
596
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
597
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), checkpoint_1);
|
|
598
|
+
try expectEqual(t.replica(.R_).commit(), commit);
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
test "Cluster: repair: view-change, new-primary lagging behind checkpoint, forfeit" {
|
|
602
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
603
|
+
defer t.deinit();
|
|
604
|
+
|
|
605
|
+
var c = t.clients(.{});
|
|
606
|
+
try c.request(2, 2);
|
|
607
|
+
try expectEqual(t.replica(.R_).commit(), 2);
|
|
608
|
+
|
|
609
|
+
var a0 = t.replica(.A0);
|
|
610
|
+
var b1 = t.replica(.B1);
|
|
611
|
+
var b2 = t.replica(.B2);
|
|
612
|
+
|
|
613
|
+
b1.drop_all(.__, .bidirectional);
|
|
614
|
+
|
|
615
|
+
try c.request(checkpoint_1_prepare_max + 1, checkpoint_1_prepare_max + 1);
|
|
616
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_1);
|
|
617
|
+
try expectEqual(b1.op_checkpoint(), 0);
|
|
618
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_1);
|
|
619
|
+
try expectEqual(a0.commit(), checkpoint_1_prepare_max + 1);
|
|
620
|
+
try expectEqual(b1.commit(), 2);
|
|
621
|
+
try expectEqual(b2.commit(), checkpoint_1_prepare_max + 1);
|
|
622
|
+
try expectEqual(a0.op_head(), checkpoint_1_prepare_max + 1);
|
|
623
|
+
try expectEqual(b1.op_head(), 2);
|
|
624
|
+
try expectEqual(b2.op_head(), checkpoint_1_prepare_max + 1);
|
|
625
|
+
|
|
626
|
+
// Partition the primary, but restore B1. B1 will attempt to become the primary next,
|
|
627
|
+
// but it is too far behind, so B2 becomes the new primary instead.
|
|
628
|
+
b2.pass_all(.__, .bidirectional);
|
|
629
|
+
b1.pass_all(.__, .bidirectional);
|
|
630
|
+
a0.drop_all(.__, .bidirectional);
|
|
631
|
+
// TODO: make sure that B1 uses WAL repair rather than state sync here.
|
|
632
|
+
const mark = marks.check("on_do_view_change: lagging primary; forfeiting");
|
|
633
|
+
t.run();
|
|
634
|
+
try mark.expect_hit();
|
|
635
|
+
|
|
636
|
+
try expectEqual(b2.role(), .primary);
|
|
637
|
+
try expectEqual(b2.index(), t.replica(.A0).index());
|
|
638
|
+
try expectEqual(b2.view(), b1.view());
|
|
639
|
+
try expectEqual(b2.log_view(), b1.log_view());
|
|
640
|
+
|
|
641
|
+
// Thanks to the new primary, the lagging backup is able to catch up to the latest
|
|
642
|
+
// checkpoint/commit.
|
|
643
|
+
try expectEqual(b1.role(), .backup);
|
|
644
|
+
try expectEqual(b1.commit(), checkpoint_1_prepare_max + 1);
|
|
645
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_1);
|
|
646
|
+
|
|
647
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_prepare_max + 1);
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
test "Cluster: repair: crash, corrupt committed pipeline op, repair it, view-change; dont nack" {
|
|
651
|
+
// This scenario is also applicable when any op within the pipeline suffix is corrupted.
|
|
652
|
+
// But we test by corrupting the last op to take advantage of recovering_head to learn the last
|
|
653
|
+
// op's header without its prepare.
|
|
654
|
+
//
|
|
655
|
+
// Also, a corrupt last op maximizes uncertainty — there are no higher ops which
|
|
656
|
+
// can definitively show that the last op is committed (via `header.commit`).
|
|
657
|
+
const t = try TestContext.init(.{
|
|
658
|
+
.replica_count = 3,
|
|
659
|
+
.client_count = constants.pipeline_prepare_queue_max,
|
|
660
|
+
});
|
|
661
|
+
defer t.deinit();
|
|
662
|
+
|
|
663
|
+
var c = t.clients(.{});
|
|
664
|
+
try c.request(2, 2);
|
|
665
|
+
|
|
666
|
+
var a0 = t.replica(.A0);
|
|
667
|
+
var b1 = t.replica(.B1);
|
|
668
|
+
var b2 = t.replica(.B2);
|
|
669
|
+
|
|
670
|
+
b2.drop_all(.R_, .bidirectional);
|
|
671
|
+
|
|
672
|
+
try c.request(4, 4);
|
|
673
|
+
|
|
674
|
+
b1.stop();
|
|
675
|
+
b1.corrupt(.{ .wal_prepare = 4 });
|
|
676
|
+
|
|
677
|
+
// We can't learn op=4's prepare, only its header (via start_view).
|
|
678
|
+
b1.drop(.R_, .bidirectional, .prepare);
|
|
679
|
+
try b1.open();
|
|
680
|
+
try expectEqual(b1.status(), .recovering_head);
|
|
681
|
+
t.run();
|
|
682
|
+
|
|
683
|
+
b1.pass_all(.R_, .bidirectional);
|
|
684
|
+
b2.pass_all(.R_, .bidirectional);
|
|
685
|
+
a0.stop();
|
|
686
|
+
a0.drop_all(.R_, .outgoing);
|
|
687
|
+
t.run();
|
|
688
|
+
|
|
689
|
+
// The cluster is stuck trying to repair op=4 (requesting the prepare).
|
|
690
|
+
// B2 can nack op=4, but B1 *must not*.
|
|
691
|
+
try expectEqual(b1.status(), .view_change);
|
|
692
|
+
try expectEqual(b1.commit(), 3);
|
|
693
|
+
try expectEqual(b1.op_head(), 4);
|
|
694
|
+
|
|
695
|
+
// A0 provides prepare=4.
|
|
696
|
+
a0.pass_all(.R_, .outgoing);
|
|
697
|
+
try a0.open();
|
|
698
|
+
t.run();
|
|
699
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
700
|
+
try expectEqual(t.replica(.R_).commit(), 4);
|
|
701
|
+
try expectEqual(t.replica(.R_).op_head(), 4);
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
test "Cluster: repair: corrupt reply" {
    // The primary's stored client replies are corrupted while the client is still waiting for
    // the reply to its last request. The test asserts that the client nevertheless ends up with
    // all 3 replies once A0 is again allowed to send replies.
    const t = try TestContext.init(.{ .replica_count = 3 });
    defer t.deinit();

    var c = t.clients(.{});
    try c.request(2, 2);
    try expectEqual(t.replica(.R_).commit(), 2);

    // Prevent any view changes, to ensure A0 repairs its corrupt reply.
    t.replica(.R_).drop(.R_, .bidirectional, .do_view_change);

    // Block the client from seeing the reply from the cluster.
    t.replica(.R_).drop(.C_, .outgoing, .reply);
    try c.request(3, 2);

    // Corrupt all of the primary's saved replies.
    // (This is easier than figuring out the reply's actual slot.)
    for (0..constants.clients_max) |slot| {
        t.replica(.A0).corrupt(.{ .client_reply = slot });
    }

    // The client will keep retrying request 3 until it receives a reply.
    // The primary requests the reply from one of its backups.
    // (Pass A0 only to ensure that no other client forwards the reply.)
    t.replica(.A0).pass(.C_, .outgoing, .reply);
    t.run();

    try expectEqual(c.replies(), 3);
}
|
|
734
|
+
|
|
735
|
+
test "Cluster: repair: ack committed prepare" {
|
|
736
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
737
|
+
defer t.deinit();
|
|
738
|
+
|
|
739
|
+
var c = t.clients(.{});
|
|
740
|
+
try c.request(2, 2);
|
|
741
|
+
try expectEqual(t.replica(.R_).commit(), 2);
|
|
742
|
+
|
|
743
|
+
const p = t.replica(.A0);
|
|
744
|
+
const b1 = t.replica(.B1);
|
|
745
|
+
const b2 = t.replica(.B2);
|
|
746
|
+
|
|
747
|
+
// A0 commits 3.
|
|
748
|
+
// B1 prepares 3, but does not commit.
|
|
749
|
+
t.replica(.R_).drop(.R_, .bidirectional, .start_view_change);
|
|
750
|
+
t.replica(.R_).drop(.R_, .bidirectional, .do_view_change);
|
|
751
|
+
p.drop(.__, .outgoing, .commit);
|
|
752
|
+
b2.drop(.__, .incoming, .prepare);
|
|
753
|
+
try c.request(3, 3);
|
|
754
|
+
try expectEqual(p.commit(), 3);
|
|
755
|
+
try expectEqual(b1.commit(), 2);
|
|
756
|
+
try expectEqual(b2.commit(), 2);
|
|
757
|
+
|
|
758
|
+
try expectEqual(p.op_head(), 3);
|
|
759
|
+
try expectEqual(b1.op_head(), 3);
|
|
760
|
+
try expectEqual(b2.op_head(), 2);
|
|
761
|
+
|
|
762
|
+
try expectEqual(p.status(), .normal);
|
|
763
|
+
try expectEqual(b1.status(), .normal);
|
|
764
|
+
try expectEqual(b2.status(), .normal);
|
|
765
|
+
|
|
766
|
+
// Change views. B1/B2 participate. Don't allow B2 to repair op=3.
|
|
767
|
+
try expectEqual(p.role(), .primary);
|
|
768
|
+
t.replica(.R_).pass(.R_, .bidirectional, .start_view_change);
|
|
769
|
+
t.replica(.R_).pass(.R_, .bidirectional, .do_view_change);
|
|
770
|
+
p.drop(.__, .bidirectional, .prepare);
|
|
771
|
+
p.drop(.__, .bidirectional, .do_view_change);
|
|
772
|
+
p.drop(.__, .bidirectional, .start_view_change);
|
|
773
|
+
t.run();
|
|
774
|
+
try expectEqual(b1.commit(), 2);
|
|
775
|
+
try expectEqual(b2.commit(), 2);
|
|
776
|
+
try expectEqual(p.role(), .backup);
|
|
777
|
+
|
|
778
|
+
try expectEqual(p.status(), .normal);
|
|
779
|
+
try expectEqual(b1.status(), .normal);
|
|
780
|
+
try expectEqual(b2.status(), .normal);
|
|
781
|
+
|
|
782
|
+
// But other than that, heal A0/B1, but partition B2 completely.
|
|
783
|
+
// (Prevent another view change.)
|
|
784
|
+
p.pass_all(.__, .bidirectional);
|
|
785
|
+
b1.pass_all(.__, .bidirectional);
|
|
786
|
+
b2.drop_all(.__, .bidirectional);
|
|
787
|
+
t.replica(.R_).drop(.R_, .bidirectional, .start_view_change);
|
|
788
|
+
t.replica(.R_).drop(.R_, .bidirectional, .do_view_change);
|
|
789
|
+
t.run();
|
|
790
|
+
|
|
791
|
+
try expectEqual(p.status(), .normal);
|
|
792
|
+
try expectEqual(b1.status(), .normal);
|
|
793
|
+
try expectEqual(b2.status(), .normal);
|
|
794
|
+
|
|
795
|
+
// A0 acks op=3 even though it already committed it.
|
|
796
|
+
try expectEqual(p.commit(), 3);
|
|
797
|
+
try expectEqual(b1.commit(), 3);
|
|
798
|
+
try expectEqual(b2.commit(), 2);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
test "Cluster: repair: primary checkpoint, backup crash before checkpoint, primary prepare" {
|
|
802
|
+
// 1. Given 3 replicas: A0, B1, B2.
|
|
803
|
+
// 2. B2 is partitioned (for the entire scenario).
|
|
804
|
+
// 3. A0 and B1 prepare and commit many messages...
|
|
805
|
+
// 4. A0 commits a checkpoint trigger and checkpoints.
|
|
806
|
+
// 5. B1 crashes before it can commit the trigger or checkpoint.
|
|
807
|
+
// 6. A0 prepares a message.
|
|
808
|
+
// 7. B1 restarts. The very first entry in its WAL is corrupt.
|
|
809
|
+
// A0 has *not* already overwritten the corresponding entry in its own WAL, thanks to the
|
|
810
|
+
// pipeline component of the vsr_checkpoint_ops.
|
|
811
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
812
|
+
defer t.deinit();
|
|
813
|
+
|
|
814
|
+
var c = t.clients(.{});
|
|
815
|
+
var p = t.replica(.A0);
|
|
816
|
+
var b1 = t.replica(.B1);
|
|
817
|
+
var b2 = t.replica(.B2);
|
|
818
|
+
|
|
819
|
+
// B2 does not participate in this scenario.
|
|
820
|
+
b2.stop();
|
|
821
|
+
try c.request(checkpoint_1_trigger - 1, checkpoint_1_trigger - 1);
|
|
822
|
+
|
|
823
|
+
b1.drop(.R_, .incoming, .commit);
|
|
824
|
+
try c.request(checkpoint_1_trigger, checkpoint_1_trigger);
|
|
825
|
+
try expectEqual(p.op_checkpoint(), checkpoint_1);
|
|
826
|
+
try expectEqual(b1.op_checkpoint(), 0);
|
|
827
|
+
try expectEqual(p.commit(), checkpoint_1_trigger);
|
|
828
|
+
try expectEqual(b1.commit(), checkpoint_1_trigger - 1);
|
|
829
|
+
|
|
830
|
+
b1.pass(.R_, .incoming, .commit);
|
|
831
|
+
b1.stop();
|
|
832
|
+
b1.corrupt(.{ .wal_prepare = 1 });
|
|
833
|
+
try c.request(
|
|
834
|
+
checkpoint_1_trigger + constants.pipeline_prepare_queue_max,
|
|
835
|
+
checkpoint_1_trigger,
|
|
836
|
+
);
|
|
837
|
+
try b1.open();
|
|
838
|
+
t.run();
|
|
839
|
+
|
|
840
|
+
try expectEqual(p.op_checkpoint(), checkpoint_1);
|
|
841
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_1);
|
|
842
|
+
try expectEqual(p.commit(), checkpoint_1_trigger + constants.pipeline_prepare_queue_max);
|
|
843
|
+
try expectEqual(b1.commit(), checkpoint_1_trigger + constants.pipeline_prepare_queue_max);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
test "Cluster: view-change: DVC, 1+1/2 faulty header stall, 2+1/3 faulty header succeed" {
|
|
847
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
848
|
+
defer t.deinit();
|
|
849
|
+
|
|
850
|
+
var c = t.clients(.{});
|
|
851
|
+
try c.request(2, 2);
|
|
852
|
+
try expectEqual(t.replica(.R_).commit(), 2);
|
|
853
|
+
|
|
854
|
+
t.replica(.R0).stop();
|
|
855
|
+
try c.request(4, 4);
|
|
856
|
+
t.replica(.R1).stop();
|
|
857
|
+
t.replica(.R2).stop();
|
|
858
|
+
|
|
859
|
+
t.replica(.R1).corrupt(.{ .wal_prepare = 3 });
|
|
860
|
+
|
|
861
|
+
// The nack quorum size is 2.
|
|
862
|
+
// The new view must determine whether op=3 is possibly committed.
|
|
863
|
+
// - R0 never received op=3 (it had already crashed), so it nacks.
|
|
864
|
+
// - R1 did receive op=3, but upon recovering its WAL, it was corrupt, so it cannot nack.
|
|
865
|
+
// The cluster must wait for R2 before recovering.
|
|
866
|
+
try t.replica(.R0).open();
|
|
867
|
+
try t.replica(.R1).open();
|
|
868
|
+
const mark = marks.check("quorum received, awaiting repair");
|
|
869
|
+
t.run();
|
|
870
|
+
try expectEqual(t.replica(.R0).status(), .view_change);
|
|
871
|
+
try expectEqual(t.replica(.R1).status(), .view_change);
|
|
872
|
+
try mark.expect_hit();
|
|
873
|
+
|
|
874
|
+
// R2 provides the missing header, allowing the view-change to succeed.
|
|
875
|
+
try t.replica(.R2).open();
|
|
876
|
+
t.run();
|
|
877
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
878
|
+
try expectEqual(t.replica(.R_).commit(), 4);
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
test "Cluster: view-change: DVC, 2/3 faulty header stall" {
|
|
882
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
883
|
+
defer t.deinit();
|
|
884
|
+
|
|
885
|
+
var c = t.clients(.{});
|
|
886
|
+
|
|
887
|
+
t.replica(.R0).stop();
|
|
888
|
+
try c.request(3, 3);
|
|
889
|
+
t.replica(.R1).stop();
|
|
890
|
+
t.replica(.R2).stop();
|
|
891
|
+
|
|
892
|
+
t.replica(.R1).corrupt(.{ .wal_prepare = 2 });
|
|
893
|
+
t.replica(.R2).corrupt(.{ .wal_prepare = 2 });
|
|
894
|
+
|
|
895
|
+
try t.replica(.R_).open();
|
|
896
|
+
const mark = marks.check("quorum received, deadlocked");
|
|
897
|
+
t.run();
|
|
898
|
+
try expectEqual(t.replica(.R_).status(), .view_change);
|
|
899
|
+
try mark.expect_hit();
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
test "Cluster: view-change: duel of the primaries" {
|
|
903
|
+
// In a cluster of 3, one replica gets partitioned away, and the remaining two _both_ become
|
|
904
|
+
// primaries (for different views). Additionally, the primary from the higher view is
|
|
905
|
+
// abdicating. The primaries should figure out that they need to view-change to a higher view.
|
|
906
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
907
|
+
defer t.deinit();
|
|
908
|
+
|
|
909
|
+
var c = t.clients(.{});
|
|
910
|
+
try c.request(2, 2);
|
|
911
|
+
try expectEqual(t.replica(.R_).commit(), 2);
|
|
912
|
+
|
|
913
|
+
try expectEqual(t.replica(.R_).view(), 1);
|
|
914
|
+
try expectEqual(t.replica(.R1).role(), .primary);
|
|
915
|
+
|
|
916
|
+
t.replica(.R2).drop_all(.R_, .bidirectional);
|
|
917
|
+
t.replica(.R1).drop(.R_, .outgoing, .commit);
|
|
918
|
+
try c.request(3, 3);
|
|
919
|
+
|
|
920
|
+
try expectEqual(t.replica(.R0).commit_max(), 2);
|
|
921
|
+
try expectEqual(t.replica(.R1).commit_max(), 3);
|
|
922
|
+
try expectEqual(t.replica(.R2).commit_max(), 2);
|
|
923
|
+
|
|
924
|
+
t.replica(.R0).pass_all(.R_, .bidirectional);
|
|
925
|
+
t.replica(.R2).pass_all(.R_, .bidirectional);
|
|
926
|
+
t.replica(.R1).drop_all(.R_, .bidirectional);
|
|
927
|
+
t.replica(.R2).drop(.R0, .bidirectional, .prepare_ok);
|
|
928
|
+
t.replica(.R2).drop(.R0, .outgoing, .do_view_change);
|
|
929
|
+
t.run();
|
|
930
|
+
|
|
931
|
+
// The stage is set: we have two primaries in different views, R2 is about to abdicate.
|
|
932
|
+
try expectEqual(t.replica(.R1).view(), 1);
|
|
933
|
+
try expectEqual(t.replica(.R1).status(), .normal);
|
|
934
|
+
try expectEqual(t.replica(.R1).role(), .primary);
|
|
935
|
+
try expectEqual(t.replica(.R1).commit(), 3);
|
|
936
|
+
try expectEqual(t.replica(.R2).op_head(), 3);
|
|
937
|
+
|
|
938
|
+
try expectEqual(t.replica(.R2).view(), 2);
|
|
939
|
+
try expectEqual(t.replica(.R2).status(), .normal);
|
|
940
|
+
try expectEqual(t.replica(.R2).role(), .primary);
|
|
941
|
+
try expectEqual(t.replica(.R2).commit(), 2);
|
|
942
|
+
try expectEqual(t.replica(.R2).op_head(), 3);
|
|
943
|
+
|
|
944
|
+
t.replica(.R1).pass_all(.R_, .bidirectional);
|
|
945
|
+
t.replica(.R2).pass_all(.R_, .bidirectional);
|
|
946
|
+
t.replica(.R0).drop_all(.R_, .bidirectional);
|
|
947
|
+
t.run();
|
|
948
|
+
|
|
949
|
+
try expectEqual(t.replica(.R1).commit(), 3);
|
|
950
|
+
try expectEqual(t.replica(.R2).commit(), 3);
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
test "Cluster: view_change: lagging replica advances checkpoint during view change" {
|
|
954
|
+
// It could be the case that the replica with the most advanced checkpoint has its checkpoint
|
|
955
|
+
// corrupted. In this case, a replica with a slightly older checkpoint must step up as primary.
|
|
956
|
+
|
|
957
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
958
|
+
defer t.deinit();
|
|
959
|
+
|
|
960
|
+
var c = t.clients(.{});
|
|
961
|
+
var a0 = t.replica(.A0);
|
|
962
|
+
var b1 = t.replica(.B1);
|
|
963
|
+
var b2 = t.replica(.B2);
|
|
964
|
+
|
|
965
|
+
b2.stop();
|
|
966
|
+
|
|
967
|
+
// Ensure b1 only commits up till checkpoint_2_trigger - 1, so it stays at checkpoint_1 while
|
|
968
|
+
// a0 moves to checkpoint_2.
|
|
969
|
+
try c.request(checkpoint_2_trigger - 1, checkpoint_2_trigger - 1);
|
|
970
|
+
b1.drop(.R_, .incoming, .commit);
|
|
971
|
+
try c.request(checkpoint_2_trigger, checkpoint_2_trigger);
|
|
972
|
+
|
|
973
|
+
try expectEqual(a0.commit(), checkpoint_2_trigger);
|
|
974
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
975
|
+
try expectEqual(b1.commit(), checkpoint_2_trigger - 1);
|
|
976
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_1);
|
|
977
|
+
|
|
978
|
+
b1.stop();
|
|
979
|
+
|
|
980
|
+
try b2.open();
|
|
981
|
+
// Don't allow b2 to repair its grid, otherwise it could help a0 commit past op_prepare_max for
|
|
982
|
+
// checkpoint_2.
|
|
983
|
+
b2.drop(.R_, .incoming, .block);
|
|
984
|
+
|
|
985
|
+
t.run();
|
|
986
|
+
|
|
987
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_2);
|
|
988
|
+
try expectEqual(b2.commit_max(), checkpoint_2_trigger);
|
|
989
|
+
try expectEqual(b2.status(), .normal);
|
|
990
|
+
|
|
991
|
+
// Progress a0 & b2's head past op_prepare_ok_max for checkpoint_2 (commit_max stays at
|
|
992
|
+
// op_prepare_ok_max since syncing replicas don't prepare_ok ops past prepare_ok_max).
|
|
993
|
+
try c.request(
|
|
994
|
+
checkpoint_2_prepare_max,
|
|
995
|
+
checkpoint_2_prepare_ok_max,
|
|
996
|
+
);
|
|
997
|
+
|
|
998
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
999
|
+
try expectEqual(a0.commit_max(), checkpoint_2_prepare_ok_max);
|
|
1000
|
+
|
|
1001
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_2);
|
|
1002
|
+
try expectEqual(b2.commit_max(), checkpoint_2_prepare_ok_max);
|
|
1003
|
+
|
|
1004
|
+
b2.stop();
|
|
1005
|
+
|
|
1006
|
+
a0.stop();
|
|
1007
|
+
// Drop incoming DVCs to a0 to check if b1 steps up as primary.
|
|
1008
|
+
a0.drop(.R_, .incoming, .do_view_change);
|
|
1009
|
+
try a0.open();
|
|
1010
|
+
|
|
1011
|
+
try b1.open();
|
|
1012
|
+
b1.pass(.R_, .incoming, .commit);
|
|
1013
|
+
|
|
1014
|
+
t.run();
|
|
1015
|
+
|
|
1016
|
+
try expectEqual(a0.status(), .normal);
|
|
1017
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
1018
|
+
|
|
1019
|
+
// b1 is able to advance its checkpoint during view change and become primary.
|
|
1020
|
+
try expectEqual(b1.role(), .primary);
|
|
1021
|
+
try expectEqual(b1.status(), .normal);
|
|
1022
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_2);
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
test "Cluster: view-change: primary with dirty log" {
|
|
1026
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1027
|
+
defer t.deinit();
|
|
1028
|
+
|
|
1029
|
+
var c = t.clients(.{});
|
|
1030
|
+
var a0 = t.replica(.A0);
|
|
1031
|
+
var b1 = t.replica(.B1);
|
|
1032
|
+
var b2 = t.replica(.B2);
|
|
1033
|
+
|
|
1034
|
+
// Commit past the checkpoint_2_trigger to ensure that the op we will corrupt won't be found in
|
|
1035
|
+
// B1's pipeline cache.
|
|
1036
|
+
const commit_max = checkpoint_2_trigger +
|
|
1037
|
+
constants.pipeline_prepare_queue_max +
|
|
1038
|
+
constants.pipeline_request_queue_max;
|
|
1039
|
+
|
|
1040
|
+
// Partition B2 so that it falls behind the cluster.
|
|
1041
|
+
b2.drop_all(.R_, .bidirectional);
|
|
1042
|
+
try c.request(commit_max, commit_max);
|
|
1043
|
+
|
|
1044
|
+
// Allow B2 to join the cluster and complete state sync.
|
|
1045
|
+
b2.pass_all(.R_, .bidirectional);
|
|
1046
|
+
t.run();
|
|
1047
|
+
|
|
1048
|
+
try expectEqual(t.replica(.R_).commit(), commit_max);
|
|
1049
|
+
try TestReplicas.expect_sync_done(t.replica(.R_));
|
|
1050
|
+
|
|
1051
|
+
// Crash A0, and force B2 to become the primary.
|
|
1052
|
+
a0.stop();
|
|
1053
|
+
b1.drop(.__, .incoming, .do_view_change);
|
|
1054
|
+
|
|
1055
|
+
// B2 tries to become primary. (Don't let B1 become primary – it would not realize its
|
|
1056
|
+
// checkpoint entry is corrupt, which would defeat the purpose of this test).
|
|
1057
|
+
// B2 tries to repair (request_prepare) this corrupt op, even though it is before its
|
|
1058
|
+
// checkpoint. B1 discovers that this op is corrupt, and marks it as faulty.
|
|
1059
|
+
b1.corrupt(.{ .wal_prepare = checkpoint_2 % slot_count });
|
|
1060
|
+
t.run();
|
|
1061
|
+
|
|
1062
|
+
try expectEqual(b1.status(), .normal);
|
|
1063
|
+
try expectEqual(b2.status(), .normal);
|
|
1064
|
+
}
|
|
1065
|
+
|
|
1066
|
+
test "Cluster: view-change: nack older view" {
|
|
1067
|
+
// a0 prepares (but does not commit) three ops (`x`, `x + 1`, `x + 2`) at view `v`.
|
|
1068
|
+
// b1 prepares (but does not commit) the same ops at view `v + 1`.
|
|
1069
|
+
// b2 receives only `x + 2` op prepared at b1.
|
|
1070
|
+
// b1 gets permanently partitioned from the cluster, and a0 and b2 form a core.
|
|
1071
|
+
//
|
|
1072
|
+
// a0 and b2 should be able to truncate all the prepared, but uncommitted ops.
|
|
1073
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1074
|
+
defer t.deinit();
|
|
1075
|
+
|
|
1076
|
+
var c = t.clients(.{});
|
|
1077
|
+
try c.request(checkpoint_1_trigger, checkpoint_1_trigger);
|
|
1078
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_trigger);
|
|
1079
|
+
|
|
1080
|
+
var a0 = t.replica(.A0);
|
|
1081
|
+
var b1 = t.replica(.B1);
|
|
1082
|
+
var b2 = t.replica(.B2);
|
|
1083
|
+
|
|
1084
|
+
try expectEqual(a0.role(), .primary);
|
|
1085
|
+
t.replica(.R_).drop_all(.R_, .bidirectional);
|
|
1086
|
+
try c.request(checkpoint_1_trigger + 3, checkpoint_1_trigger);
|
|
1087
|
+
try expectEqual(a0.op_head(), checkpoint_1_trigger + 3);
|
|
1088
|
+
|
|
1089
|
+
t.replica(.R_).pass(.R_, .bidirectional, .ping);
|
|
1090
|
+
t.replica(.R_).pass(.R_, .bidirectional, .pong);
|
|
1091
|
+
b1.pass(.R_, .bidirectional, .start_view_change);
|
|
1092
|
+
b1.pass(.R_, .incoming, .do_view_change);
|
|
1093
|
+
b1.pass(.R_, .outgoing, .start_view);
|
|
1094
|
+
a0.drop_all(.R_, .bidirectional);
|
|
1095
|
+
b2.pass(.R_, .incoming, .prepare);
|
|
1096
|
+
b2.drop_fn(.R_, .incoming, struct {
|
|
1097
|
+
fn drop_message(message: *const Message) bool {
|
|
1098
|
+
const header = message.header.into(.prepare) orelse return false;
|
|
1099
|
+
return header.op < checkpoint_1_trigger + 3;
|
|
1100
|
+
}
|
|
1101
|
+
}.drop_message);
|
|
1102
|
+
|
|
1103
|
+
t.run();
|
|
1104
|
+
try expectEqual(b1.role(), .primary);
|
|
1105
|
+
try expectEqual(b1.status(), .normal);
|
|
1106
|
+
|
|
1107
|
+
try expectEqual(t.replica(.R_).op_head(), checkpoint_1_trigger + 3);
|
|
1108
|
+
try expectEqual(t.replica(.R_).commit_max(), checkpoint_1_trigger);
|
|
1109
|
+
|
|
1110
|
+
a0.pass_all(.R_, .bidirectional);
|
|
1111
|
+
b2.pass_all(.R_, .bidirectional);
|
|
1112
|
+
b2.drop_fn(.R_, .incoming, null);
|
|
1113
|
+
b1.drop_all(.R_, .bidirectional);
|
|
1114
|
+
|
|
1115
|
+
try c.request(checkpoint_1_trigger + 3, checkpoint_1_trigger + 3);
|
|
1116
|
+
try expectEqual(b2.commit_max(), checkpoint_1_trigger + 3);
|
|
1117
|
+
try expectEqual(a0.commit_max(), checkpoint_1_trigger + 3);
|
|
1118
|
+
try expectEqual(b1.commit_max(), checkpoint_1_trigger);
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
test "Cluster: sync: partition, lag, sync (transition from idle)" {
|
|
1122
|
+
for ([_]u64{
|
|
1123
|
+
// Normal case: the cluster has prepared beyond the checkpoint.
|
|
1124
|
+
// The lagging replica can learn the latest checkpoint from a commit message.
|
|
1125
|
+
checkpoint_2_prepare_max + 1,
|
|
1126
|
+
// Idle case: the idle cluster has not prepared beyond the checkpoint.
|
|
1127
|
+
// The lagging replica is far enough behind the cluster that it can sync to the latest
|
|
1128
|
+
// checkpoint anyway, since it cannot possibly recover via WAL repair.
|
|
1129
|
+
checkpoint_2_prepare_max,
|
|
1130
|
+
}) |cluster_commit_max| {
|
|
1131
|
+
log.info("test cluster_commit_max={}", .{cluster_commit_max});
|
|
1132
|
+
|
|
1133
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1134
|
+
defer t.deinit();
|
|
1135
|
+
|
|
1136
|
+
var c = t.clients(.{});
|
|
1137
|
+
|
|
1138
|
+
t.replica(.R2).drop_all(.R_, .bidirectional);
|
|
1139
|
+
try c.request(cluster_commit_max, cluster_commit_max);
|
|
1140
|
+
|
|
1141
|
+
t.replica(.R2).pass_all(.R_, .bidirectional);
|
|
1142
|
+
t.run();
|
|
1143
|
+
|
|
1144
|
+
// R2 catches up via state sync.
|
|
1145
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
1146
|
+
try expectEqual(t.replica(.R_).commit(), cluster_commit_max);
|
|
1147
|
+
try expectEqual(t.replica(.R_).sync_status(), .idle);
|
|
1148
|
+
|
|
1149
|
+
// The entire cluster is healthy and able to commit more.
|
|
1150
|
+
try c.request(checkpoint_3_trigger, checkpoint_3_trigger);
|
|
1151
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
1152
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_3_trigger);
|
|
1153
|
+
|
|
1154
|
+
t.run(); // (Wait for grid sync to finish.)
|
|
1155
|
+
try TestReplicas.expect_sync_done(t.replica(.R_));
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
|
|
1159
|
+
test "Cluster: repair: R=2 (primary checkpoints, but backup lags behind)" {
|
|
1160
|
+
const t = try TestContext.init(.{ .replica_count = 2 });
|
|
1161
|
+
defer t.deinit();
|
|
1162
|
+
|
|
1163
|
+
var c = t.clients(.{});
|
|
1164
|
+
try c.request(checkpoint_1_trigger - 1, checkpoint_1_trigger - 1);
|
|
1165
|
+
|
|
1166
|
+
var a0 = t.replica(.A0);
|
|
1167
|
+
var b1 = t.replica(.B1);
|
|
1168
|
+
|
|
1169
|
+
// A0 prepares the trigger op, commits it, and checkpoints.
|
|
1170
|
+
// B1 prepares the trigger op, but does not commit/checkpoint.
|
|
1171
|
+
b1.drop(.R_, .incoming, .commit); // Prevent last commit.
|
|
1172
|
+
try c.request(checkpoint_1_trigger, checkpoint_1_trigger);
|
|
1173
|
+
try expectEqual(a0.commit(), checkpoint_1_trigger);
|
|
1174
|
+
try expectEqual(b1.commit(), checkpoint_1_trigger - 1);
|
|
1175
|
+
try expectEqual(a0.op_head(), checkpoint_1_trigger);
|
|
1176
|
+
try expectEqual(b1.op_head(), checkpoint_1_trigger);
|
|
1177
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_1);
|
|
1178
|
+
try expectEqual(b1.op_checkpoint(), 0);
|
|
1179
|
+
|
|
1180
|
+
// On B1, corrupt the same slot that A0 is about to overwrite with a new prepare.
|
|
1181
|
+
// (B1 doesn't have any prepare in this slot, thanks to the vsr_checkpoint_ops.)
|
|
1182
|
+
b1.stop();
|
|
1183
|
+
b1.pass(.R_, .incoming, .commit);
|
|
1184
|
+
b1.corrupt(.{ .wal_prepare = (checkpoint_1_trigger + 2) % slot_count });
|
|
1185
|
+
|
|
1186
|
+
// Prepare a full pipeline of ops. Since B1 is still lagging behind, this doesn't actually
|
|
1187
|
+
// overwrite any entries from the previous wrap.
|
|
1188
|
+
const pipeline_prepare_queue_max = constants.pipeline_prepare_queue_max;
|
|
1189
|
+
try c.request(checkpoint_1_trigger + pipeline_prepare_queue_max, checkpoint_1_trigger);
|
|
1190
|
+
|
|
1191
|
+
try b1.open();
|
|
1192
|
+
t.run();
|
|
1193
|
+
|
|
1194
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_trigger + pipeline_prepare_queue_max);
|
|
1195
|
+
try expectEqual(c.replies(), checkpoint_1_trigger + pipeline_prepare_queue_max);
|
|
1196
|
+
|
|
1197
|
+
// Neither replica used state sync, but it is "done" since all content is present.
|
|
1198
|
+
try TestReplicas.expect_sync_done(t.replica(.R_));
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
test "Cluster: sync: R=4, 2/4 ahead + idle, 2/4 lagging, sync" {
|
|
1202
|
+
const t = try TestContext.init(.{ .replica_count = 4 });
|
|
1203
|
+
defer t.deinit();
|
|
1204
|
+
|
|
1205
|
+
var c = t.clients(.{});
|
|
1206
|
+
try c.request(1, 1);
|
|
1207
|
+
try expectEqual(t.replica(.R_).commit(), 1);
|
|
1208
|
+
|
|
1209
|
+
var a0 = t.replica(.A0);
|
|
1210
|
+
var b1 = t.replica(.B1);
|
|
1211
|
+
var b2 = t.replica(.B2);
|
|
1212
|
+
var b3 = t.replica(.B3);
|
|
1213
|
+
|
|
1214
|
+
b2.stop();
|
|
1215
|
+
b3.stop();
|
|
1216
|
+
|
|
1217
|
+
try c.request(checkpoint_2_trigger, checkpoint_2_trigger);
|
|
1218
|
+
try expectEqual(a0.status(), .normal);
|
|
1219
|
+
try expectEqual(b1.status(), .normal);
|
|
1220
|
+
|
|
1221
|
+
try b2.open();
|
|
1222
|
+
try b3.open();
|
|
1223
|
+
t.run();
|
|
1224
|
+
t.run();
|
|
1225
|
+
|
|
1226
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
1227
|
+
try expectEqual(t.replica(.R_).sync_status(), .idle);
|
|
1228
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_2_trigger);
|
|
1229
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), checkpoint_2);
|
|
1230
|
+
|
|
1231
|
+
try TestReplicas.expect_sync_done(t.replica(.R_));
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1234
|
+
test "Cluster: sync: view-change with lagging replica" {
|
|
1235
|
+
// Check that a cluster can view change even if view-change quorum contains syncing replicas.
|
|
1236
|
+
// This used to be a special case for an older sync protocol, but now this mostly holds by
|
|
1237
|
+
// construction.
|
|
1238
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1239
|
+
defer t.deinit();
|
|
1240
|
+
|
|
1241
|
+
var c = t.clients(.{});
|
|
1242
|
+
try c.request(1, 1); // Make sure that the logic doesn't depend on the root prepare.
|
|
1243
|
+
try expectEqual(t.replica(.R_).commit(), 1);
|
|
1244
|
+
|
|
1245
|
+
var a0 = t.replica(.A0);
|
|
1246
|
+
var b1 = t.replica(.B1);
|
|
1247
|
+
var b2 = t.replica(.B2);
|
|
1248
|
+
|
|
1249
|
+
b2.drop_all(.R_, .bidirectional); // Isolate B2.
|
|
1250
|
+
try c.request(checkpoint_2_trigger, checkpoint_2_trigger);
|
|
1251
|
+
|
|
1252
|
+
// Allow B2 to join, but partition A0 to force a view change.
|
|
1253
|
+
// B2 is lagging far enough behind that it must state sync.
|
|
1254
|
+
// Despite this, the cluster of B1/B2 should recover to normal status.
|
|
1255
|
+
b2.pass_all(.R_, .bidirectional);
|
|
1256
|
+
a0.drop_all(.R_, .bidirectional);
|
|
1257
|
+
|
|
1258
|
+
// Let the cluster run for some time without B2 state syncing.
|
|
1259
|
+
b2.drop(.R_, .bidirectional, .start_view);
|
|
1260
|
+
t.run();
|
|
1261
|
+
try expectEqual(b2.status(), .view_change);
|
|
1262
|
+
try expectEqual(b2.op_checkpoint(), 0);
|
|
1263
|
+
try c.request(checkpoint_2_trigger + 1, checkpoint_2_trigger); // Cluster is blocked.
|
|
1264
|
+
|
|
1265
|
+
// Let B2 state sync. This unblocks the cluster.
|
|
1266
|
+
b2.pass(.R_, .bidirectional, .start_view);
|
|
1267
|
+
t.run();
|
|
1268
|
+
try expectEqual(b1.role(), .primary);
|
|
1269
|
+
try expectEqual(t.replica(.R_).status(), .normal);
|
|
1270
|
+
try expectEqual(t.replica(.R_).sync_status(), .idle);
|
|
1271
|
+
try expect(b2.commit() >= checkpoint_2_trigger);
|
|
1272
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), checkpoint_2);
|
|
1273
|
+
|
|
1274
|
+
// Note: we need to commit more --- state sync status is cleared only at checkpoint.
|
|
1275
|
+
try c.request(checkpoint_3_trigger, checkpoint_3_trigger);
|
|
1276
|
+
try TestReplicas.expect_sync_done(t.replica(.R_));
|
|
1277
|
+
}
|
|
1278
|
+
|
|
1279
|
+
test "Cluster: sync: slightly lagging replica" {
|
|
1280
|
+
// Sometimes a replica must switch to state sync even if it is within journal_slot_count
|
|
1281
|
+
// ops from commit_max. Checkpointed ops are not repaired and might become unavailable.
|
|
1282
|
+
|
|
1283
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1284
|
+
defer t.deinit();
|
|
1285
|
+
|
|
1286
|
+
var c = t.clients(.{});
|
|
1287
|
+
try c.request(checkpoint_1 - 1, checkpoint_1 - 1);
|
|
1288
|
+
|
|
1289
|
+
var a0 = t.replica(.A0);
|
|
1290
|
+
var b1 = t.replica(.B1);
|
|
1291
|
+
var b2 = t.replica(.B2);
|
|
1292
|
+
|
|
1293
|
+
b2.drop_all(.R_, .bidirectional);
|
|
1294
|
+
try c.request(checkpoint_1_trigger + 1, checkpoint_1_trigger + 1);
|
|
1295
|
+
|
|
1296
|
+
// Corrupt all copies of a checkpointed prepare.
|
|
1297
|
+
a0.corrupt(.{ .wal_prepare = checkpoint_1 });
|
|
1298
|
+
b1.corrupt(.{ .wal_prepare = checkpoint_1 });
|
|
1299
|
+
try c.request(checkpoint_1_prepare_max + 1, checkpoint_1_prepare_max + 1);
|
|
1300
|
+
|
|
1301
|
+
// At this point, b2 won't be able to repair WAL and must state sync.
|
|
1302
|
+
b2.pass_all(.R_, .bidirectional);
|
|
1303
|
+
try c.request(checkpoint_1_prepare_max + 2, checkpoint_1_prepare_max + 2);
|
|
1304
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_prepare_max + 2);
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1307
|
+
test "Cluster: sync: using SV from durable checkpoint" {
|
|
1308
|
+
// Primary sends a SV message to backups when a checkpoint becomes durable. A lagging backup
|
|
1309
|
+
// must use this SV message to state sync to the checkpoint.
|
|
1310
|
+
|
|
1311
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1312
|
+
defer t.deinit();
|
|
1313
|
+
|
|
1314
|
+
var c = t.clients(.{});
|
|
1315
|
+
|
|
1316
|
+
var a0 = t.replica(.A0);
|
|
1317
|
+
var b1 = t.replica(.B1);
|
|
1318
|
+
var b2 = t.replica(.B2);
|
|
1319
|
+
|
|
1320
|
+
// Run for a few ticks to ensure all replicas transition to normal status.
|
|
1321
|
+
t.run();
|
|
1322
|
+
|
|
1323
|
+
b2.stop();
|
|
1324
|
+
|
|
1325
|
+
try c.request(checkpoint_1_prepare_max - 1, checkpoint_1_prepare_max - 1);
|
|
1326
|
+
|
|
1327
|
+
// Ensure b2 can't repair its WAL, commit & transition to checkpoint_1.
|
|
1328
|
+
a0.drop(.R_, .incoming, .request_prepare);
|
|
1329
|
+
b1.drop(.R_, .incoming, .request_prepare);
|
|
1330
|
+
|
|
1331
|
+
try b2.open();
|
|
1332
|
+
|
|
1333
|
+
// Ensure b2 doesn't use repair_sync_timeout to initiate state sync and instead uses a SV
|
|
1334
|
+
// message that a0 sends on checkpoint durability.
|
|
1335
|
+
const b2_replica = &t.cluster.replicas[b2.replicas.get(0)];
|
|
1336
|
+
b2_replica.repair_sync_timeout.stop();
|
|
1337
|
+
|
|
1338
|
+
// b2 at first only accepts prepares up till checkpoint_1_prepare_max. When a0 and b1 commit
|
|
1339
|
+
// past checkpoint_2_prepare_ok_max and checkpoint_2 is durable, a0 sends a SV message to
|
|
1340
|
+
// the backups. b2 uses this SV message to state sync to checkpoint_2.
|
|
1341
|
+
try c.request(checkpoint_2_prepare_ok_max + 1, checkpoint_2_prepare_ok_max + 1);
|
|
1342
|
+
|
|
1343
|
+
try expectEqual(a0.commit(), checkpoint_2_prepare_ok_max + 1);
|
|
1344
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
1345
|
+
|
|
1346
|
+
try expectEqual(b1.commit(), checkpoint_2_prepare_ok_max + 1);
|
|
1347
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_2);
|
|
1348
|
+
|
|
1349
|
+
try expectEqual(b2.op_head(), checkpoint_2_prepare_ok_max + 1);
|
|
1350
|
+
try expectEqual(b2.commit(), checkpoint_2);
|
|
1351
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_2);
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
test "Cluster: sync: checkpoint from a newer view" {
|
|
1355
|
+
// B1 appends (but does not commit) prepares across a checkpoint boundary.
|
|
1356
|
+
// Then the cluster truncates those prepares and commits past the checkpoint trigger.
|
|
1357
|
+
// When B1 subsequently joins, it should state sync and truncate the log. Immediately
|
|
1358
|
+
// after state sync, the log doesn't connect to B1's new checkpoint.
|
|
1359
|
+
const t = try TestContext.init(.{ .replica_count = 6 });
|
|
1360
|
+
defer t.deinit();
|
|
1361
|
+
|
|
1362
|
+
var c = t.clients(.{});
|
|
1363
|
+
try c.request(checkpoint_1 - 1, checkpoint_1 - 1);
|
|
1364
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1 - 1);
|
|
1365
|
+
|
|
1366
|
+
var a0 = t.replica(.A0);
|
|
1367
|
+
var b1 = t.replica(.B1);
|
|
1368
|
+
|
|
1369
|
+
{
|
|
1370
|
+
// Prevent A0 from committing, prevent any other replica from becoming a primary, and
|
|
1371
|
+
// only allow B1 to learn about A0 prepares.
|
|
1372
|
+
t.replica(.R_).drop(.R_, .incoming, .prepare);
|
|
1373
|
+
t.replica(.R_).drop(.R_, .incoming, .prepare_ok);
|
|
1374
|
+
t.replica(.R_).drop(.R_, .incoming, .start_view_change);
|
|
1375
|
+
|
|
1376
|
+
// Force b1 to sync, rather than repair, by making op=checkpoint_1 - 1 unavailable.
|
|
1377
|
+
b1.stop();
|
|
1378
|
+
b1.corrupt(.{ .wal_prepare = (checkpoint_1 - 1) % slot_count });
|
|
1379
|
+
try b1.open();
|
|
1380
|
+
b1.pass(.A0, .incoming, .prepare);
|
|
1381
|
+
b1.drop_fn(.A0, .incoming, struct {
|
|
1382
|
+
fn drop_message(message: *const Message) bool {
|
|
1383
|
+
const header = message.header.into(.prepare) orelse return false;
|
|
1384
|
+
return header.op == checkpoint_1 - 1;
|
|
1385
|
+
}
|
|
1386
|
+
}.drop_message);
|
|
1387
|
+
|
|
1388
|
+
try c.request(checkpoint_1 + 1, checkpoint_1 - 1);
|
|
1389
|
+
|
|
1390
|
+
try expectEqual(a0.op_head(), checkpoint_1 + 1);
|
|
1391
|
+
try expectEqual(b1.op_head(), checkpoint_1 + 1);
|
|
1392
|
+
try expectEqual(a0.commit(), checkpoint_1 - 1);
|
|
1393
|
+
try expectEqual(b1.commit(), checkpoint_1 - 2);
|
|
1394
|
+
}
|
|
1395
|
+
|
|
1396
|
+
{
|
|
1397
|
+
// Make the rest of the cluster prepare and commit a different sequence of prepares.
|
|
1398
|
+
t.replica(.R_).pass(.R_, .incoming, .prepare);
|
|
1399
|
+
t.replica(.R_).pass(.R_, .incoming, .prepare_ok);
|
|
1400
|
+
t.replica(.R_).pass(.R_, .incoming, .start_view_change);
|
|
1401
|
+
|
|
1402
|
+
a0.drop_all(.R_, .bidirectional);
|
|
1403
|
+
b1.drop_all(.R_, .bidirectional);
|
|
1404
|
+
try c.request(checkpoint_2, checkpoint_2);
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1407
|
+
{
|
|
1408
|
+
// Let B1 rejoin, but prevent it from jumping into view change.
|
|
1409
|
+
b1.pass_all(.R_, .bidirectional);
|
|
1410
|
+
b1.drop(.R_, .bidirectional, .start_view);
|
|
1411
|
+
b1.drop(.R_, .incoming, .ping);
|
|
1412
|
+
b1.drop(.R_, .incoming, .pong);
|
|
1413
|
+
|
|
1414
|
+
try c.request(checkpoint_2_trigger - 1, checkpoint_2_trigger - 1);
|
|
1415
|
+
|
|
1416
|
+
// Wipe B1 in-memory state and check that it ends up in a consistent state after restart.
|
|
1417
|
+
b1.stop();
|
|
1418
|
+
try b1.open();
|
|
1419
|
+
t.run();
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
t.replica(.R_).pass_all(.R_, .bidirectional);
|
|
1423
|
+
t.run();
|
|
1424
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_2_trigger - 1);
|
|
1425
|
+
}
|
|
1426
|
+
|
|
1427
|
+
test "Cluster: prepare beyond checkpoint trigger" {
|
|
1428
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1429
|
+
defer t.deinit();
|
|
1430
|
+
|
|
1431
|
+
var c = t.clients(.{});
|
|
1432
|
+
try c.request(checkpoint_1_trigger - 1, checkpoint_1_trigger - 1);
|
|
1433
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_trigger - 1);
|
|
1434
|
+
|
|
1435
|
+
// Temporarily drop acks so that requests may prepare but not commit.
|
|
1436
|
+
// (And to make sure we don't start checkpointing until we have had a chance to assert the
|
|
1437
|
+
// cluster's state.)
|
|
1438
|
+
t.replica(.R_).drop(.__, .bidirectional, .prepare_ok);
|
|
1439
|
+
|
|
1440
|
+
// Prepare ops beyond the checkpoint.
|
|
1441
|
+
try c.request(checkpoint_1_prepare_ok_max, checkpoint_1_trigger - 1);
|
|
1442
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), 0);
|
|
1443
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_trigger - 1);
|
|
1444
|
+
try expectEqual(t.replica(.R_).op_head(), checkpoint_1_prepare_ok_max - 1);
|
|
1445
|
+
|
|
1446
|
+
t.replica(.R_).pass(.__, .bidirectional, .prepare_ok);
|
|
1447
|
+
t.run();
|
|
1448
|
+
try expectEqual(c.replies(), checkpoint_1_prepare_ok_max);
|
|
1449
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), checkpoint_1);
|
|
1450
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_prepare_ok_max);
|
|
1451
|
+
try expectEqual(t.replica(.R_).op_head(), checkpoint_1_prepare_ok_max);
|
|
1452
|
+
}
|
|
1453
|
+
|
|
1454
|
+
test "Cluster: upgrade: operation=upgrade near trigger-minus-bar" {
|
|
1455
|
+
const trigger_for_checkpoint = vsr.Checkpoint.trigger_for_checkpoint;
|
|
1456
|
+
for ([_]struct {
|
|
1457
|
+
request: u64,
|
|
1458
|
+
checkpoint: u64,
|
|
1459
|
+
}{
|
|
1460
|
+
.{
|
|
1461
|
+
// The entire last bar before the operation is free for operation=upgrade's, so when we
|
|
1462
|
+
// hit the checkpoint trigger we can immediately upgrade the cluster.
|
|
1463
|
+
.request = checkpoint_1_trigger - constants.lsm_compaction_ops,
|
|
1464
|
+
.checkpoint = checkpoint_1,
|
|
1465
|
+
},
|
|
1466
|
+
.{
|
|
1467
|
+
// Since there is a non-upgrade request in the last bar, the replica cannot upgrade
|
|
1468
|
+
// during checkpoint_1 and must pad ahead to the next checkpoint.
|
|
1469
|
+
.request = checkpoint_1_trigger - constants.lsm_compaction_ops + 1,
|
|
1470
|
+
.checkpoint = checkpoint_2,
|
|
1471
|
+
},
|
|
1472
|
+
}) |data| {
|
|
1473
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1474
|
+
defer t.deinit();
|
|
1475
|
+
|
|
1476
|
+
var c = t.clients(.{});
|
|
1477
|
+
try c.request(data.request, data.request);
|
|
1478
|
+
|
|
1479
|
+
t.replica(.R_).stop();
|
|
1480
|
+
try t.replica(.R_).open_upgrade(&[_]u8{ 10, 20 });
|
|
1481
|
+
|
|
1482
|
+
// Prevent the upgrade from committing so that we can verify that the replica is still
|
|
1483
|
+
// running version 1.
|
|
1484
|
+
t.replica(.R_).drop(.__, .bidirectional, .prepare_ok);
|
|
1485
|
+
t.run();
|
|
1486
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), 0);
|
|
1487
|
+
try expectEqual(t.replica(.R_).release(), 10);
|
|
1488
|
+
|
|
1489
|
+
t.replica(.R_).pass(.__, .bidirectional, .prepare_ok);
|
|
1490
|
+
t.run();
|
|
1491
|
+
try expectEqual(t.replica(.R_).release(), 20);
|
|
1492
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), data.checkpoint);
|
|
1493
|
+
try expectEqual(t.replica(.R_).commit(), trigger_for_checkpoint(data.checkpoint).?);
|
|
1494
|
+
try expectEqual(t.replica(.R_).op_head(), trigger_for_checkpoint(data.checkpoint).?);
|
|
1495
|
+
|
|
1496
|
+
// Verify that the upgraded cluster is healthy; i.e. that it can commit.
|
|
1497
|
+
try c.request(data.request + 1, data.request + 1);
|
|
1498
|
+
}
|
|
1499
|
+
}
|
|
1500
|
+
|
|
1501
|
+
test "Cluster: upgrade: R=1" {
|
|
1502
|
+
// R=1 clusters upgrade even though they don't build a quorum of upgrade targets.
|
|
1503
|
+
const t = try TestContext.init(.{ .replica_count = 1 });
|
|
1504
|
+
defer t.deinit();
|
|
1505
|
+
|
|
1506
|
+
t.replica(.R_).stop();
|
|
1507
|
+
try t.replica(.R0).open_upgrade(&[_]u8{ 10, 20 });
|
|
1508
|
+
t.run();
|
|
1509
|
+
|
|
1510
|
+
try expectEqual(t.replica(.R0).health(), .up);
|
|
1511
|
+
try expectEqual(t.replica(.R0).release(), 20);
|
|
1512
|
+
try expectEqual(t.replica(.R0).op_checkpoint(), checkpoint_1);
|
|
1513
|
+
try expectEqual(t.replica(.R0).commit(), checkpoint_1_trigger);
|
|
1514
|
+
}
|
|
1515
|
+
|
|
1516
|
+
test "Cluster: upgrade: state-sync to new release" {
|
|
1517
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1518
|
+
defer t.deinit();
|
|
1519
|
+
|
|
1520
|
+
var c = t.clients(.{});
|
|
1521
|
+
|
|
1522
|
+
t.replica(.R_).stop();
|
|
1523
|
+
try t.replica(.R0).open_upgrade(&[_]u8{ 10, 20 });
|
|
1524
|
+
try t.replica(.R1).open_upgrade(&[_]u8{ 10, 20 });
|
|
1525
|
+
try t.replica(.R2).open_upgrade(&[_]u8{ 10, 20 });
|
|
1526
|
+
|
|
1527
|
+
// R2 is advertising the new release (so that the upgrade can begin) but it doesn't actually
|
|
1528
|
+
// join in yet.
|
|
1529
|
+
t.replica(.R2).drop(.__, .bidirectional, .prepare);
|
|
1530
|
+
t.replica(.R2).drop(.__, .bidirectional, .start_view); // Prevent state sync.
|
|
1531
|
+
t.run();
|
|
1532
|
+
|
|
1533
|
+
try expectEqual(t.replica(.R0).commit(), checkpoint_1_trigger);
|
|
1534
|
+
try c.request(constants.vsr_checkpoint_ops, constants.vsr_checkpoint_ops);
|
|
1535
|
+
try expectEqual(t.replica(.R0).commit(), checkpoint_2_trigger);
|
|
1536
|
+
|
|
1537
|
+
// R2 state-syncs from R0/R1, updating its release from v1 to v2 via CheckpointState...
|
|
1538
|
+
t.replica(.R2).stop();
|
|
1539
|
+
t.replica(.R2).pass_all(.__, .bidirectional);
|
|
1540
|
+
try t.replica(.R2).open_upgrade(&[_]u8{10});
|
|
1541
|
+
try expectEqual(t.replica(.R2).health(), .up);
|
|
1542
|
+
try expectEqual(t.replica(.R2).release(), 10);
|
|
1543
|
+
try expectEqual(t.replica(.R2).commit(), 0);
|
|
1544
|
+
t.run();
|
|
1545
|
+
|
|
1546
|
+
// ...But R2 doesn't have v2 available, so it shuts down.
|
|
1547
|
+
try expectEqual(t.replica(.R2).health(), .down);
|
|
1548
|
+
try expectEqual(t.replica(.R2).release(), 10);
|
|
1549
|
+
try expectEqual(t.replica(.R2).commit(), checkpoint_2);
|
|
1550
|
+
|
|
1551
|
+
// Start R2 up with v2 available, and it recovers.
|
|
1552
|
+
try t.replica(.R2).open_upgrade(&[_]u8{ 10, 20 });
|
|
1553
|
+
try expectEqual(t.replica(.R2).health(), .up);
|
|
1554
|
+
try expectEqual(t.replica(.R2).release(), 20);
|
|
1555
|
+
try expectEqual(t.replica(.R2).commit(), checkpoint_2);
|
|
1556
|
+
|
|
1557
|
+
t.run();
|
|
1558
|
+
try expectEqual(t.replica(.R2).commit(), t.replica(.R_).commit());
|
|
1559
|
+
}
|
|
1560
|
+
|
|
1561
|
+
test "Cluster: scrub: background scrubber, fully corrupt grid" {
|
|
1562
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1563
|
+
defer t.deinit();
|
|
1564
|
+
|
|
1565
|
+
var c = t.clients(.{});
|
|
1566
|
+
try c.request(checkpoint_2_trigger, checkpoint_2_trigger);
|
|
1567
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_2_trigger);
|
|
1568
|
+
|
|
1569
|
+
var a0 = t.replica(.A0);
|
|
1570
|
+
const b1 = t.replica(.B1);
|
|
1571
|
+
var b2 = t.replica(.B2);
|
|
1572
|
+
|
|
1573
|
+
const a0_free_set = &t.cluster.replicas[a0.replicas.get(0)].grid.free_set;
|
|
1574
|
+
const b2_free_set = &t.cluster.replicas[b2.replicas.get(0)].grid.free_set;
|
|
1575
|
+
const b2_storage = &t.cluster.storages[b2.replicas.get(0)];
|
|
1576
|
+
|
|
1577
|
+
// Corrupt B2's entire grid.
|
|
1578
|
+
// Note that we intentionally do *not* shut down B2 for this – the intent is to test the
|
|
1579
|
+
// scrubber, without leaning on Grid.read_block()'s `from_local_or_global_storage`.
|
|
1580
|
+
{
|
|
1581
|
+
const address_max = t.block_address_max();
|
|
1582
|
+
var address: u64 = 1;
|
|
1583
|
+
while (address <= address_max) : (address += 1) {
|
|
1584
|
+
b2.corrupt(.{ .grid_block = address });
|
|
1585
|
+
}
|
|
1586
|
+
}
|
|
1587
|
+
|
|
1588
|
+
// Disable new read/write faults so that we can use `storage.faults` to track repairs.
|
|
1589
|
+
// (That is, as the scrubber runs, the number of faults will monotonically decrease.)
|
|
1590
|
+
b2_storage.options.read_fault_probability = Ratio.zero();
|
|
1591
|
+
b2_storage.options.write_fault_probability = Ratio.zero();
|
|
1592
|
+
|
|
1593
|
+
// Tick until B2's grid repair stops making progress.
|
|
1594
|
+
{
|
|
1595
|
+
var faults_before = b2_storage.faults.count();
|
|
1596
|
+
while (true) {
|
|
1597
|
+
t.run();
|
|
1598
|
+
|
|
1599
|
+
const faults_after = b2_storage.faults.count();
|
|
1600
|
+
assert(faults_after <= faults_before);
|
|
1601
|
+
if (faults_after == faults_before) break;
|
|
1602
|
+
|
|
1603
|
+
faults_before = faults_after;
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
// Verify that B2 repaired all blocks.
|
|
1608
|
+
const address_max = t.block_address_max();
|
|
1609
|
+
var address: u64 = 1;
|
|
1610
|
+
while (address <= address_max) : (address += 1) {
|
|
1611
|
+
if (a0_free_set.is_free(address)) {
|
|
1612
|
+
assert(b2_free_set.is_free(address));
|
|
1613
|
+
assert(b2_storage.area_faulty(.{ .grid = .{ .address = address } }));
|
|
1614
|
+
} else if (!a0_free_set.is_released(address)) {
|
|
1615
|
+
// Acquired (but not released) blocks are guaranteed to be repaired by the scrubber.
|
|
1616
|
+
assert(!b2_free_set.is_free(address));
|
|
1617
|
+
assert(!b2_free_set.is_released(address));
|
|
1618
|
+
assert(!b2_storage.area_faulty(.{ .grid = .{ .address = address } }));
|
|
1619
|
+
} else {
|
|
1620
|
+
// Acquired (but released) blocks are not guaranteed to be repaired by the scrubber.
|
|
1621
|
+
// Includes the following blocks that will be freed when checkpoint_2 becomes durable:
|
|
1622
|
+
// * Blocks released by ManifestLog compaction,
|
|
1623
|
+
// * Blocks released by ClientSessions and FreeSet checkpoint trailers (these *could* be
|
|
1624
|
+
// released at the checkpoint itself, since new checkpoint trailers are allocated
|
|
1625
|
+
// at checkpoint, but we release them at checkpoint durability alongside other
|
|
1626
|
+
// released blocks).
|
|
1627
|
+
maybe(b2_storage.area_faulty(.{ .grid = .{ .address = address } }));
|
|
1628
|
+
}
|
|
1629
|
+
}
|
|
1630
|
+
|
|
1631
|
+
try TestReplicas.expect_equal_grid(a0, b2);
|
|
1632
|
+
try TestReplicas.expect_equal_grid(b1, b2);
|
|
1633
|
+
}
|
|
1634
|
+
|
|
1635
|
+
// Compat(v0.15.3)
|
|
1636
|
+
test "Cluster: client: empty command=request operation=register body" {
|
|
1637
|
+
const run_test = struct {
|
|
1638
|
+
fn run_test(
|
|
1639
|
+
client_release: vsr.Release,
|
|
1640
|
+
eviction_reason: vsr.Header.Eviction.Reason,
|
|
1641
|
+
) !void {
|
|
1642
|
+
const t = try TestContext.init(.{ .replica_count = 1 });
|
|
1643
|
+
defer t.deinit();
|
|
1644
|
+
|
|
1645
|
+
// Wait for the primary to settle, since this test doesn't implement request retries.
|
|
1646
|
+
t.run();
|
|
1647
|
+
|
|
1648
|
+
var client_bus = try t.client_bus(0);
|
|
1649
|
+
defer client_bus.deinit();
|
|
1650
|
+
|
|
1651
|
+
var request_header = vsr.Header.Request{
|
|
1652
|
+
.cluster = t.cluster.options.cluster_id,
|
|
1653
|
+
.size = @sizeOf(vsr.Header),
|
|
1654
|
+
.client = client_bus.client_id,
|
|
1655
|
+
.request = 0,
|
|
1656
|
+
.command = .request,
|
|
1657
|
+
.operation = .register,
|
|
1658
|
+
.release = client_release,
|
|
1659
|
+
.previous_request_latency = 0,
|
|
1660
|
+
};
|
|
1661
|
+
request_header.set_checksum_body(&.{}); // Note the absence of a `vsr.RegisterRequest`.
|
|
1662
|
+
request_header.set_checksum();
|
|
1663
|
+
|
|
1664
|
+
client_bus.request(t.replica(.A0).index(), &request_header, &.{});
|
|
1665
|
+
t.run();
|
|
1666
|
+
|
|
1667
|
+
const reply = std.mem.bytesAsValue(
|
|
1668
|
+
vsr.Header.Eviction,
|
|
1669
|
+
client_bus.reply.?.buffer[0..@sizeOf(vsr.Header.Eviction)],
|
|
1670
|
+
);
|
|
1671
|
+
try expectEqual(reply.command, .eviction);
|
|
1672
|
+
try expectEqual(reply.size, @sizeOf(vsr.Header.Eviction));
|
|
1673
|
+
try expectEqual(reply.reason, eviction_reason);
|
|
1674
|
+
}
|
|
1675
|
+
}.run_test;
|
|
1676
|
+
|
|
1677
|
+
try run_test(vsr.Release.minimum, .client_release_too_low);
|
|
1678
|
+
try run_test(releases[0].release_client_min, .invalid_request_body_size);
|
|
1679
|
+
}
|
|
1680
|
+
|
|
1681
|
+
test "Cluster: eviction: no_session" {
|
|
1682
|
+
const t = try TestContext.init(.{
|
|
1683
|
+
.replica_count = 3,
|
|
1684
|
+
.client_count = constants.clients_max + 1,
|
|
1685
|
+
});
|
|
1686
|
+
defer t.deinit();
|
|
1687
|
+
|
|
1688
|
+
var c0 = t.clients(.{ .index = 0, .count = 1 });
|
|
1689
|
+
var c = t.clients(.{ .index = 1, .count = constants.clients_max });
|
|
1690
|
+
|
|
1691
|
+
// Register a single client.
|
|
1692
|
+
try c0.request(1, 1);
|
|
1693
|
+
// Register clients_max other clients.
|
|
1694
|
+
// This evicts the "extra" client, though the eviction message has not been sent yet.
|
|
1695
|
+
try c.request(constants.clients_max, constants.clients_max);
|
|
1696
|
+
|
|
1697
|
+
// Try to send one last request -- which fails, since this client has been evicted.
|
|
1698
|
+
try c0.request(2, 1);
|
|
1699
|
+
try expectEqual(c0.eviction_reason(), .no_session);
|
|
1700
|
+
try expectEqual(c.eviction_reason(), null);
|
|
1701
|
+
}
|
|
1702
|
+
|
|
1703
|
+
test "Cluster: eviction: client_release_too_low" {
|
|
1704
|
+
const t = try TestContext.init(.{
|
|
1705
|
+
.replica_count = 3,
|
|
1706
|
+
.client_release = .{ .value = releases[0].release.value - 1 },
|
|
1707
|
+
});
|
|
1708
|
+
defer t.deinit();
|
|
1709
|
+
|
|
1710
|
+
var c0 = t.clients(.{ .index = 0, .count = 1 });
|
|
1711
|
+
try c0.request(1, 0);
|
|
1712
|
+
try expectEqual(c0.eviction_reason(), .client_release_too_low);
|
|
1713
|
+
}
|
|
1714
|
+
|
|
1715
|
+
test "Cluster: eviction: client_release_too_high" {
|
|
1716
|
+
const t = try TestContext.init(.{
|
|
1717
|
+
.replica_count = 3,
|
|
1718
|
+
.client_release = .{ .value = releases[0].release.value + 1 },
|
|
1719
|
+
});
|
|
1720
|
+
defer t.deinit();
|
|
1721
|
+
|
|
1722
|
+
var c0 = t.clients(.{ .index = 0, .count = 1 });
|
|
1723
|
+
try c0.request(1, 0);
|
|
1724
|
+
try expectEqual(c0.eviction_reason(), .client_release_too_high);
|
|
1725
|
+
}
|
|
1726
|
+
|
|
1727
|
+
test "Cluster: eviction: session_too_low" {
|
|
1728
|
+
const t = try TestContext.init(.{
|
|
1729
|
+
.replica_count = 3,
|
|
1730
|
+
.client_count = constants.clients_max + 1,
|
|
1731
|
+
});
|
|
1732
|
+
defer t.deinit();
|
|
1733
|
+
|
|
1734
|
+
var c0 = t.clients(.{ .index = 0, .count = 1 });
|
|
1735
|
+
var c = t.clients(.{ .index = 1, .count = constants.clients_max });
|
|
1736
|
+
|
|
1737
|
+
t.replica(.R_).record(.C0, .incoming, .request);
|
|
1738
|
+
try c0.request(1, 1);
|
|
1739
|
+
|
|
1740
|
+
// Evict C0. (C0 doesn't know this yet, though).
|
|
1741
|
+
try c.request(constants.clients_max, constants.clients_max);
|
|
1742
|
+
try expectEqual(c0.eviction_reason(), null);
|
|
1743
|
+
|
|
1744
|
+
// Replay C0's register message.
|
|
1745
|
+
t.replica(.R_).replay_recorded();
|
|
1746
|
+
t.run();
|
|
1747
|
+
|
|
1748
|
+
const mark = marks.check("on_request: ignoring older session");
|
|
1749
|
+
|
|
1750
|
+
// C0 now has a session again, but the client only knows the old (evicted) session number.
|
|
1751
|
+
try c0.request(2, 1);
|
|
1752
|
+
try mark.expect_hit();
|
|
1753
|
+
try expectEqual(c0.eviction_reason(), .session_too_low);
|
|
1754
|
+
}
|
|
1755
|
+
|
|
1756
|
+
test "Cluster: view_change: DVC header doesn't match current header in journal" {
|
|
1757
|
+
// It could be the case that a replica's DVC headers don't match the journal's current state.
|
|
1758
|
+
// For example, a header could be blank in the DVC but present in the journal (could happen if
|
|
1759
|
+
// the DVC was computed when that header was corrupt/missing in the replica's journal, and the
|
|
1760
|
+
// replica is simply reusing an old DVC). The replica must check the journal before
|
|
1761
|
+
// broadcasting its DVC, so it appropriately acks/nacks headers in the DVC based on the current
|
|
1762
|
+
// state of the journal.
|
|
1763
|
+
|
|
1764
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1765
|
+
defer t.deinit();
|
|
1766
|
+
|
|
1767
|
+
var c = t.clients(.{});
|
|
1768
|
+
var a0 = t.replica(.A0);
|
|
1769
|
+
var b1 = t.replica(.B1);
|
|
1770
|
+
var b2 = t.replica(.B2);
|
|
1771
|
+
|
|
1772
|
+
b2.stop();
|
|
1773
|
+
|
|
1774
|
+
// Ensure b1 only commits up till checkpoint_2_trigger - 1, so it stays at checkpoint_1 while
|
|
1775
|
+
// a0 moves to checkpoint_2.
|
|
1776
|
+
try c.request(checkpoint_2_trigger - 1, checkpoint_2_trigger - 1);
|
|
1777
|
+
b1.drop(.R_, .incoming, .commit);
|
|
1778
|
+
try c.request(checkpoint_2_trigger, checkpoint_2_trigger);
|
|
1779
|
+
|
|
1780
|
+
try expectEqual(a0.commit(), checkpoint_2_trigger);
|
|
1781
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
1782
|
+
try expectEqual(b1.commit(), checkpoint_2_trigger - 1);
|
|
1783
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_1);
|
|
1784
|
+
|
|
1785
|
+
b1.stop();
|
|
1786
|
+
|
|
1787
|
+
try b2.open();
|
|
1788
|
+
t.run();
|
|
1789
|
+
|
|
1790
|
+
// b2 performs state sync to get caught up with a0.
|
|
1791
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_2);
|
|
1792
|
+
try expectEqual(b2.commit_max(), checkpoint_2_trigger);
|
|
1793
|
+
try expectEqual(b2.status(), .normal);
|
|
1794
|
+
try b2.expect_sync_done();
|
|
1795
|
+
|
|
1796
|
+
try c.request(checkpoint_2_prepare_max, checkpoint_2_prepare_max);
|
|
1797
|
+
|
|
1798
|
+
// a0 and b2 both prepare and commit up to the prepare_max for checkpoint_2.
|
|
1799
|
+
try expectEqual(a0.op_head(), checkpoint_2_prepare_max);
|
|
1800
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
1801
|
+
try expectEqual(a0.commit_max(), checkpoint_2_prepare_max);
|
|
1802
|
+
|
|
1803
|
+
try expectEqual(b2.op_head(), checkpoint_2_prepare_max);
|
|
1804
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_2);
|
|
1805
|
+
try expectEqual(b2.commit_max(), checkpoint_2_prepare_max);
|
|
1806
|
+
|
|
1807
|
+
b2.stop();
|
|
1808
|
+
a0.stop();
|
|
1809
|
+
|
|
1810
|
+
// Corrupt op_head() - 1 to ensure that the DVC headers computed by a0 on startup contain a
|
|
1811
|
+
// blank header for op_head() - 1.
|
|
1812
|
+
a0.corrupt(.{ .wal_prepare = (a0.op_head() - 1) % slot_count });
|
|
1813
|
+
|
|
1814
|
+
const mark = marks.check("quorum received, awaiting repair");
|
|
1815
|
+
|
|
1816
|
+
try a0.open();
|
|
1817
|
+
try b1.open();
|
|
1818
|
+
|
|
1819
|
+
t.run();
|
|
1820
|
+
|
|
1821
|
+
// The two replicas are stuck in view change:
|
|
1822
|
+
// B1 is still on checkpoint_1, its DVC header lagging behind A0's. A0's DVC headers contain a
|
|
1823
|
+
// blank header for op_head() - 1, which it can't nack/ack because it is corrupted in the
|
|
1824
|
+
// journal. There aren't enough nacks for truncating op_head() - 1 (nack_quorum=2), and no acks
|
|
1825
|
+
// for it to be retained in the view change.
|
|
1826
|
+
try expectEqual(a0.status(), .view_change);
|
|
1827
|
+
try expectEqual(b1.status(), .view_change);
|
|
1828
|
+
try mark.expect_hit();
|
|
1829
|
+
|
|
1830
|
+
a0.stop();
|
|
1831
|
+
const a0_storage = &t.cluster.storages[a0.replicas.get(0)];
|
|
1832
|
+
|
|
1833
|
+
a0_storage.faulty = false;
|
|
1834
|
+
const mark2 = marks.check("quorum received, awaiting repair");
|
|
1835
|
+
|
|
1836
|
+
try a0.open();
|
|
1837
|
+
|
|
1838
|
+
t.run();
|
|
1839
|
+
|
|
1840
|
+
// The two replicas are stuck in view change still. a0 reuses its old DVC headers with a blank
|
|
1841
|
+
// header for op_head() - 1, but it still can't ack/nack it.
|
|
1842
|
+
try mark2.expect_hit();
|
|
1843
|
+
try expectEqual(a0.status(), .view_change);
|
|
1844
|
+
try expectEqual(b1.status(), .view_change);
|
|
1845
|
+
|
|
1846
|
+
a0_storage.faulty = true;
|
|
1847
|
+
try b2.open();
|
|
1848
|
+
t.run();
|
|
1849
|
+
|
|
1850
|
+
// a0 is able to resolve its dilemma about op_head() - 1 with the help of b2, which acks it.
|
|
1851
|
+
try expectEqual(t.replica(.R0).status(), .normal);
|
|
1852
|
+
try expectEqual(t.replica(.R0).op_checkpoint(), checkpoint_2);
|
|
1853
|
+
try expectEqual(t.replica(.R0).commit_max(), checkpoint_2_prepare_max);
|
|
1854
|
+
}
|
|
1855
|
+
|
|
1856
|
+
test "Cluster: view_change: lagging replica repairs WAL using start_view from potential primary" {
|
|
1857
|
+
// It could be the case that the replica with the most advanced checkpoint has a corruption in
|
|
1858
|
+
// its grid. In this case, a replica on an older checkpoint can use a start_view message from
|
|
1859
|
+
// the most up-to-date replica to repair its WAL, advance its checkpoint, and become primary.
|
|
1860
|
+
|
|
1861
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1862
|
+
defer t.deinit();
|
|
1863
|
+
|
|
1864
|
+
var c = t.clients(.{});
|
|
1865
|
+
var a0 = t.replica(.A0);
|
|
1866
|
+
var b1 = t.replica(.B1);
|
|
1867
|
+
var b2 = t.replica(.B2);
|
|
1868
|
+
|
|
1869
|
+
b2.stop();
|
|
1870
|
+
|
|
1871
|
+
// Ensure b1 only commits up till checkpoint_2_trigger - 1, so it stays at checkpoint_1 while
|
|
1872
|
+
// a0 moves to checkpoint_2.
|
|
1873
|
+
try c.request(checkpoint_2_trigger - 1, checkpoint_2_trigger - 1);
|
|
1874
|
+
b1.drop(.R_, .incoming, .commit);
|
|
1875
|
+
try c.request(checkpoint_2_trigger, checkpoint_2_trigger);
|
|
1876
|
+
|
|
1877
|
+
try expectEqual(a0.commit(), checkpoint_2_trigger);
|
|
1878
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
1879
|
+
try expectEqual(b1.commit(), checkpoint_2_trigger - 1);
|
|
1880
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_1);
|
|
1881
|
+
|
|
1882
|
+
// Start b2 so that a0 & b2 can make progress to checkpoint_3; b1 is stopped so it remains
|
|
1883
|
+
// lagging at checkpoint_1.
|
|
1884
|
+
try b2.open();
|
|
1885
|
+
b1.stop();
|
|
1886
|
+
|
|
1887
|
+
t.run();
|
|
1888
|
+
|
|
1889
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_2);
|
|
1890
|
+
try expectEqual(b2.commit_max(), checkpoint_2_trigger);
|
|
1891
|
+
try expectEqual(b2.status(), .normal);
|
|
1892
|
+
try b2.expect_sync_done();
|
|
1893
|
+
|
|
1894
|
+
try c.request(
|
|
1895
|
+
checkpoint_3_trigger,
|
|
1896
|
+
checkpoint_3_trigger,
|
|
1897
|
+
);
|
|
1898
|
+
|
|
1899
|
+
try expectEqual(a0.op_head(), checkpoint_3_trigger);
|
|
1900
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_3);
|
|
1901
|
+
try expectEqual(b2.op_head(), checkpoint_3_trigger);
|
|
1902
|
+
try expectEqual(b2.op_checkpoint(), checkpoint_3);
|
|
1903
|
+
|
|
1904
|
+
// Simulate compaction getting stuck on a0 due to a grid corruption. Corrupting the grid doesn't
|
|
1905
|
+
// work here since compaction in replica tests is always able to apply the move table
|
|
1906
|
+
// optimization. This is because all requests in replica tests are `echo` operations, which are
|
|
1907
|
+
// inserted into the LSM with monotonically increasing id.
|
|
1908
|
+
const a0_replica = &t.cluster.replicas[a0.replicas.get(0)];
|
|
1909
|
+
a0_replica.commit_stage = .compact;
|
|
1910
|
+
|
|
1911
|
+
try c.request(
|
|
1912
|
+
checkpoint_3_trigger + 1,
|
|
1913
|
+
checkpoint_3_trigger,
|
|
1914
|
+
);
|
|
1915
|
+
|
|
1916
|
+
try expectEqual(a0.op_head(), checkpoint_3_trigger + 1);
|
|
1917
|
+
try expectEqual(a0.commit(), checkpoint_3_trigger);
|
|
1918
|
+
|
|
1919
|
+
try expectEqual(b2.op_head(), checkpoint_3_trigger + 1);
|
|
1920
|
+
try expectEqual(b2.commit(), checkpoint_3_trigger);
|
|
1921
|
+
|
|
1922
|
+
const committing_prepare = a0_replica.pipeline.queue.prepare_queue.head_ptr_const().?;
|
|
1923
|
+
a0_replica.commit_prepare = committing_prepare.message.ref();
|
|
1924
|
+
|
|
1925
|
+
// Partition a0, force b1 & b2 into view_change by blocking outgoing .do_view_change messages.
|
|
1926
|
+
a0.drop_all(.R_, .bidirectional);
|
|
1927
|
+
|
|
1928
|
+
try b1.open();
|
|
1929
|
+
b1.drop(.R_, .outgoing, .do_view_change);
|
|
1930
|
+
b2.drop(.R_, .outgoing, .do_view_change);
|
|
1931
|
+
|
|
1932
|
+
t.run();
|
|
1933
|
+
|
|
1934
|
+
try expectEqual(b1.status(), .view_change);
|
|
1935
|
+
try expectEqual(b2.status(), .view_change);
|
|
1936
|
+
|
|
1937
|
+
// Stop b2, allow a0 and b1 to view change. a0 can't step up as primary since it has a
|
|
1938
|
+
// corruption in its grid, due to which it can't make progress on its commit pipeline. However,
|
|
1939
|
+
// since it has an intact WAL, it is able to send a .start_view message to b1. With the help
|
|
1940
|
+
// of the .start_view message, b1 can repair, commit, advance from checkpoint_1 -> checkpoint_3,
|
|
1941
|
+
// and step up as primary.
|
|
1942
|
+
b2.stop();
|
|
1943
|
+
a0.pass_all(.R_, .bidirectional);
|
|
1944
|
+
b1.pass(.R_, .outgoing, .do_view_change);
|
|
1945
|
+
|
|
1946
|
+
t.run();
|
|
1947
|
+
t.run();
|
|
1948
|
+
t.run();
|
|
1949
|
+
|
|
1950
|
+
try expectEqual(b1.status(), .normal);
|
|
1951
|
+
try expectEqual(b1.role(), .primary);
|
|
1952
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_3);
|
|
1953
|
+
try expectEqual(b1.commit(), checkpoint_3_trigger + 1);
|
|
1954
|
+
|
|
1955
|
+
try expectEqual(a0.status(), .normal);
|
|
1956
|
+
try expectEqual(a0.role(), .backup);
|
|
1957
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_3);
|
|
1958
|
+
try expectEqual(b1.commit(), checkpoint_3_trigger + 1);
|
|
1959
|
+
}
|
|
1960
|
+
|
|
1961
|
+
test "Cluster: partitioned replica with higher view cannot lock out client" {
|
|
1962
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
1963
|
+
defer t.deinit();
|
|
1964
|
+
|
|
1965
|
+
var c = t.clients(.{ .index = 0, .count = 1 });
|
|
1966
|
+
|
|
1967
|
+
try c.request(1, 1);
|
|
1968
|
+
|
|
1969
|
+
try expectEqual(t.replica(.R_).commit(), 1);
|
|
1970
|
+
try expectEqual(t.replica(.R_).view(), 1);
|
|
1971
|
+
try expectEqual(t.replica(.R_).log_view(), 1);
|
|
1972
|
+
|
|
1973
|
+
const a0 = t.replica(.A0);
|
|
1974
|
+
const b1 = t.replica(.B1);
|
|
1975
|
+
const b2 = t.replica(.B2);
|
|
1976
|
+
|
|
1977
|
+
// Partition primary, allow one of the backups to increment its view to 2 but the other to
|
|
1978
|
+
// maintain its view at 1. Block exchange of DVC messages to avoid view change.
|
|
1979
|
+
a0.drop_all(.R_, .bidirectional);
|
|
1980
|
+
t.replica(.R_).drop(.R_, .bidirectional, .do_view_change);
|
|
1981
|
+
b1.drop_all(.R_, .incoming);
|
|
1982
|
+
|
|
1983
|
+
t.run();
|
|
1984
|
+
try expectEqual(b1.view(), 1);
|
|
1985
|
+
try expectEqual(b1.log_view(), 1);
|
|
1986
|
+
try expectEqual(b2.view(), 2);
|
|
1987
|
+
try expectEqual(b2.log_view(), 1);
|
|
1988
|
+
|
|
1989
|
+
// Reconnect primary, partition the backup with view=2 so it doesn't influence a view change.
|
|
1990
|
+
a0.pass_all(.R_, .bidirectional);
|
|
1991
|
+
b2.drop_all(.R_, .bidirectional);
|
|
1992
|
+
|
|
1993
|
+
// Verify that the client is able to get its requests processed by the cluster even though
|
|
1994
|
+
// there is a partitioned replica with a higher view number (view=2) than the cluster (view=1).
|
|
1995
|
+
try c.request(2, 2);
|
|
1996
|
+
|
|
1997
|
+
try expectEqual(b2.view(), 2);
|
|
1998
|
+
try expectEqual(b1.view(), 1);
|
|
1999
|
+
try expectEqual(a0.view(), 1);
|
|
2000
|
+
|
|
2001
|
+
try expectEqual(b1.commit(), 2);
|
|
2002
|
+
try expectEqual(a0.commit(), 2);
|
|
2003
|
+
}
|
|
2004
|
+
|
|
2005
|
+
test "Cluster: broken hash chain within the same view does not stall commit via repair" {
|
|
2006
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
2007
|
+
defer t.deinit();
|
|
2008
|
+
|
|
2009
|
+
// Forcefully stall commit pipeline. We let the cluster run for a while to circumvent assertions
|
|
2010
|
+
// related to `commit_stage` on replica startup.
|
|
2011
|
+
t.run();
|
|
2012
|
+
const b2 = t.replica(.B2);
|
|
2013
|
+
const b2_replica = &t.cluster.replicas[b2.replicas.get(0)];
|
|
2014
|
+
b2_replica.commit_stage = .compact;
|
|
2015
|
+
|
|
2016
|
+
// Disallow receiving a specific prepare, and repairing headers via repair and start_view, to
|
|
2017
|
+
// force a hash chain break.
|
|
2018
|
+
b2.drop_fn(.R_, .incoming, struct {
|
|
2019
|
+
fn drop_message(message: *const Message) bool {
|
|
2020
|
+
const header = message.header.into(.prepare) orelse return false;
|
|
2021
|
+
return header.op == constants.pipeline_prepare_queue_max + 1;
|
|
2022
|
+
}
|
|
2023
|
+
}.drop_message);
|
|
2024
|
+
b2.drop(.R_, .outgoing, .request_headers);
|
|
2025
|
+
b2.drop(.R_, .incoming, .start_view);
|
|
2026
|
+
|
|
2027
|
+
var c = t.clients(.{});
|
|
2028
|
+
try c.request(
|
|
2029
|
+
constants.pipeline_prepare_queue_max - 1,
|
|
2030
|
+
constants.pipeline_prepare_queue_max - 1,
|
|
2031
|
+
);
|
|
2032
|
+
|
|
2033
|
+
try expectEqual(t.replica(.R_).op_head(), constants.pipeline_prepare_queue_max - 1);
|
|
2034
|
+
try expectEqual(t.replica(.R_).commit_max(), constants.pipeline_prepare_queue_max - 1);
|
|
2035
|
+
try expectEqual(b2.commit(), 0);
|
|
2036
|
+
|
|
2037
|
+
try c.request(
|
|
2038
|
+
2 * constants.pipeline_prepare_queue_max,
|
|
2039
|
+
2 * constants.pipeline_prepare_queue_max,
|
|
2040
|
+
);
|
|
2041
|
+
|
|
2042
|
+
// Disallow commit pipeline initiation via commit. Dropping incoming commit messages, and the
|
|
2043
|
+
// fact that no more prepares are exchanged, ensures commit can only be initiated via repair.
|
|
2044
|
+
b2_replica.commit_stage = .idle;
|
|
2045
|
+
b2.drop(.R_, .incoming, .commit);
|
|
2046
|
+
t.run();
|
|
2047
|
+
|
|
2048
|
+
try expectEqual(b2.op_head(), constants.pipeline_prepare_queue_max * 2);
|
|
2049
|
+
try expectEqual(b2.commit_max(), constants.pipeline_prepare_queue_max * 2);
|
|
2050
|
+
try expectEqual(b2.commit(), constants.pipeline_prepare_queue_max);
|
|
2051
|
+
}
|
|
2052
|
+
|
|
2053
|
+
test "Cluster: backups prepare past prepare_max if the next checkpoint is durable" {
|
|
2054
|
+
const t = try TestContext.init(.{ .replica_count = 3 });
|
|
2055
|
+
defer t.deinit();
|
|
2056
|
+
|
|
2057
|
+
var c = t.clients(.{});
|
|
2058
|
+
try c.request(checkpoint_1_trigger - 1, checkpoint_1_trigger - 1);
|
|
2059
|
+
|
|
2060
|
+
try expectEqual(t.replica(.R_).op_head(), checkpoint_1_trigger - 1);
|
|
2061
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_1_trigger - 1);
|
|
2062
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), 0);
|
|
2063
|
+
|
|
2064
|
+
const a0 = t.replica(.A0);
|
|
2065
|
+
const b1 = t.replica(.B1);
|
|
2066
|
+
const b2 = t.replica(.B2);
|
|
2067
|
+
|
|
2068
|
+
b2.drop(.R_, .incoming, .start_view);
|
|
2069
|
+
const b2_replica = &t.cluster.replicas[b2.replicas.get(0)];
|
|
2070
|
+
|
|
2071
|
+
// Stall commit pipeline on b2, so it accepts prepares but can't advance its checkpoint past 0.
|
|
2072
|
+
// Meanwhile, the rest of the cluster moves to checkpoint=checkpoint_2.
|
|
2073
|
+
b2_replica.commit_stage = .compact;
|
|
2074
|
+
|
|
2075
|
+
try c.request(checkpoint_2_prepare_max, checkpoint_2_prepare_max);
|
|
2076
|
+
|
|
2077
|
+
try expectEqual(t.replica(.R_).commit_max(), checkpoint_2_prepare_max);
|
|
2078
|
+
|
|
2079
|
+
try expectEqual(a0.op_head(), checkpoint_2_prepare_max);
|
|
2080
|
+
try expectEqual(b1.op_head(), checkpoint_2_prepare_max);
|
|
2081
|
+
|
|
2082
|
+
// Since checkpoint_1 is durable on a0, b1 (a commit quorum of replicas), b2 is able to accept
|
|
2083
|
+
// some prepares from the next checkpoint, overwriting some of its committed prepares.
|
|
2084
|
+
// However, even though ops [checkpoint_1, checkpoint_1_trigger - 1] are committed on b2,
|
|
2085
|
+
// they are not overwritten as they are required during checkpointing & upgrade.
|
|
2086
|
+
try expectEqual(b2.op_head(), checkpoint_1 + constants.journal_slot_count - 1);
|
|
2087
|
+
|
|
2088
|
+
try expectEqual(a0.op_checkpoint(), checkpoint_2);
|
|
2089
|
+
try expectEqual(b1.op_checkpoint(), checkpoint_2);
|
|
2090
|
+
try expectEqual(b2.op_checkpoint(), 0);
|
|
2091
|
+
|
|
2092
|
+
// b2 crashes and restarts, and truncates all prepares past checkpoint_1_prepare_max,
|
|
2093
|
+
// since all prepares in checkpoint=0 must be replayed after restart.
|
|
2094
|
+
b2.stop();
|
|
2095
|
+
try b2.open();
|
|
2096
|
+
|
|
2097
|
+
try expectEqual(b2.op_head(), checkpoint_1_prepare_max);
|
|
2098
|
+
|
|
2099
|
+
b2.pass(.R_, .incoming, .start_view);
|
|
2100
|
+
t.run();
|
|
2101
|
+
|
|
2102
|
+
try expectEqual(t.replica(.R_).op_head(), checkpoint_2_prepare_max);
|
|
2103
|
+
try expectEqual(t.replica(.R_).commit(), checkpoint_2_prepare_max);
|
|
2104
|
+
try expectEqual(t.replica(.R_).op_checkpoint(), checkpoint_2);
|
|
2105
|
+
}
|
|
2106
|
+
|
|
2107
|
+
const ProcessSelector = enum {
|
|
2108
|
+
__, // all replicas, standbys, and clients
|
|
2109
|
+
R_, // all (non-standby) replicas
|
|
2110
|
+
R0,
|
|
2111
|
+
R1,
|
|
2112
|
+
R2,
|
|
2113
|
+
R3,
|
|
2114
|
+
R4,
|
|
2115
|
+
R5,
|
|
2116
|
+
S_, // all standbys
|
|
2117
|
+
S0,
|
|
2118
|
+
S1,
|
|
2119
|
+
S2,
|
|
2120
|
+
S3,
|
|
2121
|
+
S4,
|
|
2122
|
+
S5,
|
|
2123
|
+
A0, // current primary
|
|
2124
|
+
B1, // backup immediately following current primary
|
|
2125
|
+
B2,
|
|
2126
|
+
B3,
|
|
2127
|
+
B4,
|
|
2128
|
+
B5,
|
|
2129
|
+
C_, // all clients
|
|
2130
|
+
C0,
|
|
2131
|
+
};
|
|
2132
|
+
|
|
2133
|
+
const TestContext = struct {
|
|
2134
|
+
cluster: *Cluster,
|
|
2135
|
+
log_level: std.log.Level,
|
|
2136
|
+
client_requests: []usize,
|
|
2137
|
+
client_replies: []usize,
|
|
2138
|
+
|
|
2139
|
+
pub fn init(options: struct {
|
|
2140
|
+
replica_count: u8,
|
|
2141
|
+
standby_count: u8 = 0,
|
|
2142
|
+
client_count: u8 = constants.clients_max,
|
|
2143
|
+
client_release: vsr.Release = releases[0].release,
|
|
2144
|
+
seed: u64 = 123,
|
|
2145
|
+
}) !*TestContext {
|
|
2146
|
+
const log_level_original = std.testing.log_level;
|
|
2147
|
+
std.testing.log_level = log_level;
|
|
2148
|
+
var prng = stdx.PRNG.from_seed(options.seed);
|
|
2149
|
+
const storage_size_limit = vsr.sector_floor(128 * MiB);
|
|
2150
|
+
|
|
2151
|
+
const cluster = try Cluster.init(allocator, .{
|
|
2152
|
+
.cluster = .{
|
|
2153
|
+
.cluster_id = 0,
|
|
2154
|
+
.replica_count = options.replica_count,
|
|
2155
|
+
.standby_count = options.standby_count,
|
|
2156
|
+
.client_count = options.client_count,
|
|
2157
|
+
.storage_size_limit = storage_size_limit,
|
|
2158
|
+
.seed = prng.int(u64),
|
|
2159
|
+
.releases = &releases,
|
|
2160
|
+
.client_release = options.client_release,
|
|
2161
|
+
.reformats_max = 3,
|
|
2162
|
+
.state_machine = .{
|
|
2163
|
+
.batch_size_limit = constants.message_body_size_max,
|
|
2164
|
+
.lsm_forest_node_count = 4096,
|
|
2165
|
+
},
|
|
2166
|
+
},
|
|
2167
|
+
.network = .{
|
|
2168
|
+
.node_count = options.replica_count + options.standby_count,
|
|
2169
|
+
.client_count = options.client_count,
|
|
2170
|
+
.seed = prng.int(u64),
|
|
2171
|
+
.one_way_delay_mean = fuzz.range_inclusive_ms(&prng, 30, 120),
|
|
2172
|
+
.one_way_delay_min = fuzz.range_inclusive_ms(&prng, 0, 20),
|
|
2173
|
+
|
|
2174
|
+
.path_maximum_capacity = 10,
|
|
2175
|
+
.path_clog_duration_mean = .{ .ns = 0 },
|
|
2176
|
+
.path_clog_probability = Ratio.zero(),
|
|
2177
|
+
.recorded_count_max = 16,
|
|
2178
|
+
},
|
|
2179
|
+
.storage = .{
|
|
2180
|
+
.size = storage_size_limit,
|
|
2181
|
+
.read_latency_min = .ms(10),
|
|
2182
|
+
.read_latency_mean = .ms(50),
|
|
2183
|
+
.write_latency_min = .ms(10),
|
|
2184
|
+
.write_latency_mean = .ms(50),
|
|
2185
|
+
},
|
|
2186
|
+
.storage_fault_atlas = .{
|
|
2187
|
+
.faulty_superblock = false,
|
|
2188
|
+
.faulty_wal_headers = false,
|
|
2189
|
+
.faulty_wal_prepares = false,
|
|
2190
|
+
.faulty_client_replies = false,
|
|
2191
|
+
.faulty_grid = false,
|
|
2192
|
+
},
|
|
2193
|
+
.callbacks = .{
|
|
2194
|
+
.on_client_reply = TestContext.on_client_reply,
|
|
2195
|
+
},
|
|
2196
|
+
});
|
|
2197
|
+
errdefer cluster.deinit();
|
|
2198
|
+
|
|
2199
|
+
for (cluster.storages) |*storage| storage.faulty = true;
|
|
2200
|
+
|
|
2201
|
+
const client_requests = try allocator.alloc(usize, options.client_count);
|
|
2202
|
+
errdefer allocator.free(client_requests);
|
|
2203
|
+
@memset(client_requests, 0);
|
|
2204
|
+
|
|
2205
|
+
const client_replies = try allocator.alloc(usize, cluster.clients.len);
|
|
2206
|
+
errdefer allocator.free(client_replies);
|
|
2207
|
+
@memset(client_replies, 0);
|
|
2208
|
+
|
|
2209
|
+
const context = try allocator.create(TestContext);
|
|
2210
|
+
errdefer allocator.destroy(context);
|
|
2211
|
+
|
|
2212
|
+
context.* = .{
|
|
2213
|
+
.cluster = cluster,
|
|
2214
|
+
.log_level = log_level_original,
|
|
2215
|
+
.client_requests = client_requests,
|
|
2216
|
+
.client_replies = client_replies,
|
|
2217
|
+
};
|
|
2218
|
+
cluster.context = context;
|
|
2219
|
+
|
|
2220
|
+
return context;
|
|
2221
|
+
}
|
|
2222
|
+
|
|
2223
|
+
pub fn deinit(t: *TestContext) void {
|
|
2224
|
+
std.testing.log_level = t.log_level;
|
|
2225
|
+
allocator.free(t.client_replies);
|
|
2226
|
+
allocator.free(t.client_requests);
|
|
2227
|
+
t.cluster.deinit();
|
|
2228
|
+
allocator.destroy(t);
|
|
2229
|
+
}
|
|
2230
|
+
|
|
2231
|
+
pub fn replica(t: *TestContext, selector: ProcessSelector) TestReplicas {
|
|
2232
|
+
const replica_processes = t.processes(selector);
|
|
2233
|
+
var replica_indexes = stdx.BoundedArrayType(u8, constants.members_max){};
|
|
2234
|
+
for (replica_processes.const_slice()) |p| replica_indexes.push(p.replica);
|
|
2235
|
+
return TestReplicas{
|
|
2236
|
+
.context = t,
|
|
2237
|
+
.cluster = t.cluster,
|
|
2238
|
+
.replicas = replica_indexes,
|
|
2239
|
+
};
|
|
2240
|
+
}
|
|
2241
|
+
pub fn clients(
|
|
2242
|
+
t: *TestContext,
|
|
2243
|
+
options: struct {
|
|
2244
|
+
index: usize = 0,
|
|
2245
|
+
count: ?usize = null,
|
|
2246
|
+
},
|
|
2247
|
+
) TestClients {
|
|
2248
|
+
const index = options.index;
|
|
2249
|
+
const count = options.count orelse t.cluster.options.client_count;
|
|
2250
|
+
assert(index + count <= t.cluster.options.client_count);
|
|
2251
|
+
|
|
2252
|
+
var client_indexes = stdx.BoundedArrayType(usize, constants.clients_max){};
|
|
2253
|
+
for (index..index + count) |i| client_indexes.push(i);
|
|
2254
|
+
return TestClients{
|
|
2255
|
+
.context = t,
|
|
2256
|
+
.cluster = t.cluster,
|
|
2257
|
+
.clients = client_indexes,
|
|
2258
|
+
};
|
|
2259
|
+
}
|
|
2260
|
+
|
|
2261
|
+
pub fn client_bus(t: *TestContext, client_index: usize) !*TestClientBus {
|
|
2262
|
+
// Reuse one of `Cluster.clients`' ids since the Network preallocated links for it.
|
|
2263
|
+
return TestClientBus.init(t, t.cluster.clients[client_index].?.id);
|
|
2264
|
+
}
|
|
2265
|
+
|
|
2266
|
+
pub fn run(t: *TestContext) void {
|
|
2267
|
+
const tick_max = 4_100;
|
|
2268
|
+
var tick_count: usize = 0;
|
|
2269
|
+
while (tick_count < tick_max) : (tick_count += 1) {
|
|
2270
|
+
if (t.tick()) tick_count = 0;
|
|
2271
|
+
}
|
|
2272
|
+
}
|
|
2273
|
+
|
|
2274
|
+
pub fn block_address_max(t: *TestContext) u64 {
|
|
2275
|
+
const grid_blocks = t.cluster.storages[0].grid_blocks();
|
|
2276
|
+
for (t.cluster.storages) |storage| {
|
|
2277
|
+
assert(storage.grid_blocks() == grid_blocks);
|
|
2278
|
+
}
|
|
2279
|
+
return grid_blocks; // NB: no -1 needed, addresses start from 1.
|
|
2280
|
+
}
|
|
2281
|
+
|
|
2282
|
+
/// Returns whether the cluster state advanced.
|
|
2283
|
+
fn tick(t: *TestContext) bool {
|
|
2284
|
+
const commits_before = t.cluster.state_checker.commits.items.len;
|
|
2285
|
+
t.cluster.tick();
|
|
2286
|
+
return commits_before != t.cluster.state_checker.commits.items.len;
|
|
2287
|
+
}
|
|
2288
|
+
|
|
2289
|
+
fn on_client_reply(
|
|
2290
|
+
cluster: *Cluster,
|
|
2291
|
+
client: usize,
|
|
2292
|
+
request: *const Message.Request,
|
|
2293
|
+
reply: *const Message.Reply,
|
|
2294
|
+
) void {
|
|
2295
|
+
_ = request;
|
|
2296
|
+
_ = reply;
|
|
2297
|
+
const t: *TestContext = @ptrCast(@alignCast(cluster.context.?));
|
|
2298
|
+
t.client_replies[client] += 1;
|
|
2299
|
+
}
|
|
2300
|
+
|
|
2301
|
+
const ProcessList = stdx.BoundedArrayType(
|
|
2302
|
+
Process,
|
|
2303
|
+
constants.members_max + constants.clients_max,
|
|
2304
|
+
);
|
|
2305
|
+
|
|
2306
|
+
fn processes(t: *const TestContext, selector: ProcessSelector) ProcessList {
|
|
2307
|
+
const replica_count = t.cluster.options.replica_count;
|
|
2308
|
+
|
|
2309
|
+
var view: u32 = 0;
|
|
2310
|
+
for (t.cluster.replicas) |*r| view = @max(view, r.view);
|
|
2311
|
+
|
|
2312
|
+
var array = ProcessList{};
|
|
2313
|
+
switch (selector) {
|
|
2314
|
+
.R0 => array.push(.{ .replica = 0 }),
|
|
2315
|
+
.R1 => array.push(.{ .replica = 1 }),
|
|
2316
|
+
.R2 => array.push(.{ .replica = 2 }),
|
|
2317
|
+
.R3 => array.push(.{ .replica = 3 }),
|
|
2318
|
+
.R4 => array.push(.{ .replica = 4 }),
|
|
2319
|
+
.R5 => array.push(.{ .replica = 5 }),
|
|
2320
|
+
.S0 => array.push(.{ .replica = replica_count + 0 }),
|
|
2321
|
+
.S1 => array.push(.{ .replica = replica_count + 1 }),
|
|
2322
|
+
.S2 => array.push(.{ .replica = replica_count + 2 }),
|
|
2323
|
+
.S3 => array.push(.{ .replica = replica_count + 3 }),
|
|
2324
|
+
.S4 => array.push(.{ .replica = replica_count + 4 }),
|
|
2325
|
+
.S5 => array.push(.{ .replica = replica_count + 5 }),
|
|
2326
|
+
.A0 => array
|
|
2327
|
+
.push(.{ .replica = @intCast((view + 0) % replica_count) }),
|
|
2328
|
+
.B1 => array
|
|
2329
|
+
.push(.{ .replica = @intCast((view + 1) % replica_count) }),
|
|
2330
|
+
.B2 => array
|
|
2331
|
+
.push(.{ .replica = @intCast((view + 2) % replica_count) }),
|
|
2332
|
+
.B3 => array
|
|
2333
|
+
.push(.{ .replica = @intCast((view + 3) % replica_count) }),
|
|
2334
|
+
.B4 => array
|
|
2335
|
+
.push(.{ .replica = @intCast((view + 4) % replica_count) }),
|
|
2336
|
+
.B5 => array
|
|
2337
|
+
.push(.{ .replica = @intCast((view + 5) % replica_count) }),
|
|
2338
|
+
.C0 => array.push(.{ .client = t.cluster.clients[0].?.id }),
|
|
2339
|
+
.__, .R_, .S_, .C_ => {
|
|
2340
|
+
if (selector == .__ or selector == .R_) {
|
|
2341
|
+
for (t.cluster.replicas[0..replica_count], 0..) |_, i| {
|
|
2342
|
+
array.push(.{ .replica = @intCast(i) });
|
|
2343
|
+
}
|
|
2344
|
+
}
|
|
2345
|
+
if (selector == .__ or selector == .S_) {
|
|
2346
|
+
for (t.cluster.replicas[replica_count..], 0..) |_, i| {
|
|
2347
|
+
array.push(.{ .replica = @intCast(replica_count + i) });
|
|
2348
|
+
}
|
|
2349
|
+
}
|
|
2350
|
+
if (selector == .__ or selector == .C_) {
|
|
2351
|
+
for (t.cluster.clients) |*client| {
|
|
2352
|
+
array.push(.{ .client = client.*.?.id });
|
|
2353
|
+
}
|
|
2354
|
+
}
|
|
2355
|
+
},
|
|
2356
|
+
}
|
|
2357
|
+
assert(array.count() > 0);
|
|
2358
|
+
return array;
|
|
2359
|
+
}
|
|
2360
|
+
};
|
|
2361
|
+
|
|
2362
|
+
const TestReplicas = struct {
|
|
2363
|
+
context: *TestContext,
|
|
2364
|
+
cluster: *Cluster,
|
|
2365
|
+
replicas: stdx.BoundedArrayType(u8, constants.members_max),
|
|
2366
|
+
|
|
2367
|
+
pub fn stop(t: *const TestReplicas) void {
|
|
2368
|
+
for (t.replicas.const_slice()) |r| {
|
|
2369
|
+
log.info("{}: crash replica", .{r});
|
|
2370
|
+
t.cluster.replica_crash(r);
|
|
2371
|
+
|
|
2372
|
+
// For simplicity, ensure that any packets that are in flight to this replica are
|
|
2373
|
+
// discarded before it starts up again.
|
|
2374
|
+
const paths = t.peer_paths(.__, .incoming);
|
|
2375
|
+
for (paths.const_slice()) |path| {
|
|
2376
|
+
t.cluster.network.link_clear(path);
|
|
2377
|
+
}
|
|
2378
|
+
}
|
|
2379
|
+
}
|
|
2380
|
+
|
|
2381
|
+
pub fn open(t: *const TestReplicas) !void {
|
|
2382
|
+
for (t.replicas.const_slice()) |r| {
|
|
2383
|
+
log.info("{}: restart replica", .{r});
|
|
2384
|
+
t.cluster.replica_restart(r) catch |err| {
|
|
2385
|
+
assert(t.replicas.count() == 1);
|
|
2386
|
+
return switch (err) {
|
|
2387
|
+
error.WALCorrupt => return error.WALCorrupt,
|
|
2388
|
+
error.WALInvalid => return error.WALInvalid,
|
|
2389
|
+
else => @panic("unexpected error"),
|
|
2390
|
+
};
|
|
2391
|
+
};
|
|
2392
|
+
}
|
|
2393
|
+
}
|
|
2394
|
+
|
|
2395
|
+
pub fn open_upgrade(t: *const TestReplicas, releases_bundled_patch: []const u8) !void {
|
|
2396
|
+
var releases_bundled: vsr.ReleaseList = .empty;
|
|
2397
|
+
for (releases_bundled_patch) |patch| {
|
|
2398
|
+
releases_bundled.push(vsr.Release.from(.{
|
|
2399
|
+
.major = 0,
|
|
2400
|
+
.minor = 0,
|
|
2401
|
+
.patch = patch,
|
|
2402
|
+
}));
|
|
2403
|
+
}
|
|
2404
|
+
releases_bundled.verify();
|
|
2405
|
+
|
|
2406
|
+
for (t.replicas.const_slice()) |r| {
|
|
2407
|
+
log.info("{}: restart replica", .{r});
|
|
2408
|
+
t.cluster.replica_set_releases(r, &releases_bundled);
|
|
2409
|
+
t.cluster.replica_restart(r) catch |err| {
|
|
2410
|
+
assert(t.replicas.count() == 1);
|
|
2411
|
+
return switch (err) {
|
|
2412
|
+
error.WALCorrupt => return error.WALCorrupt,
|
|
2413
|
+
error.WALInvalid => return error.WALInvalid,
|
|
2414
|
+
else => @panic("unexpected error"),
|
|
2415
|
+
};
|
|
2416
|
+
};
|
|
2417
|
+
}
|
|
2418
|
+
}
|
|
2419
|
+
|
|
2420
|
+
pub fn open_reformat(t: *const TestReplicas) !void {
|
|
2421
|
+
for (t.replicas.const_slice()) |r| {
|
|
2422
|
+
log.info("{}: recover replica", .{r});
|
|
2423
|
+
try t.cluster.replica_reformat(r);
|
|
2424
|
+
}
|
|
2425
|
+
}
|
|
2426
|
+
|
|
2427
|
+
pub fn index(t: *const TestReplicas) u8 {
|
|
2428
|
+
assert(t.replicas.count() == 1);
|
|
2429
|
+
return t.replicas.get(0);
|
|
2430
|
+
}
|
|
2431
|
+
|
|
2432
|
+
const Health = enum { up, down, reformatting };
|
|
2433
|
+
|
|
2434
|
+
pub fn health(t: *const TestReplicas) Health {
|
|
2435
|
+
var value_all: ?Health = null;
|
|
2436
|
+
for (t.replicas.const_slice()) |r| {
|
|
2437
|
+
const value: Health = switch (t.cluster.replica_health[r]) {
|
|
2438
|
+
.up => .up,
|
|
2439
|
+
.down => .down,
|
|
2440
|
+
.reformatting => .reformatting,
|
|
2441
|
+
};
|
|
2442
|
+
if (value_all) |all| {
|
|
2443
|
+
assert(all == value);
|
|
2444
|
+
} else {
|
|
2445
|
+
value_all = value;
|
|
2446
|
+
}
|
|
2447
|
+
}
|
|
2448
|
+
return value_all.?;
|
|
2449
|
+
}
|
|
2450
|
+
|
|
2451
|
+
/// Reads `field` from every selected replica and returns the common value.
/// If two selected replicas disagree, logs the field's value for every selected replica
/// and panics. At least one replica must be selected (the final unwrap fails otherwise).
fn get(
    t: *const TestReplicas,
    comptime field: std.meta.FieldEnum(Cluster.Replica),
) @FieldType(Cluster.Replica, @tagName(field)) {
    const field_name = @tagName(field);
    const Value = @FieldType(Cluster.Replica, field_name);

    var value_all: ?Value = null;
    for (t.replicas.const_slice()) |r| {
        const value = @field(&t.cluster.replicas[r], field_name);
        const value_first = value_all orelse {
            // First replica visited: remember its value as the reference.
            value_all = value;
            continue;
        };
        if (value_first == value) continue;

        // Mismatch: dump the field for every selected replica before failing the test.
        for (t.replicas.const_slice()) |replica_index| {
            log.err("replica={} field={s} value={}", .{
                replica_index,
                field_name,
                @field(&t.cluster.replicas[replica_index], field_name),
            });
        }
        @panic("test failed: value mismatch");
    }
    return value_all.?;
}
|
|
2476
|
+
|
|
2477
|
+
/// Returns the patch component of the release triple shared by all selected replicas.
/// Asserts that every selected replica runs the same patch.
/// At least one replica must be selected (the final unwrap fails otherwise).
pub fn release(t: *const TestReplicas) u16 {
    var patch_all: ?u16 = null;
    for (t.replicas.const_slice()) |replica_index| {
        const patch = t.cluster.replicas[replica_index].release.triple().patch;
        assert(patch_all == null or patch_all.? == patch);
        patch_all = patch;
    }
    return patch_all.?;
}
|
|
2489
|
+
|
|
2490
|
+
/// Returns the `status` field shared by all selected replicas (see `get`).
pub fn status(t: *const TestReplicas) vsr.Status {
    return TestReplicas.get(t, .status);
}
|
|
2493
|
+
|
|
2494
|
+
/// Returns the `view` field shared by all selected replicas (see `get`).
pub fn view(t: *const TestReplicas) u32 {
    return TestReplicas.get(t, .view);
}
|
|
2497
|
+
|
|
2498
|
+
/// Returns the `log_view` field shared by all selected replicas (see `get`).
pub fn log_view(t: *const TestReplicas) u32 {
    return TestReplicas.get(t, .log_view);
}
|
|
2501
|
+
|
|
2502
|
+
/// Returns the replica `op` field shared by all selected replicas (see `get`).
pub fn op_head(t: *const TestReplicas) u64 {
    return TestReplicas.get(t, .op);
}
|
|
2505
|
+
|
|
2506
|
+
/// Returns the `commit_min` field shared by all selected replicas (see `get`).
pub fn commit(t: *const TestReplicas) u64 {
    return TestReplicas.get(t, .commit_min);
}
|
|
2509
|
+
|
|
2510
|
+
/// Returns the `commit_max` field shared by all selected replicas (see `get`).
pub fn commit_max(t: *const TestReplicas) u64 {
    return TestReplicas.get(t, .commit_max);
}
|
|
2513
|
+
|
|
2514
|
+
/// Returns the `state_machine_opened` flag shared by all selected replicas (see `get`).
pub fn state_machine_opened(t: *const TestReplicas) bool {
    return TestReplicas.get(t, .state_machine_opened);
}
|
|
2517
|
+
|
|
2518
|
+
/// Returns the `syncing` stage shared by all selected replicas.
/// Asserts that at least one replica is selected and that all selected replicas are in an
/// identical stage (compared with `std.meta.eql`).
fn sync_stage(t: *const TestReplicas) vsr.SyncStage {
    assert(t.replicas.count() > 0);

    var stage_all: ?vsr.SyncStage = null;
    for (t.replicas.const_slice()) |replica_index| {
        const stage = t.cluster.replicas[replica_index].syncing;
        if (stage_all) |stage_first| {
            assert(std.meta.eql(stage_first, stage));
        } else {
            stage_all = stage;
        }
    }
    return stage_all.?;
}
|
|
2532
|
+
|
|
2533
|
+
/// Returns only the tag of the sync stage shared by all selected replicas,
/// discarding any payload the stage carries.
pub fn sync_status(t: *const TestReplicas) std.meta.Tag(vsr.SyncStage) {
    const stage_tag: std.meta.Tag(vsr.SyncStage) = t.sync_stage();
    return stage_tag;
}
|
|
2536
|
+
|
|
2537
|
+
/// Returns the result of `target()` on the sync stage shared by all selected replicas,
/// which may be null.
fn sync_target(t: *const TestReplicas) ?vsr.SyncTarget {
    const stage = t.sync_stage();
    return stage.target();
}
|
|
2540
|
+
|
|
2541
|
+
/// Returns the `checkpoint_op` of the shared sync target, or null when there is no target.
pub fn sync_target_checkpoint_op(t: *const TestReplicas) ?u64 {
    const target = t.sync_target() orelse return null;
    return target.checkpoint_op;
}
|
|
2548
|
+
|
|
2549
|
+
/// Returns the `checkpoint_id` of the shared sync target, or null when there is no target.
pub fn sync_target_checkpoint_id(t: *const TestReplicas) ?u128 {
    const target = t.sync_target() orelse return null;
    return target.checkpoint_id;
}
|
|
2556
|
+
|
|
2557
|
+
const Role = enum { primary, backup, standby };

/// Returns the role shared by all selected replicas.
/// A replica is `.standby` if `standby()` is true, `.primary` if its own index equals
/// `primary_index()` of its current view, and `.backup` otherwise.
/// Asserts that every selected replica has the same role; at least one must be selected.
pub fn role(t: *const TestReplicas) Role {
    var role_all: ?Role = null;
    for (t.replicas.const_slice()) |replica_index| {
        const replica = &t.cluster.replicas[replica_index];
        const replica_role: Role = if (replica.standby())
            .standby
        else if (replica.replica == replica.primary_index(replica.view))
            .primary
        else
            .backup;

        if (role_all) |role_first| assert(role_first == replica_role);
        role_all = replica_role;
    }
    return role_all.?;
}
|
|
2577
|
+
|
|
2578
|
+
/// Returns the checkpoint id of the working superblock shared by all selected replicas.
/// Asserts that every selected replica reports the same id; at least one must be selected.
pub fn op_checkpoint_id(t: *const TestReplicas) u128 {
    var id_all: ?u128 = null;
    for (t.replicas.const_slice()) |replica_index| {
        const id = t.cluster.replicas[replica_index].superblock.working.checkpoint_id();
        if (id_all) |id_first| assert(id_first == id);
        id_all = id;
    }
    return id_all.?;
}
|
|
2588
|
+
|
|
2589
|
+
/// Returns the `op_checkpoint()` value shared by all selected replicas.
/// Asserts that every selected replica reports the same value; at least one must be selected.
pub fn op_checkpoint(t: *const TestReplicas) u64 {
    var op_all: ?u64 = null;
    for (t.replicas.const_slice()) |replica_index| {
        const op = t.cluster.replicas[replica_index].op_checkpoint();
        if (op_all) |op_first| assert(op_first == op);
        op_all = op;
    }
    return op_all.?;
}
|
|
2598
|
+
|
|
2599
|
+
/// Injects a fault at `target` into the storage of every selected replica.
/// - `wal_header`: wrapping-increments the storage byte at the start of the slot's header.
/// - `wal_prepare`, `client_reply`: marks the sector at the start of the slot in `faults`.
/// - `grid_block`: marks the sector at the start of the block in `faults`; the offset is
///   computed from `address - 1`.
pub fn corrupt(
    t: *const TestReplicas,
    target: union(enum) {
        wal_header: usize, // slot
        wal_prepare: usize, // slot
        client_reply: usize, // slot
        grid_block: u64, // address
    },
) void {
    const selected = t.replicas.const_slice();
    switch (target) {
        .wal_header => |slot| {
            const header_offset = slot * @sizeOf(vsr.Header);
            const fault_offset = vsr.Zone.wal_headers.offset(header_offset);
            for (selected) |r| t.cluster.storages[r].memory[fault_offset] +%= 1;
        },
        .wal_prepare => |slot| {
            const prepare_offset = slot * constants.message_size_max;
            const fault_offset = vsr.Zone.wal_prepares.offset(prepare_offset);
            const fault_sector = @divExact(fault_offset, constants.sector_size);
            for (selected) |r| t.cluster.storages[r].faults.set(fault_sector);
        },
        .client_reply => |slot| {
            const reply_offset = slot * constants.message_size_max;
            const fault_offset = vsr.Zone.client_replies.offset(reply_offset);
            const fault_sector = @divExact(fault_offset, constants.sector_size);
            for (selected) |r| t.cluster.storages[r].faults.set(fault_sector);
        },
        .grid_block => |address| {
            const block_offset = (address - 1) * constants.block_size;
            const fault_offset = vsr.Zone.grid.offset(block_offset);
            const fault_sector = @divExact(fault_offset, constants.sector_size);
            for (selected) |r| t.cluster.storages[r].faults.set(fault_sector);
        },
    }
}
|
|
2640
|
+
|
|
2641
|
+
/// Selects which network paths between the selected replicas and a peer are affected:
/// `.outgoing` is replica -> peer, `.incoming` is peer -> replica, `.bidirectional` is both.
pub const LinkDirection = enum { bidirectional, incoming, outgoing };
|
|
2642
|
+
|
|
2643
|
+
/// Resets the link filter to `LinkFilter.initFull()` on every path between the selected
/// replicas and `peer` in the given `direction`.
pub fn pass_all(t: *const TestReplicas, peer: ProcessSelector, direction: LinkDirection) void {
    const paths = t.peer_paths(peer, direction);
    for (paths.const_slice()) |path| {
        const filter = t.cluster.network.link_filter(path);
        filter.* = LinkFilter.initFull();
    }
}
|
|
2649
|
+
|
|
2650
|
+
/// Resets the link filter to the default (empty) `LinkFilter{}` on every path between the
/// selected replicas and `peer` in the given `direction`.
pub fn drop_all(t: *const TestReplicas, peer: ProcessSelector, direction: LinkDirection) void {
    const paths = t.peer_paths(peer, direction);
    for (paths.const_slice()) |path| {
        const filter = t.cluster.network.link_filter(path);
        filter.* = LinkFilter{};
    }
}
|
|
2654
|
+
|
|
2655
|
+
/// Inserts `command` into the link filter of every path between the selected replicas and
/// `peer` in the given `direction`.
pub fn pass(
    t: *const TestReplicas,
    peer: ProcessSelector,
    direction: LinkDirection,
    command: vsr.Command,
) void {
    const paths = t.peer_paths(peer, direction);
    for (paths.const_slice()) |path| {
        const filter = t.cluster.network.link_filter(path);
        filter.insert(command);
    }
}
|
|
2664
|
+
|
|
2665
|
+
/// Removes `command` from the link filter of every path between the selected replicas and
/// `peer` in the given `direction`.
pub fn drop(
    t: *const TestReplicas,
    peer: ProcessSelector,
    direction: LinkDirection,
    command: vsr.Command,
) void {
    const paths = t.peer_paths(peer, direction);
    for (paths.const_slice()) |path| {
        const filter = t.cluster.network.link_filter(path);
        filter.remove(command);
    }
}
|
|
2674
|
+
|
|
2675
|
+
/// Sets the drop-packet callback on every path between the selected replicas and `peer`
/// in the given `direction`. Passing null clears the callback on those paths.
pub fn drop_fn(
    t: *const TestReplicas,
    peer: ProcessSelector,
    direction: LinkDirection,
    comptime drop_message_fn: ?fn (message: *const Message) bool,
) void {
    const paths = t.peer_paths(peer, direction);
    for (paths.const_slice()) |path| {
        const callback = t.cluster.network.link_drop_packet_fn(path);
        callback.* = if (drop_message_fn) |f| &f else null;
    }
}
|
|
2689
|
+
|
|
2690
|
+
/// Inserts `command` into the record set (`link_record`) of every path between the selected
/// replicas and `peer` in the given `direction`.
pub fn record(
    t: *const TestReplicas,
    peer: ProcessSelector,
    direction: LinkDirection,
    command: vsr.Command,
) void {
    const paths = t.peer_paths(peer, direction);
    for (paths.const_slice()) |path| {
        const recorded = t.cluster.network.link_record(path);
        recorded.insert(command);
    }
}
|
|
2699
|
+
|
|
2700
|
+
/// Forwards to `Network.replay_recorded()` on the cluster's network.
/// The selected replicas are not consulted.
pub fn replay_recorded(t: *const TestReplicas) void {
    const network = t.cluster.network;
    network.replay_recorded();
}
|
|
2705
|
+
|
|
2706
|
+
// Capacity of the path list returned by `peer_paths`.
// The `- 1` term: no route to self.
const paths_max = constants.members_max * (constants.members_max - 1 + constants.clients_max);

/// Builds the list of network paths between each selected replica and each process matched
/// by `peer`. For every (replica, peer) pair, the replica -> peer path is added for
/// `.outgoing`/`.bidirectional`, then the peer -> replica path for `.incoming`/`.bidirectional`.
fn peer_paths(
    t: *const TestReplicas,
    peer: ProcessSelector,
    direction: LinkDirection,
) stdx.BoundedArrayType(Network.Path, paths_max) {
    const Paths = stdx.BoundedArrayType(Network.Path, paths_max);
    const with_outgoing = direction == .bidirectional or direction == .outgoing;
    const with_incoming = direction == .bidirectional or direction == .incoming;

    var paths = Paths{};
    const peers = t.context.processes(peer);
    for (t.replicas.const_slice()) |replica_index| {
        const process_replica = Process{ .replica = replica_index };
        for (peers.const_slice()) |process_peer| {
            if (with_outgoing) {
                paths.push(.{ .source = process_replica, .target = process_peer });
            }
            if (with_incoming) {
                paths.push(.{ .source = process_peer, .target = process_replica });
            }
        }
    }
    return paths;
}
|
|
2729
|
+
|
|
2730
|
+
/// Checks that every selected replica has finished syncing its content.
/// Returns `error.SyncContentPending` for the first replica whose `sync_content_done()` is
/// false; otherwise runs the cluster's storage checker (`replica_sync`) on each replica and
/// propagates its error. Asserts that at least one replica is selected.
fn expect_sync_done(t: TestReplicas) !void {
    assert(t.replicas.count() > 0);

    const checker = &t.cluster.storage_checker;
    for (t.replicas.const_slice()) |replica_index| {
        const replica: *const Cluster.Replica = &t.cluster.replicas[replica_index];

        const content_done = replica.sync_content_done();
        if (!content_done) return error.SyncContentPending;

        // If the replica has finished syncing, but not yet checkpointed, then it might not have
        // updated its sync_op_max.
        maybe(replica.superblock.staging.vsr_state.sync_op_max > 0);

        try checker.replica_sync(Cluster.Replica, replica);
    }
}
|
|
2744
|
+
|
|
2745
|
+
/// Compares the grid of each replica in `got` against the single replica in `want`.
/// For every block address from 1 through `block_address_max()`, asserts that both free sets
/// agree on whether the address is free, and for acquired addresses expects the headers at
/// the start of the two blocks to be equal.
/// Asserts that `want` selects exactly one replica and `got` selects at least one.
fn expect_equal_grid(want: TestReplicas, got: TestReplicas) !void {
    assert(want.replicas.count() == 1);
    assert(got.replicas.count() > 0);

    const header_size = @sizeOf(vsr.Header);
    const replica_want: *const Cluster.Replica = &want.cluster.replicas[want.replicas.get(0)];

    for (got.replicas.const_slice()) |replica_index| {
        const replica_got: *const Cluster.Replica = &got.cluster.replicas[replica_index];

        const address_max = want.context.block_address_max();
        var address: u64 = 1;
        while (address <= address_max) : (address += 1) {
            const free_want = replica_want.grid.free_set.is_free(address);
            const free_got = replica_got.grid.free_set.is_free(address);
            assert(free_want == free_got);
            // Free blocks have no content to compare.
            if (free_want) continue;

            const block_want = replica_want.superblock.storage.grid_block(address).?;
            const block_got = replica_got.superblock.storage.grid_block(address).?;

            const header_want = std.mem.bytesToValue(vsr.Header, block_want[0..header_size]);
            const header_got = std.mem.bytesToValue(vsr.Header, block_got[0..header_size]);
            try expectEqual(header_want, header_got);
        }
    }
}
|
|
2771
|
+
};
|
|
2772
|
+
|
|
2773
|
+
/// A selection of cluster clients that tests drive as a group.
const TestClients = struct {
    context: *TestContext,
    cluster: *Cluster,
    /// Indexes (into `cluster.clients`) of the selected clients.
    clients: stdx.BoundedArrayType(usize, constants.clients_max),
    /// Cumulative number of requests scheduled through `request()`.
    requests: usize = 0,

    /// Raises the cumulative request count of this selection to `requests`, then ticks the
    /// cluster until it has been quiet for `tick_max` ticks, and finally expects the total
    /// number of replies to equal `expect_replies`.
    ///
    /// New requests are distributed round-robin over the selected clients by bumping
    /// `context.client_requests`. While ticking, each idle client with pending requests
    /// either registers (when its `request_number` is 0) or sends an `.echo` request.
    ///
    /// Asserts that `requests` is not below the current count, and that at least one client
    /// is selected whenever new requests must be scheduled.
    pub fn request(t: *TestClients, requests: usize, expect_replies: usize) !void {
        assert(t.requests <= requests);
        // With no selected client the distribution loop below could never make progress.
        assert(t.clients.count() > 0 or t.requests == requests);
        defer assert(t.requests == requests);

        outer: while (true) {
            for (t.clients.const_slice()) |c| {
                if (t.requests == requests) break :outer;
                t.context.client_requests[c] += 1;
                t.requests += 1;
            }
        }

        const tick_max = 3_000;
        var tick: usize = 0;
        while (tick < tick_max) : (tick += 1) {
            // Restart the countdown whenever `context.tick()` returns true.
            if (t.context.tick()) tick = 0;

            for (t.clients.const_slice()) |c| {
                if (t.cluster.clients[c]) |*client| {
                    if (client.request_inflight == null and
                        t.context.client_requests[c] > client.request_number)
                    {
                        if (client.request_number == 0) {
                            t.cluster.register(c);
                        } else {
                            const message = client.get_message();
                            errdefer client.release_message(message);

                            const body_size = 123;
                            @memset(message.buffer[@sizeOf(vsr.Header)..][0..body_size], 42);
                            t.cluster.request(c, .echo, message, body_size);
                        }
                    }
                }
            }
        }
        // `expectEqual` takes the expected value first, so that a failure is reported as
        // "expected <expect_replies>, found <actual replies>".
        try std.testing.expectEqual(expect_replies, t.replies());
    }

    /// Returns the sum of `context.client_replies` over the selected clients.
    pub fn replies(t: *const TestClients) usize {
        var replies_total: usize = 0;
        for (t.clients.const_slice()) |c| replies_total += t.context.client_replies[c];
        return replies_total;
    }

    /// Returns the eviction reason shared by all selected clients (null if none is recorded,
    /// or if no client is selected). Asserts that every selected client has the same value.
    pub fn eviction_reason(t: *const TestClients) ?vsr.Header.Eviction.Reason {
        var evicted_all: ?vsr.Header.Eviction.Reason = null;
        for (t.clients.const_slice(), 0..) |c, i| {
            const client_eviction_reason = t.cluster.client_eviction_reasons[c];
            if (i == 0) {
                assert(evicted_all == null);
            } else {
                assert(evicted_all == client_eviction_reason);
            }
            evicted_all = client_eviction_reason;
        }
        return evicted_all;
    }
};
|
|
2838
|
+
|
|
2839
|
+
/// TestClientBus supports tests which require fine-grained control of the client protocol.
/// Note that in particular, TestClientBus does *not* implement message retries.
const TestClientBus = struct {
    const MessagePool = @import("../message_pool.zig").MessagePool;
    const MessageBus = Cluster.MessageBus;

    context: *TestContext,
    client_id: u128,
    message_pool: *MessagePool,
    message_bus: MessageBus,
    /// The most recent `.reply`/`.eviction` message received, holding its own reference.
    reply: ?*Message = null,

    /// Allocates a client bus with its own message pool and links it into the cluster network
    /// as client `client_id`. Also turns off `state_checker.clients_exhaustive`.
    /// The caller releases the returned bus with `deinit()`.
    fn init(context: *TestContext, client_id: u128) !*TestClientBus {
        const pool = try allocator.create(MessagePool);
        errdefer allocator.destroy(pool);

        pool.* = try MessagePool.init(allocator, .client);
        errdefer pool.deinit(allocator);

        const bus = try allocator.create(TestClientBus);
        errdefer allocator.destroy(bus);

        bus.* = .{
            .context = context,
            .client_id = client_id,
            .message_pool = pool,
            .message_bus = try MessageBus.init(
                allocator,
                .{ .client = client_id },
                pool,
                on_messages,
                .{ .network = context.cluster.network },
            ),
        };
        errdefer bus.message_bus.deinit(allocator);

        context.cluster.state_checker.clients_exhaustive = false;
        context.cluster.network.link(bus.message_bus.process, &bus.message_bus);

        return bus;
    }

    /// Drops any held reply, then tears down the message bus and the message pool and frees
    /// both the pool and the bus itself. `t` must not be used afterwards.
    pub fn deinit(t: *TestClientBus) void {
        if (t.reply) |held_reply| {
            t.message_pool.unref(held_reply);
            t.reply = null;
        }

        const pool = t.message_pool;
        t.message_bus.deinit(allocator);
        pool.deinit(allocator);
        allocator.destroy(pool);
        allocator.destroy(t);
    }

    /// Message bus callback: consumes every complete message in `buffer`.
    /// A `.reply` or `.eviction` is stored in `t.reply` (which must be empty), `.pong_client`
    /// is ignored, and any other command is unreachable.
    fn on_messages(message_bus: *Cluster.MessageBus, buffer: *MessageBuffer) void {
        const t: *TestClientBus = @fieldParentPtr("message_bus", message_bus);
        const cluster_id = t.context.cluster.options.cluster_id;

        while (buffer.next_header()) |header| {
            const message = buffer.consume_message(t.message_pool, &header);
            defer t.message_pool.unref(message);

            assert(message.header.cluster == cluster_id);

            switch (message.header.command) {
                .pong_client => {},
                .reply, .eviction => {
                    assert(t.reply == null);
                    // Take an extra reference: the deferred unref above drops ours.
                    t.reply = message.ref();
                },
                else => unreachable,
            }
        }
    }

    /// Sends a single request with the given `header` and `body` to `replica`.
    /// Asserts that `replica` is a valid replica index and that `body` fits in a message.
    pub fn request(
        t: *TestClientBus,
        replica: u8,
        header: *const vsr.Header.Request,
        body: []const u8,
    ) void {
        assert(replica < t.context.cluster.replicas.len);
        assert(body.len <= constants.message_body_size_max);

        const message = t.message_pool.get_message(.request);
        defer t.message_pool.unref(message);

        message.header.* = header.*;
        const body_target = message.buffer[@sizeOf(vsr.Header)..];
        stdx.copy_disjoint(.inexact, u8, body_target, body);

        t.message_bus.send_message_to_replica(replica, message.base());
    }
};
|