tigerbeetle 0.0.36 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,1198 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const maybe = stdx.maybe;
|
|
4
|
+
const mem = std.mem;
|
|
5
|
+
const log = std.log.scoped(.cluster);
|
|
6
|
+
|
|
7
|
+
const stdx = @import("stdx");
|
|
8
|
+
const Ratio = stdx.PRNG.Ratio;
|
|
9
|
+
|
|
10
|
+
const constants = @import("../constants.zig");
|
|
11
|
+
const message_pool = @import("../message_pool.zig");
|
|
12
|
+
const ratio = stdx.PRNG.ratio;
|
|
13
|
+
const MessagePool = message_pool.MessagePool;
|
|
14
|
+
const Message = MessagePool.Message;
|
|
15
|
+
const IO = @import("io.zig").IO;
|
|
16
|
+
|
|
17
|
+
const AOF = @import("../aof.zig").AOFType(IO);
|
|
18
|
+
const TimeSim = @import("time.zig").TimeSim;
|
|
19
|
+
const Multiversion = vsr.multiversion.Multiversion;
|
|
20
|
+
const IdPermutation = @import("id.zig").IdPermutation;
|
|
21
|
+
|
|
22
|
+
const StateCheckerType = @import("cluster/state_checker.zig").StateCheckerType;
|
|
23
|
+
const StorageChecker = @import("cluster/storage_checker.zig").StorageChecker;
|
|
24
|
+
const GridChecker = @import("cluster/grid_checker.zig").GridChecker;
|
|
25
|
+
const ManifestCheckerType = @import("cluster/manifest_checker.zig").ManifestCheckerType;
|
|
26
|
+
const JournalCheckerType = @import("cluster/journal_checker.zig").JournalCheckerType;
|
|
27
|
+
|
|
28
|
+
const vsr = @import("../vsr.zig");
|
|
29
|
+
const format_writes_max = @import("../vsr/replica_format.zig").writes_max;
|
|
30
|
+
|
|
31
|
+
const MiB = stdx.MiB;
|
|
32
|
+
|
|
33
|
+
/// Health state the cluster tracks for each node (see the `replica_health` slice).
/// `init()` starts every node as `.up` with `paused = false`.
pub const ReplicaHealth = union(enum) {
|
|
34
|
+
/// The node is up. `paused` is payload carried only by this state.
/// NOTE(review): presumably a paused replica stays alive but is not ticked — confirm at use sites.
up: struct { paused: bool },
|
|
35
|
+
/// NOTE(review): presumably the node is crashed/stopped — confirm at use sites.
down,
|
|
36
|
+
/// NOTE(review): presumably a `ReplicaReformat` is in progress for this node — confirm.
reformatting,
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
/// One entry of `Options.releases`.
/// `init()` asserts that, between consecutive entries, `release` strictly increases,
/// `release_client_min` never decreases, and an entry's `release_client_min` does not exceed the
/// next entry's `release`.
pub const Release = struct {
|
|
40
|
+
/// The release version itself; `releases[0].release` is the version passed to `vsr.format`.
release: vsr.Release,
|
|
41
|
+
/// NOTE(review): presumably the oldest client release compatible with `release` — confirm
/// against the multiversion code.
release_client_min: vsr.Release,
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
/// Kinds of failure, each mapped to a distinct `u8` value.
/// Integer values represent exit codes.
|
|
45
|
+
// TODO This doesn't really belong in Cluster, but it is needed here so that StateChecker failures
|
|
46
|
+
// use the particular exit code.
|
|
47
|
+
pub const Failure = enum(u8) {
|
|
48
|
+
/// Any assertion crash will be given an exit code of 127 by default.
|
|
49
|
+
crash = 127,
|
|
50
|
+
/// NOTE(review): presumably "the cluster stopped making progress" — confirm at the call sites.
liveness = 128,
|
|
51
|
+
/// NOTE(review): presumably "a checker detected incorrect state" — confirm at the call sites.
correctness = 129,
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
/// Shift the id-generating index because the simulator network expects client ids to never collide
|
|
55
|
+
/// with a replica index.
|
|
56
|
+
/// `init()` derives client `i`'s id as
/// `client_id_permutation.encode(i + client_id_permutation_shift)`.
const client_id_permutation_shift = constants.members_max;
|
|
57
|
+
|
|
58
|
+
pub fn ClusterType(comptime StateMachineType: anytype) type {
|
|
59
|
+
return struct {
|
|
60
|
+
const Cluster = @This();
|
|
61
|
+
|
|
62
|
+
pub const Network = @import("cluster/network.zig").Network;
|
|
63
|
+
pub const NetworkOptions = @import("cluster/network.zig").NetworkOptions;
|
|
64
|
+
pub const Storage = @import("storage.zig").Storage;
|
|
65
|
+
pub const StorageFaultAtlas = @import("storage.zig").ClusterFaultAtlas;
|
|
66
|
+
pub const Tracer = Storage.Tracer;
|
|
67
|
+
pub const SuperBlock = vsr.SuperBlockType(Storage);
|
|
68
|
+
pub const MessageBus = @import("cluster/message_bus.zig").MessageBus;
|
|
69
|
+
pub const StateMachine = StateMachineType(Storage);
|
|
70
|
+
pub const Replica = vsr.ReplicaType(StateMachine, MessageBus, Storage, AOF);
|
|
71
|
+
pub const ReplicaReformat =
|
|
72
|
+
vsr.ReplicaReformatType(StateMachine, MessageBus, Storage);
|
|
73
|
+
pub const Client = vsr.ClientType(StateMachine.Operation, MessageBus);
|
|
74
|
+
pub const StateChecker = StateCheckerType(Client, Replica);
|
|
75
|
+
pub const ManifestChecker = ManifestCheckerType(StateMachine.Forest);
|
|
76
|
+
pub const JournalChecker = JournalCheckerType(Replica);
|
|
77
|
+
|
|
78
|
+
/// Cluster-level configuration; `init()` receives it as `options.cluster`.
pub const Options = struct {
|
|
79
|
+
/// Cluster identifier; `init()` forwards it to the tracers, the clients, the state checker and
/// `vsr.format`.
cluster_id: u128,
|
|
80
|
+
/// Number of active replicas; `init()` asserts `1 <= replica_count <= 6`.
replica_count: u8,
|
|
81
|
+
/// Number of standbys; `init()` sizes per-node arrays as `replica_count + standby_count`.
standby_count: u8,
|
|
82
|
+
/// Number of clients; `init()` asserts it is non-zero.
client_count: u8,
|
|
83
|
+
/// `init()` asserts this is a multiple of `constants.sector_size` and at most
/// `constants.storage_size_limit_max`.
storage_size_limit: u64,
|
|
84
|
+
/// `init()` adds this to `client_count` (and to the network's client count) when sizing the
/// client arrays.
/// NOTE(review): presumably each reformat consumes one of these extra client slots — confirm.
reformats_max: u8,
|
|
85
|
+
/// Seed passed to `stdx.PRNG.from_seed` in `init()`.
seed: u64,
|
|
86
|
+
/// A monotonically-increasing list of releases.
|
|
87
|
+
/// Initially:
|
|
88
|
+
/// - All replicas are formatted and started with releases[0].
|
|
89
|
+
/// - Only releases[0] is "bundled" in each replica. (Use `replica_restart()` to add
|
|
90
|
+
/// more).
|
|
91
|
+
/// `init()` asserts the list is non-empty.
releases: []const Release,
|
|
92
|
+
/// `init()` assigns this to every client's `release` field.
client_release: vsr.Release,
|
|
93
|
+
/// NOTE(review): not consumed in the visible part of `init()`; presumably forwarded to each
/// replica's state machine — confirm.
state_machine: StateMachine.Options,
|
|
94
|
+
/// Defaults to `.{}`.
/// NOTE(review): presumably forwarded to each replica when it is opened — confirm.
replicate_options: Replica.ReplicateOptions = .{},
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
/// Optional hooks supplied to `init()`; both fields default to `null`.
pub const Callbacks = struct {
|
|
98
|
+
/// Invoked when a replica produces a reply.
|
|
99
|
+
/// Includes operation=register messages.
|
|
100
|
+
/// `client` is null when the prepare does not originate from a client.
/// NOTE(review): a non-null `client` is presumably an index into `Cluster.clients` — confirm.
|
|
101
|
+
on_cluster_reply: ?*const fn (
|
|
102
|
+
cluster: *Cluster,
|
|
103
|
+
client: ?usize,
|
|
104
|
+
prepare: *const Message.Prepare,
|
|
105
|
+
reply: *const Message.Reply,
|
|
106
|
+
) void = null,
|
|
107
|
+
|
|
108
|
+
/// Invoked when a client receives a reply.
|
|
109
|
+
/// Includes operation=register messages.
/// NOTE(review): `client` is presumably an index into `Cluster.clients` — confirm.
|
|
110
|
+
on_client_reply: ?*const fn (
|
|
111
|
+
cluster: *Cluster,
|
|
112
|
+
client: usize,
|
|
113
|
+
request: *const Message.Request,
|
|
114
|
+
reply: *const Message.Reply,
|
|
115
|
+
) void = null,
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
allocator: mem.Allocator,
|
|
119
|
+
prng: stdx.PRNG,
|
|
120
|
+
options: Options,
|
|
121
|
+
callbacks: Callbacks,
|
|
122
|
+
|
|
123
|
+
network: *Network,
|
|
124
|
+
storages: []Storage,
|
|
125
|
+
storage_fault_atlas: *StorageFaultAtlas,
|
|
126
|
+
|
|
127
|
+
aofs: []AOF,
|
|
128
|
+
aof_ios: []IO,
|
|
129
|
+
aof_io_files: [][1]IO.File,
|
|
130
|
+
|
|
131
|
+
/// NB: includes both active replicas and standbys.
|
|
132
|
+
replicas: []Replica,
|
|
133
|
+
replica_pools: []MessagePool,
|
|
134
|
+
replica_times: []TimeSim,
|
|
135
|
+
replica_tracers: []Tracer,
|
|
136
|
+
replica_health: []ReplicaHealth,
|
|
137
|
+
replica_upgrades: []?vsr.Release,
|
|
138
|
+
replica_reformats: []?ReplicaReformat,
|
|
139
|
+
replica_releases_bundled: []vsr.ReleaseList,
|
|
140
|
+
replica_pipeline_requests_limit: u32,
|
|
141
|
+
replica_count: u8,
|
|
142
|
+
standby_count: u8,
|
|
143
|
+
reformat_count: u32 = 0,
|
|
144
|
+
|
|
145
|
+
clients: []?Client,
|
|
146
|
+
client_pools: []MessagePool,
|
|
147
|
+
client_times: []TimeSim,
|
|
148
|
+
/// Updated when the *client* is informed of the eviction.
|
|
149
|
+
/// (Which may be some time after the client is actually evicted by the cluster.)
|
|
150
|
+
client_eviction_reasons: []?vsr.Header.Eviction.Reason,
|
|
151
|
+
client_eviction_requests_cancelled: u32 = 0,
|
|
152
|
+
|
|
153
|
+
client_id_permutation: IdPermutation,
|
|
154
|
+
|
|
155
|
+
state_checker: StateChecker,
|
|
156
|
+
storage_checker: StorageChecker,
|
|
157
|
+
grid_checker: *GridChecker,
|
|
158
|
+
manifest_checker: ManifestChecker,
|
|
159
|
+
|
|
160
|
+
context: ?*anyopaque = null,
|
|
161
|
+
|
|
162
|
+
pub fn init(
|
|
163
|
+
allocator: mem.Allocator,
|
|
164
|
+
options: struct {
|
|
165
|
+
cluster: Options,
|
|
166
|
+
network: NetworkOptions,
|
|
167
|
+
storage: Storage.Options,
|
|
168
|
+
storage_fault_atlas: StorageFaultAtlas.Options,
|
|
169
|
+
callbacks: Callbacks,
|
|
170
|
+
},
|
|
171
|
+
) !*Cluster {
|
|
172
|
+
assert(options.cluster.replica_count >= 1);
|
|
173
|
+
assert(options.cluster.replica_count <= 6);
|
|
174
|
+
assert(options.cluster.client_count > 0);
|
|
175
|
+
assert(options.cluster.storage_size_limit % constants.sector_size == 0);
|
|
176
|
+
assert(options.cluster.storage_size_limit <= constants.storage_size_limit_max);
|
|
177
|
+
assert(options.cluster.releases.len > 0);
|
|
178
|
+
assert(options.storage.replica_index == null);
|
|
179
|
+
assert(options.storage.fault_atlas == null);
|
|
180
|
+
|
|
181
|
+
for (
|
|
182
|
+
options.cluster.releases[0 .. options.cluster.releases.len - 1],
|
|
183
|
+
options.cluster.releases[1..],
|
|
184
|
+
) |release_a, release_b| {
|
|
185
|
+
assert(release_a.release.value < release_b.release.value);
|
|
186
|
+
assert(release_a.release_client_min.value <= release_b.release.value);
|
|
187
|
+
assert(release_a.release_client_min.value <= release_b.release_client_min.value);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
const client_count_total = options.cluster.client_count + options.cluster.reformats_max;
|
|
191
|
+
const node_count = options.cluster.replica_count + options.cluster.standby_count;
|
|
192
|
+
|
|
193
|
+
var prng = stdx.PRNG.from_seed(options.cluster.seed);
|
|
194
|
+
|
|
195
|
+
// TODO(Zig) Client.init()'s MessagePool.Options require a reference to the network.
|
|
196
|
+
// Use @returnAddress() instead.
|
|
197
|
+
var network = try allocator.create(Network);
|
|
198
|
+
errdefer allocator.destroy(network);
|
|
199
|
+
|
|
200
|
+
var network_options = options.network;
|
|
201
|
+
network_options.client_count += options.cluster.reformats_max;
|
|
202
|
+
network.* = try Network.init(allocator, network_options);
|
|
203
|
+
errdefer network.deinit();
|
|
204
|
+
|
|
205
|
+
const storage_fault_atlas = try allocator.create(StorageFaultAtlas);
|
|
206
|
+
errdefer allocator.destroy(storage_fault_atlas);
|
|
207
|
+
|
|
208
|
+
storage_fault_atlas.* = try StorageFaultAtlas.init(
|
|
209
|
+
allocator,
|
|
210
|
+
options.cluster.replica_count,
|
|
211
|
+
&prng,
|
|
212
|
+
options.storage_fault_atlas,
|
|
213
|
+
);
|
|
214
|
+
errdefer storage_fault_atlas.deinit(allocator);
|
|
215
|
+
|
|
216
|
+
var grid_checker = try allocator.create(GridChecker);
|
|
217
|
+
errdefer allocator.destroy(grid_checker);
|
|
218
|
+
|
|
219
|
+
grid_checker.* = GridChecker.init(allocator);
|
|
220
|
+
errdefer grid_checker.deinit();
|
|
221
|
+
|
|
222
|
+
const storages = try allocator.alloc(Storage, node_count);
|
|
223
|
+
errdefer allocator.free(storages);
|
|
224
|
+
|
|
225
|
+
for (storages, 0..) |*storage, replica_index| {
|
|
226
|
+
errdefer for (storages[0..replica_index]) |*s| s.deinit(allocator);
|
|
227
|
+
var storage_options = options.storage;
|
|
228
|
+
storage_options.replica_index = @intCast(replica_index);
|
|
229
|
+
storage_options.fault_atlas = storage_fault_atlas;
|
|
230
|
+
storage_options.grid_checker = grid_checker;
|
|
231
|
+
storage_options.iops_write_max = @max(format_writes_max, constants.iops_write_max);
|
|
232
|
+
storage.* = try Storage.init(allocator, storage_options);
|
|
233
|
+
// Disable most faults at startup,
|
|
234
|
+
// so that the replicas don't get stuck recovering_head.
|
|
235
|
+
storage.faulty =
|
|
236
|
+
replica_index >= vsr.quorums(options.cluster.replica_count).view_change;
|
|
237
|
+
}
|
|
238
|
+
errdefer for (storages) |*storage| storage.deinit(allocator);
|
|
239
|
+
|
|
240
|
+
var replica_pools = try allocator.alloc(MessagePool, node_count);
|
|
241
|
+
errdefer allocator.free(replica_pools);
|
|
242
|
+
|
|
243
|
+
// There may be more clients than `clients_max` (to test session eviction).
|
|
244
|
+
// +1 is for pulse which uses client_id = 0.
|
|
245
|
+
const pipeline_requests_limit =
|
|
246
|
+
(@min(options.cluster.client_count, constants.clients_max) + @as(u8, 1)) -|
|
|
247
|
+
constants.pipeline_prepare_queue_max;
|
|
248
|
+
|
|
249
|
+
for (replica_pools, 0..) |*pool, i| {
|
|
250
|
+
errdefer for (replica_pools[0..i]) |*p| p.deinit(allocator);
|
|
251
|
+
pool.* = try MessagePool.init(allocator, .{ .replica = .{
|
|
252
|
+
.members_count = options.cluster.replica_count + options.cluster.standby_count,
|
|
253
|
+
.pipeline_requests_limit = pipeline_requests_limit,
|
|
254
|
+
.message_bus = .testing,
|
|
255
|
+
} });
|
|
256
|
+
}
|
|
257
|
+
errdefer for (replica_pools) |*pool| pool.deinit(allocator);
|
|
258
|
+
|
|
259
|
+
const replica_times = try allocator.alloc(TimeSim, node_count);
|
|
260
|
+
errdefer allocator.free(replica_times);
|
|
261
|
+
@memset(replica_times, .{
|
|
262
|
+
.resolution = constants.tick_ms * std.time.ns_per_ms,
|
|
263
|
+
.offset_type = .linear,
|
|
264
|
+
.offset_coefficient_A = 0,
|
|
265
|
+
.offset_coefficient_B = 0,
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
const replica_tracers = try allocator.alloc(Tracer, node_count);
|
|
269
|
+
errdefer allocator.free(replica_tracers);
|
|
270
|
+
|
|
271
|
+
for (replica_tracers, 0..) |*tracer, replica_index| {
|
|
272
|
+
errdefer for (replica_tracers[0..replica_index]) |*t| t.deinit(allocator);
|
|
273
|
+
const time = replica_times[replica_index].time();
|
|
274
|
+
tracer.* = try Tracer.init(allocator, time, .{ .replica = .{
|
|
275
|
+
.cluster = options.cluster.cluster_id,
|
|
276
|
+
.replica = @intCast(replica_index),
|
|
277
|
+
} }, .{});
|
|
278
|
+
}
|
|
279
|
+
errdefer for (replica_tracers) |*tracer| tracer.deinit(allocator);
|
|
280
|
+
|
|
281
|
+
const replicas = try allocator.alloc(Replica, node_count);
|
|
282
|
+
errdefer allocator.free(replicas);
|
|
283
|
+
|
|
284
|
+
const replica_health = try allocator.alloc(ReplicaHealth, node_count);
|
|
285
|
+
errdefer allocator.free(replica_health);
|
|
286
|
+
@memset(replica_health, .{ .up = .{ .paused = false } });
|
|
287
|
+
|
|
288
|
+
const replica_upgrades = try allocator.alloc(?vsr.Release, node_count);
|
|
289
|
+
errdefer allocator.free(replica_upgrades);
|
|
290
|
+
@memset(replica_upgrades, null);
|
|
291
|
+
|
|
292
|
+
const replica_reformats =
|
|
293
|
+
try allocator.alloc(?ReplicaReformat, options.cluster.replica_count);
|
|
294
|
+
errdefer allocator.free(replica_reformats);
|
|
295
|
+
@memset(replica_reformats, null);
|
|
296
|
+
|
|
297
|
+
var client_pools = try allocator.alloc(MessagePool, client_count_total);
|
|
298
|
+
errdefer allocator.free(client_pools);
|
|
299
|
+
|
|
300
|
+
for (client_pools, 0..) |*pool, i| {
|
|
301
|
+
errdefer for (client_pools[0..i]) |*p| p.deinit(allocator);
|
|
302
|
+
pool.* = try MessagePool.init(allocator, .client);
|
|
303
|
+
}
|
|
304
|
+
errdefer for (client_pools) |*pool| pool.deinit(allocator);
|
|
305
|
+
|
|
306
|
+
const client_eviction_reasons =
|
|
307
|
+
try allocator.alloc(?vsr.Header.Eviction.Reason, client_count_total);
|
|
308
|
+
errdefer allocator.free(client_eviction_reasons);
|
|
309
|
+
@memset(client_eviction_reasons, null);
|
|
310
|
+
|
|
311
|
+
const client_times = try allocator.alloc(TimeSim, client_count_total);
|
|
312
|
+
errdefer allocator.free(client_times);
|
|
313
|
+
@memset(client_times, .{
|
|
314
|
+
.resolution = constants.tick_ms * std.time.ns_per_ms,
|
|
315
|
+
.offset_type = .linear,
|
|
316
|
+
.offset_coefficient_A = 0,
|
|
317
|
+
.offset_coefficient_B = 0,
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
const client_id_permutation = IdPermutation.generate(&prng);
|
|
321
|
+
var clients = try allocator.alloc(?Client, client_count_total);
|
|
322
|
+
errdefer allocator.free(clients);
|
|
323
|
+
|
|
324
|
+
for (clients, 0..) |*client, i| {
|
|
325
|
+
errdefer for (clients[0..i]) |*c| c.*.?.deinit(allocator);
|
|
326
|
+
client.* = try Client.init(
|
|
327
|
+
allocator,
|
|
328
|
+
client_times[i].time(),
|
|
329
|
+
&client_pools[i],
|
|
330
|
+
.{
|
|
331
|
+
.id = client_id_permutation.encode(i + client_id_permutation_shift),
|
|
332
|
+
.cluster = options.cluster.cluster_id,
|
|
333
|
+
.replica_count = options.cluster.replica_count,
|
|
334
|
+
.aof_recovery = false,
|
|
335
|
+
.message_bus_options = .{ .network = network },
|
|
336
|
+
.eviction_callback = client_on_eviction,
|
|
337
|
+
},
|
|
338
|
+
);
|
|
339
|
+
client.*.?.release = options.cluster.client_release;
|
|
340
|
+
}
|
|
341
|
+
errdefer for (clients) |*client| client.*.?.deinit(allocator);
|
|
342
|
+
|
|
343
|
+
var state_checker = try StateChecker.init(allocator, .{
|
|
344
|
+
.cluster_id = options.cluster.cluster_id,
|
|
345
|
+
.replicas = replicas,
|
|
346
|
+
.replica_count = options.cluster.replica_count,
|
|
347
|
+
.clients = clients,
|
|
348
|
+
});
|
|
349
|
+
errdefer state_checker.deinit();
|
|
350
|
+
|
|
351
|
+
var storage_checker = try StorageChecker.init(allocator);
|
|
352
|
+
errdefer storage_checker.deinit(allocator);
|
|
353
|
+
|
|
354
|
+
var manifest_checker = ManifestChecker.init(allocator);
|
|
355
|
+
errdefer manifest_checker.deinit();
|
|
356
|
+
|
|
357
|
+
// Format each replica's storage (equivalent to "tigerbeetle format ...").
|
|
358
|
+
for (storages, 0..) |*storage, replica_index| {
|
|
359
|
+
try vsr.format(
|
|
360
|
+
Storage,
|
|
361
|
+
allocator,
|
|
362
|
+
storage,
|
|
363
|
+
.{
|
|
364
|
+
.cluster = options.cluster.cluster_id,
|
|
365
|
+
.release = options.cluster.releases[0].release,
|
|
366
|
+
.replica = @intCast(replica_index),
|
|
367
|
+
.replica_count = options.cluster.replica_count,
|
|
368
|
+
.view = null,
|
|
369
|
+
},
|
|
370
|
+
);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
const replica_releases_bundled = try allocator.alloc(vsr.ReleaseList, node_count);
|
|
374
|
+
errdefer allocator.free(replica_releases_bundled);
|
|
375
|
+
|
|
376
|
+
// We must heap-allocate the cluster since its pointer will be attached to the replica.
|
|
377
|
+
// TODO(Zig) @returnAddress().
|
|
378
|
+
var cluster = try allocator.create(Cluster);
|
|
379
|
+
errdefer allocator.destroy(cluster);
|
|
380
|
+
|
|
381
|
+
cluster.aofs = try allocator.alloc(AOF, node_count);
|
|
382
|
+
errdefer allocator.free(cluster.aofs);
|
|
383
|
+
|
|
384
|
+
cluster.aof_io_files = try allocator.alloc([1]IO.File, node_count);
|
|
385
|
+
errdefer allocator.free(cluster.aof_io_files);
|
|
386
|
+
|
|
387
|
+
cluster.aof_ios = try allocator.alloc(IO, node_count);
|
|
388
|
+
errdefer allocator.free(cluster.aof_ios);
|
|
389
|
+
|
|
390
|
+
for (
|
|
391
|
+
cluster.aofs,
|
|
392
|
+
cluster.aof_ios,
|
|
393
|
+
cluster.aof_io_files,
|
|
394
|
+
0..,
|
|
395
|
+
) |*aof, *aof_io, *aof_io_file, i| {
|
|
396
|
+
const buffer = try allocator.alignedAlloc(
|
|
397
|
+
u8,
|
|
398
|
+
constants.sector_size,
|
|
399
|
+
// Arbitrary value.
|
|
400
|
+
32 * MiB,
|
|
401
|
+
);
|
|
402
|
+
errdefer allocator.free(buffer);
|
|
403
|
+
|
|
404
|
+
aof_io_file[0] = .{ .buffer = buffer };
|
|
405
|
+
aof_io.* = try IO.init(aof_io_file, .{
|
|
406
|
+
.seed = options.cluster.seed,
|
|
407
|
+
.larger_than_logical_sector_read_fault_probability = Ratio.zero(),
|
|
408
|
+
});
|
|
409
|
+
errdefer for (cluster.aof_ios[0..i]) |*io| io.deinit();
|
|
410
|
+
|
|
411
|
+
aof.* = AOF{
|
|
412
|
+
.io = aof_io,
|
|
413
|
+
.path = "test.aof",
|
|
414
|
+
.fd = 0,
|
|
415
|
+
};
|
|
416
|
+
errdefer for (cluster.aofs[0..i]) |*aof_| aof_.deinit(allocator);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
cluster.* = Cluster{
|
|
420
|
+
.allocator = allocator,
|
|
421
|
+
.prng = prng,
|
|
422
|
+
.options = options.cluster,
|
|
423
|
+
.callbacks = options.callbacks,
|
|
424
|
+
.network = network,
|
|
425
|
+
.storages = storages,
|
|
426
|
+
.aofs = cluster.aofs,
|
|
427
|
+
.aof_ios = cluster.aof_ios,
|
|
428
|
+
.aof_io_files = cluster.aof_io_files,
|
|
429
|
+
.storage_fault_atlas = storage_fault_atlas,
|
|
430
|
+
.replicas = replicas,
|
|
431
|
+
.replica_pools = replica_pools,
|
|
432
|
+
.replica_times = replica_times,
|
|
433
|
+
.replica_tracers = replica_tracers,
|
|
434
|
+
.replica_health = replica_health,
|
|
435
|
+
.replica_upgrades = replica_upgrades,
|
|
436
|
+
.replica_reformats = replica_reformats,
|
|
437
|
+
.replica_pipeline_requests_limit = pipeline_requests_limit,
|
|
438
|
+
.replica_releases_bundled = replica_releases_bundled,
|
|
439
|
+
.replica_count = options.cluster.replica_count,
|
|
440
|
+
.standby_count = options.cluster.standby_count,
|
|
441
|
+
.clients = clients,
|
|
442
|
+
.client_pools = client_pools,
|
|
443
|
+
.client_times = client_times,
|
|
444
|
+
.client_eviction_reasons = client_eviction_reasons,
|
|
445
|
+
.client_id_permutation = client_id_permutation,
|
|
446
|
+
.state_checker = state_checker,
|
|
447
|
+
.storage_checker = storage_checker,
|
|
448
|
+
.grid_checker = grid_checker,
|
|
449
|
+
.manifest_checker = manifest_checker,
|
|
450
|
+
};
|
|
451
|
+
|
|
452
|
+
for (cluster.replicas, 0..) |_, replica_index| {
|
|
453
|
+
errdefer for (replicas[0..replica_index]) |*r| r.deinit(allocator);
|
|
454
|
+
|
|
455
|
+
cluster.replica_releases_bundled[replica_index] = .empty;
|
|
456
|
+
cluster.replica_releases_bundled[replica_index].push(
|
|
457
|
+
options.cluster.releases[0].release,
|
|
458
|
+
);
|
|
459
|
+
|
|
460
|
+
// Nonces are incremented on restart, so spread them out across 128 bit space
|
|
461
|
+
// to avoid collisions.
|
|
462
|
+
const nonce = (@as(u128, replica_index) << 64) + 1;
|
|
463
|
+
try cluster.replica_open(@intCast(replica_index), .{
|
|
464
|
+
.nonce = nonce,
|
|
465
|
+
.release = options.cluster.releases[0].release,
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
errdefer for (cluster.replicas) |*replica| replica.deinit(allocator);
|
|
469
|
+
|
|
470
|
+
for (clients) |*client| {
|
|
471
|
+
client.*.?.on_reply_context = cluster;
|
|
472
|
+
client.*.?.on_reply_callback = client_on_reply;
|
|
473
|
+
network.link(client.*.?.message_bus.process, &client.*.?.message_bus);
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
return cluster;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
/// Tears down the cluster and releases everything it holds, ending with the heap-allocated
/// `Cluster` itself. `cluster` must not be used after this returns.
pub fn deinit(cluster: *Cluster) void {
    // Copy the allocator out once; the last statement destroys the memory behind `cluster`.
    const allocator = cluster.allocator;

    cluster.manifest_checker.deinit();
    cluster.storage_checker.deinit(allocator);
    cluster.state_checker.deinit();
    cluster.network.deinit();

    // Clients that tick() already tore down are null and are skipped.
    for (cluster.clients) |*client_maybe| {
        if (client_maybe.*) |*client| client.deinit(allocator);
    }
    for (cluster.client_pools) |*pool| pool.deinit(allocator);

    // What needs deinitializing for a replica slot depends on its health.
    for (cluster.replicas, cluster.replica_health, 0..) |*replica, health, i| {
        switch (health) {
            .up => replica.deinit(allocator),
            .down => {},
            .reformatting => cluster.replica_reformats[i].?.deinit(allocator),
        }
    }
    for (cluster.replica_tracers) |*tracer| tracer.deinit(allocator);
    for (cluster.replica_pools) |*pool| pool.deinit(allocator);
    for (cluster.storages) |*storage| storage.deinit(allocator);

    for (cluster.aofs) |*aof| aof.close();

    for (cluster.aof_ios) |*io| io.deinit();
    allocator.free(cluster.aof_ios);

    // Each AOF IO file owns a buffer which is freed before the containing slice.
    for (cluster.aof_io_files) |*io_file| {
        for (io_file) |file| allocator.free(file.buffer);
    }
    allocator.free(cluster.aof_io_files);

    cluster.storage_fault_atlas.deinit(allocator);
    cluster.grid_checker.deinit(); // (Storage references this.)

    // Slices owned by the cluster.
    allocator.free(cluster.clients);
    allocator.free(cluster.client_times);
    allocator.free(cluster.client_eviction_reasons);
    allocator.free(cluster.client_pools);
    allocator.free(cluster.replicas);
    allocator.free(cluster.replica_reformats);
    allocator.free(cluster.replica_upgrades);
    allocator.free(cluster.replica_health);
    allocator.free(cluster.replica_times);
    allocator.free(cluster.replica_tracers);
    allocator.free(cluster.replica_pools);
    allocator.free(cluster.storages);
    allocator.free(cluster.aofs);
    allocator.free(cluster.replica_releases_bundled);

    // Single heap objects, with the cluster itself last.
    allocator.destroy(cluster.grid_checker);
    allocator.destroy(cluster.storage_fault_atlas);
    allocator.destroy(cluster.network);
    allocator.destroy(cluster);
}
|
|
535
|
+
|
|
536
|
+
/// Advances the simulation by one tick. First network and storage are stepped repeatedly
/// until neither reports progress, then the network, the live clients and every replica
/// slot are ticked.
pub fn tick(cluster: *Cluster) void {
    // Interleave storage and network steps, to allow for faster-than-a-tick IO.
    while (true) {
        var advanced = cluster.network.step();

        // Tear down every client for which an eviction reason has been recorded.
        for (cluster.clients, cluster.client_eviction_reasons) |*client_maybe, eviction_reason| {
            if (eviction_reason == null) continue;
            if (client_maybe.*) |*client| {
                client.deinit(cluster.allocator);
                client_maybe.* = null;
            }
        }

        for (
            cluster.storages,
            cluster.replica_health,
            cluster.replica_upgrades,
            0..,
        ) |*storage, *health, *upgrade, i| {
            const paused = health.* == .up and health.*.up.paused;
            if (paused) continue;

            // Upgrades immediately follow storage.step(), since upgrades occur at
            // checkpoint completion. (Downgrades are triggered separately – see
            // replica_restart()).
            if (storage.step()) advanced = true;
            if (upgrade.* != null) cluster.replica_release_execute(@intCast(i));
            assert(upgrade.* == null);
        }

        if (!advanced) break;
    }

    cluster.network.tick();

    for (cluster.clients) |*client_maybe| {
        if (client_maybe.*) |*client| client.tick();
    }

    for (
        cluster.storages,
        cluster.replicas,
        cluster.aof_ios,
        cluster.replica_times,
        cluster.replica_health,
        0..,
    ) |*storage, *replica, *aof_io, *time_sim, *health, replica_index| {
        const time = time_sim.time();

        const paused = health.* == .up and health.*.up.paused;
        if (paused) {
            // Tick the time even in a paused state, to simulate VM migration.
            time.tick();
            continue;
        }

        storage.tick();
        switch (health.*) {
            .reformatting => {
                cluster.tick_reformat(@intCast(replica_index));
                time.tick();
            },
            .down => {
                // Keep ticking the time so that it won't have diverged too far to
                // synchronize when the replica restarts.
                time.tick();
            },
            .up => |up| {
                assert(!up.paused);

                replica.tick();
                aof_io.run() catch |err| {
                    std.debug.panic("{}: io.run() failed: error={}", .{
                        replica.replica,
                        err,
                    });
                };

                // For performance, don't run every tick.
                if (cluster.prng.chance(ratio(1, 100))) {
                    JournalChecker.check(replica);
                }

                cluster.state_checker.check_state(replica.replica) catch |err| {
                    fatal(.correctness, "state checker error: {}", .{err});
                };
            },
        }
    }
}
|
|
622
|
+
|
|
623
|
+
/// Polls the in-progress reformat of `replica_index`. While `done()` yields null nothing
/// happens. Once it yields a result (asserted to be `.ok`), the reformat is deinitialized,
/// the slot is marked `.down`, the replica is restarted and the state checker is notified.
fn tick_reformat(cluster: *Cluster, replica_index: u8) void {
    assert(cluster.replica_health[replica_index] == .reformatting);

    const reformat_slot = &cluster.replica_reformats[replica_index];
    const outcome = reformat_slot.*.?.done() orelse return;
    assert(outcome == .ok);

    reformat_slot.*.?.deinit(cluster.allocator);
    reformat_slot.* = null;

    cluster.replica_health[replica_index] = .down;
    // A failure to restart is treated as impossible here.
    cluster.replica_restart(replica_index) catch unreachable;
    cluster.state_checker.reformat(replica_index);
}
|
|
636
|
+
|
|
637
|
+
/// Overwrites the bundled release list recorded for `replica_index` with a copy of
/// `releases`.
pub fn replica_set_releases(
    cluster: *Cluster,
    replica_index: u8,
    releases: *const vsr.ReleaseList,
) void {
    const bundled = &cluster.replica_releases_bundled[replica_index];
    bundled.* = releases.*;
}
|
|
644
|
+
|
|
645
|
+
/// Marks a running replica as paused. The replica must be `.up` and not already paused.
pub fn replica_pause(cluster: *Cluster, replica_index: u8) void {
    const health = &cluster.replica_health[replica_index];
    assert(health.* == .up);
    assert(!health.*.up.paused);

    health.*.up.paused = true;
}
|
|
650
|
+
|
|
651
|
+
/// Clears the paused flag of a running replica. The replica must be `.up` and paused.
pub fn replica_unpause(cluster: *Cluster, replica_index: u8) void {
    const health = &cluster.replica_health[replica_index];
    assert(health.* == .up);
    assert(health.*.up.paused);

    health.*.up.paused = false;
}
|
|
656
|
+
|
|
657
|
+
/// Reopens a `.down` replica with its nonce incremented and the last release of its
/// bundled release list, then enables it.
/// Returns an error when the replica was unable to recover (open).
pub fn replica_restart(
    cluster: *Cluster,
    replica_index: u8,
) !void {
    assert(cluster.replica_health[replica_index] == .down);
    assert(cluster.replica_upgrades[replica_index] == null);

    // On every exit path no upgrade may be left pending; the replica may or may not be up.
    defer {
        assert(cluster.replica_upgrades[replica_index] == null);
        maybe(cluster.replica_health[replica_index] == .up);
    }

    const nonce_next = cluster.replicas[replica_index].nonce + 1;
    const release_latest = cluster.replica_releases_bundled[replica_index].last();
    try cluster.replica_open(replica_index, .{
        .nonce = nonce_next,
        .release = release_latest,
    });
    cluster.replica_enable(replica_index);

    if (cluster.replica_upgrades[replica_index] != null) {
        // Upgrade the replica promptly, rather than waiting until the next tick().
        // This ensures that the restart completes synchronously, as the caller expects.
        cluster.replica_release_execute(replica_index);
    }
}
|
|
680
|
+
|
|
681
|
+
/// Simulates a random crash/panic by resetting a running replica to its initial state.
/// Persistent storage is left untouched, and messages currently in flight to/from the
/// replica stay in the network.
pub fn replica_crash(cluster: *Cluster, replica_index: u8) void {
    assert(cluster.replica_health[replica_index] == .up);

    // Reset the storage before the replica so that pending writes can (partially) finish.
    const storage = &cluster.storages[replica_index];
    storage.reset();

    const replica = &cluster.replicas[replica_index];
    replica.deinit(cluster.allocator);

    cluster.network.process_disable(.{ .replica = replica_index });
    cluster.replica_health[replica_index] = .down;
    cluster.log_replica(.crash, replica_index);

    // Ensure that none of the replica's messages leaked when it was deinitialized.
    const message_bus = cluster.network.get_message_bus(.{ .replica = replica_index });
    assert(message_bus.pool.messages_max == message_bus.pool.free_list.count());
}
|
|
699
|
+
|
|
700
|
+
/// Re-enables a `.down` replica's process in the network, marks it up (unpaused) and logs
/// the recovery.
fn replica_enable(cluster: *Cluster, replica_index: u8) void {
    const health = &cluster.replica_health[replica_index];
    assert(health.* == .down);

    cluster.network.process_enable(.{ .replica = replica_index });
    health.* = .{ .up = .{ .paused = false } };
    cluster.log_replica(.recover, replica_index);
}
|
|
707
|
+
|
|
708
|
+
/// Opens the replica at `replica_index` with the given nonce and release. The replica's
/// tracer is recreated, its AOF and AOF IO are reset, and after a successful open the
/// replica is wired to the cluster's event callback and linked into the network.
/// `options.release` must be one of `cluster.options.releases`.
fn replica_open(cluster: *Cluster, replica_index: u8, options: struct {
    nonce: u128,
    release: vsr.Release,
}) !void {
    // Look up the minimum client release that belongs to the requested release.
    const release_client_min = found: {
        for (cluster.options.releases) |candidate| {
            if (candidate.release.value == options.release.value) {
                break :found candidate.release_client_min;
            }
        }
        unreachable;
    };

    // Re-initialize the trace to get a clean state.
    const tracer = &cluster.replica_tracers[replica_index];
    tracer.deinit(cluster.allocator);
    tracer.* = try Tracer.init(
        cluster.allocator,
        cluster.replica_times[replica_index].time(),
        .{ .replica = .{
            .cluster = cluster.replicas[replica_index].cluster,
            .replica = @intCast(replica_index),
        } },
        .{},
    );

    cluster.aofs[replica_index].reset();
    cluster.aof_ios[replica_index].reset();

    var replica = &cluster.replicas[replica_index];
    try replica.open(
        cluster.allocator,
        cluster.replica_times[replica_index].time(),
        &cluster.storages[replica_index],
        &cluster.replica_pools[replica_index],
        .{
            .node_count = cluster.options.replica_count + cluster.options.standby_count,
            .pipeline_requests_limit = cluster.replica_pipeline_requests_limit,
            .aof = &cluster.aofs[replica_index],
            .aof_recovery = false,
            // TODO Test restarting with a higher storage limit.
            .storage_size_limit = cluster.options.storage_size_limit,
            .nonce = options.nonce,
            .state_machine_options = cluster.options.state_machine,
            .message_bus_options = .{ .network = cluster.network },
            .release = options.release,
            .release_client_min = release_client_min,
            .multiversion = replica_multiversion(replica),
            .test_context = cluster,
            .tracer = tracer,
            .replicate_options = cluster.options.replicate_options,
            .commit_stall_probability = null,
        },
    );

    // The opened replica must agree with the cluster's configuration.
    assert(replica.cluster == cluster.options.cluster_id);
    assert(replica.replica == replica_index);
    assert(replica.replica_count == cluster.replica_count);
    assert(replica.standby_count == cluster.standby_count);

    replica.event_callback = on_replica_event;
    cluster.network.link(replica.message_bus.process, &replica.message_bus);
}
|
|
765
|
+
|
|
766
|
+
/// Builds the `Multiversion` interface handed to a replica. The context is the replica
/// itself, and the cluster is recovered from the replica's `test_context`.
fn replica_multiversion(replica_context: *Replica) Multiversion {
    const hooks = struct {
        /// Reports the release list the cluster currently records for this replica.
        fn releases_bundled(context: *anyopaque) vsr.ReleaseList {
            const self: *Replica = @ptrCast(@alignCast(context));
            const owner: *Cluster = @ptrCast(@alignCast(self.test_context.?));
            return owner.replica_releases_bundled[self.replica];
        }

        /// Hands the release change over to the cluster, which schedules it.
        fn release_execute(context: *anyopaque, release_next: vsr.Release) void {
            const self: *Replica = @ptrCast(@alignCast(context));
            const owner: *Cluster = @ptrCast(@alignCast(self.test_context.?));
            owner.replica_release_execute_soon(self, release_next);
        }

        /// Nothing to do per tick.
        fn tick(_: *anyopaque) void {}
    };

    return .{
        .context = replica_context,
        .vtable = &.{
            .releases_bundled = hooks.releases_bundled,
            .release_execute = hooks.release_execute,
            .tick = hooks.tick,
        },
    };
}
|
|
790
|
+
|
|
791
|
+
/// Records that `replica` wants to switch to `release`. The storage is reset and the
/// release is stored in `replica_upgrades`; the switch itself happens later in
/// replica_release_execute().
fn replica_release_execute_soon(
    cluster: *Cluster,
    replica: *Replica,
    release: vsr.Release,
) void {
    const replica_index = replica.replica;
    assert(replica.release.value != release.value);
    assert(cluster.replica_upgrades[replica_index] == null);

    log.debug("{}: release_execute_soon: release={}..{}", .{
        replica_index,
        replica.release,
        release,
    });

    switch (cluster.replica_health[replica_index]) {
        .up => {
            // The replica is trying to upgrade to a newer release at runtime.
            assert(replica.journal.status != .init);
            assert(replica.release.value < release.value);
        },
        else => {
            // The replica is not up: its journal is still in its initial status, and the
            // target release may be newer or older.
            assert(replica.journal.status == .init);
            maybe(replica.release.value < release.value);
        },
    }

    cluster.storages[replica_index].reset();
    cluster.replica_upgrades[replica_index] = release;
}
|
|
817
|
+
|
|
818
|
+
/// Executes the release change that replica_release_execute_soon() recorded in
/// `replica_upgrades`. The change is deferred (rather than executed immediately) because we
/// don't actually exec() to a new version, so the delay allows the replica to clean up
/// properly first (e.g. release Message's via `defer`).
///
/// The replica is crashed, and reopened on the new release only if that release is in its
/// bundled list; otherwise it is left down. The pending upgrade is always cleared.
fn replica_release_execute(cluster: *Cluster, replica_index: u8) void {
    assert(cluster.replica_health[replica_index] == .up);
    const replica = &cluster.replicas[replica_index];

    const release = cluster.replica_upgrades[replica_index].?;
    defer cluster.replica_upgrades[replica_index] = null;

    log.debug("{}: release_execute: release={}..{}", .{
        replica_index,
        replica.release,
        release,
    });

    cluster.replica_crash(replica_index);

    const release_available = replica.multiversion.releases_bundled().contains(release);
    if (!release_available) {
        // The cluster has upgraded to `release`, but this replica does not have that
        // release available yet.
        log.debug("{}: release_execute: target version not available", .{replica_index});
        assert(cluster.replica_health[replica_index] == .down);
        return;
    }

    // Disable faults while restarting to ensure that the cluster doesn't get stuck due
    // to too many replicas in status=recovering_head.
    const storage = &cluster.storages[replica_index];
    const faulty = storage.faulty;
    storage.faulty = false;
    defer storage.faulty = faulty;

    cluster.replica_open(replica_index, .{
        .nonce = replica.nonce + 1,
        .release = release,
    }) catch |err| {
        log.err("{}: release_execute failed: error={}", .{ replica_index, err });
        @panic("release_execute failed");
    };
    cluster.replica_enable(replica_index);
}
|
|
859
|
+
|
|
860
|
+
/// Starts reformatting a `.down` replica. Its storage is recreated from the previous
/// storage options, and a `ReplicaReformat` is initialized and started using the client at
/// index `options.client_count + reformat_count`. Completion is polled by tick_reformat().
/// Returns an error if the storage or the reformat cannot be initialized.
pub fn replica_reformat(
    cluster: *Cluster,
    replica_index: u8,
) !void {
    assert(cluster.reformat_count < cluster.options.reformats_max);
    assert(cluster.replica_health[replica_index] == .down);
    assert(cluster.replica_reformats[replica_index] == null);
    assert(replica_index < cluster.options.replica_count);

    cluster.replica_health[replica_index] = .reformatting;
    cluster.log_replica(.reformat, replica_index);

    // Replace the storage in place, reusing the options of the old one.
    const storage = &cluster.storages[replica_index];
    const storage_options = storage.options;
    storage.deinit(cluster.allocator);
    storage.* = try Storage.init(cluster.allocator, storage_options);

    // Each reformat takes the next client after the first `client_count` clients.
    const client_index = cluster.options.client_count + cluster.reformat_count;
    cluster.reformat_count += 1;

    const reformat_slot = &cluster.replica_reformats[replica_index];
    reformat_slot.* = try ReplicaReformat.init(
        cluster.allocator,
        &cluster.clients[client_index].?,
        storage,
        .{
            .cluster = cluster.options.cluster_id,
            .release = cluster.options.releases[0].release,
            .replica = @intCast(replica_index),
            .replica_count = cluster.options.replica_count,
            .view = null,
        },
    );
    reformat_slot.*.?.start();
}
|
|
893
|
+
|
|
894
|
+
/// Registers the client at `client_index`, which must not be null. The result is discarded
/// by `register_callback`.
pub fn register(cluster: *Cluster, client_index: usize) void {
    cluster.clients[client_index].?.register(register_callback, undefined);
}
|
|
898
|
+
|
|
899
|
+
/// Deliberately a no-op that ignores both arguments. See request_callback().
fn register_callback(
    user_data: u128,
    result: *const vsr.RegisterResult,
) void {
    _ = .{ user_data, result };
}
|
|
907
|
+
|
|
908
|
+
/// Sends a request from the client at `client_index`. The client must exist and must not
/// have a recorded eviction. `request_message` is turned into a request whose header is
/// filled in here; `request_body_size` is the body length that follows the header.
pub fn request(
    cluster: *Cluster,
    client_index: usize,
    request_operation: StateMachine.Operation,
    request_message: *Message,
    request_body_size: usize,
) void {
    assert(cluster.client_eviction_reasons[client_index] == null);

    const client = &cluster.clients[client_index].?;
    const message = request_message.build(.request);
    const message_size = @sizeOf(vsr.Header) + request_body_size;

    message.header.* = .{
        .release = client.release,
        .client = client.id,
        .request = 0, // Set by client.raw_request.
        .cluster = client.cluster,
        .command = .request,
        .operation = request_operation.to_vsr(),
        .size = @intCast(message_size),
        .previous_request_latency = cluster.prng.int(u32),
    };

    client.raw_request(request_callback, undefined, message);
    // The placeholder request number must have been replaced by now.
    assert(message.header.request != 0);
}
|
|
938
|
+
|
|
939
|
+
/// Deliberately a no-op. Cluster relies on `Client.on_reply_{context,callback}` instead of
/// this callback because:
/// - Cluster needs access to the request
/// - Cluster needs access to the reply message (not just the body)
///
/// See `client_on_reply`.
fn request_callback(
    user_data: u128,
    operation: vsr.Operation,
    timestamp: u64,
    result: []u8,
) void {
    _ = .{ user_data, operation, timestamp, result };
}
|
|
956
|
+
|
|
957
|
+
/// Reply hook installed on every client. Checks the reply against its request and against
/// the cluster's bookkeeping, then forwards it to `callbacks.on_client_reply` if one is set.
fn client_on_reply(
    client: *Client,
    request_message: *Message.Request,
    reply_message: *Message.Reply,
) void {
    const cluster: *Cluster = @ptrCast(@alignCast(client.on_reply_context.?));

    // The reply must be valid and must answer exactly this request.
    assert(reply_message.header.invalid() == null);
    assert(reply_message.header.cluster == cluster.options.cluster_id);
    assert(reply_message.header.client == client.id);
    assert(reply_message.header.request == request_message.header.request);
    assert(reply_message.header.command == .reply);
    assert(reply_message.header.operation == request_message.header.operation);

    // Map the client id back to its slot and check that the slot holds this client.
    const client_index_shifted = cluster.client_id_permutation.decode(client.id);
    const client_index = client_index_shifted - client_id_permutation_shift;
    assert(&cluster.clients[client_index].? == client);
    assert(cluster.client_eviction_reasons[client_index] == null);

    const on_client_reply = cluster.callbacks.on_client_reply orelse return;
    on_client_reply(cluster, client_index, request_message, reply_message);
}
|
|
979
|
+
|
|
980
|
+
/// Forwards a client eviction to the state checker.
fn cluster_on_eviction(cluster: *Cluster, client_id: u128) void {
    const checker = &cluster.state_checker;
    checker.on_client_eviction(client_id);
}
|
|
983
|
+
|
|
984
|
+
/// Eviction hook installed on every client. Records the eviction reason for the client's
/// slot, disables the client's process in the network, and counts the eviction as a
/// cancelled request when a request other than `.register`/`.noop` was in flight.
fn client_on_eviction(client: *Client, eviction: *const Message.Eviction) void {
    const cluster: *Cluster = @ptrCast(@alignCast(client.on_reply_context.?));

    assert(eviction.header.invalid() == null);
    assert(eviction.header.cluster == cluster.options.cluster_id);
    assert(eviction.header.client == client.id);
    assert(eviction.header.command == .eviction);

    // Map the client id back to its slot; a client is evicted at most once.
    const client_index_shifted = cluster.client_id_permutation.decode(client.id);
    const client_index = client_index_shifted - client_id_permutation_shift;
    assert(&cluster.clients[client_index].? == client);
    assert(cluster.client_eviction_reasons[client_index] == null);

    cluster.client_eviction_reasons[client_index] = eviction.header.reason;
    cluster.network.process_disable(.{ .client = client.id });

    const request_cancelled = if (client.request_inflight) |*inflight|
        inflight.message.header.operation != .register and
            inflight.message.header.operation != .noop
    else
        false;
    cluster.client_eviction_requests_cancelled += @intFromBool(request_cancelled);
}
|
|
1004
|
+
|
|
1005
|
+
/// Event hook installed on every opened replica. Dispatches each replica event to the
/// relevant checker and/or the cluster log. Any checker error is fatal.
fn on_replica_event(replica: *const Replica, event: vsr.ReplicaEvent) void {
    const cluster: *Cluster = @ptrCast(@alignCast(replica.test_context.?));
    const replica_index = replica.replica;
    assert(cluster.replica_health[replica_index] == .up);

    switch (event) {
        .message_sent => |message| cluster.state_checker.on_message(message),
        .state_machine_opened => {
            cluster.manifest_checker.forest_open(&replica.state_machine.forest);
        },
        .committed => |data| {
            const client_id = data.prepare.header.client;
            assert(data.reply.header.client == client_id);

            cluster.log_replica(.commit, replica_index);
            cluster.state_checker.check_state(replica_index) catch |err| {
                fatal(.correctness, "state checker error: {}", .{err});
            };

            if (cluster.callbacks.on_cluster_reply) |on_cluster_reply| {
                // A prepare with client id 0 is reported with a null client index.
                const client_index = if (client_id == 0)
                    null
                else
                    cluster.client_id_permutation.decode(client_id) -
                        client_id_permutation_shift;
                on_cluster_reply(cluster, client_index, data.prepare, data.reply);
            }
        },
        .compaction_completed => {
            cluster.storage_checker.replica_compact(Replica, replica) catch |err| {
                fatal(.correctness, "storage checker error: {}", .{err});
            };
        },
        .checkpoint_commenced => cluster.log_replica(.checkpoint_commenced, replica_index),
        .checkpoint_completed => {
            cluster.log_replica(.checkpoint_completed, replica_index);
            cluster.manifest_checker.forest_checkpoint(&replica.state_machine.forest);
            cluster.storage_checker.replica_checkpoint(Replica, replica) catch |err| {
                fatal(.correctness, "storage checker error: {}", .{err});
            };
        },
        .sync_stage_changed => switch (replica.syncing) {
            .idle => cluster.log_replica(.sync, replica_index),
            .updating_checkpoint => {
                cluster.state_checker.check_state(replica_index) catch |err| {
                    fatal(.correctness, "state checker error: {}", .{err});
                };
            },
            else => {},
        },
        .client_evicted => |client_id| cluster.cluster_on_eviction(client_id),
    }
}
|
|
1060
|
+
|
|
1061
|
+
/// Logs the formatted message at error level under the `.state_checker` scope, then exits
/// the process with the integer value of `failure` as the exit code. Never returns.
fn fatal(failure: Failure, comptime fmt_string: []const u8, args: anytype) noreturn {
    const scoped_log = std.log.scoped(.state_checker);
    scoped_log.err(fmt_string, args);

    const exit_code = @intFromEnum(failure);
    std.posix.exit(exit_code);
}
|
|
1066
|
+
|
|
1067
|
+
/// Logs one `.commit`-style line per replica slot. Intended for printf debugging of the
/// cluster's current state.
pub fn log_cluster(cluster: *const Cluster) void {
    for (0..cluster.replicas.len) |replica_index| {
        cluster.log_replica(.commit, @intCast(replica_index));
    }
}
|
|
1074
|
+
|
|
1075
|
+
/// Log a single line summarizing one replica, tagged with the event that triggered the line.
///
/// Line layout: replica index, event character, role character, a status column, and (only when
/// the replica's health is `.up`) view/commit/journal/WAL/sync/release/grid counters, followed by
/// the pipeline queue occupancy (the latter only while `replica.pipeline` is `.queue`).
///
/// Role characters: `#` down, `F` reformatting, `~` syncing, `|` standby, `/` primary,
/// `\` otherwise.
/// Status characters: ` ` reformatting, `#` down, `.` normal, `v` view change, `r` recovering,
/// `h` recovering head.
fn log_replica(
    cluster: *const Cluster,
    event: enum(u8) {
        crash = '!',
        recover = '^',
        reformat = 'X',
        commit = ' ',
        sync = '$',
        checkpoint_commenced = '[',
        checkpoint_completed = ']',
    },
    replica_index: u8,
) void {
    const replica = &cluster.replicas[replica_index];
    const health = cluster.replica_health[replica_index];

    // Only this replica's own column is filled in; every other column stays blank.
    const status_char: u8 = switch (health) {
        .reformatting => ' ',
        .down => '#',
        .up => switch (replica.status) {
            .normal => '.',
            .view_change => 'v',
            .recovering => 'r',
            .recovering_head => 'h',
        },
    };
    var statuses: [constants.members_max]u8 = @splat(' ');
    statuses[replica_index] = status_char;

    // First matching condition wins; the order of the checks is significant.
    const role: u8 = if (health == .down)
        '#'
    else if (health == .reformatting)
        'F'
    else if (replica.syncing != .idle)
        '~'
    else if (replica.standby())
        '|'
    else if (replica.primary_index(replica.view) == replica.replica)
        '/'
    else
        '\\';

    var info_storage: [128]u8 = undefined;
    var pipeline_storage: [16]u8 = undefined;
    // Both suffixes stay empty unless the replica is up.
    var info: []const u8 = "";
    var pipeline: []const u8 = "";

    if (health == .up) {
        // Range of ops covered by the in-memory journal headers.
        var journal_op_min: u64 = std.math.maxInt(u64);
        var journal_op_max: u64 = 0;
        if (replica.journal.status != .init) {
            for (replica.journal.headers) |*header| {
                if (header.operation == .reserved) continue;
                journal_op_min = @min(journal_op_min, header.op);
                journal_op_max = @max(journal_op_max, header.op);
            }
        } else {
            // `journal.headers` is junk data when we are upgrading from Replica.open().
            assert(event == .recover);
            assert(cluster.replica_upgrades[replica_index] != null);
            journal_op_min = 0;
        }

        // Range of ops among the stored WAL prepares that have a valid checksum and are
        // actually `prepare` messages.
        var wal_op_min: u64 = std.math.maxInt(u64);
        var wal_op_max: u64 = 0;
        for (cluster.storages[replica_index].wal_prepares()) |*prepare| {
            if (!prepare.header.valid_checksum()) continue;
            if (prepare.header.command != .prepare) continue;
            wal_op_min = @min(wal_op_min, prepare.header.op);
            wal_op_max = @max(wal_op_max, prepare.header.op);
        }

        // The acquired-block count is only available once the free set has been opened.
        const grid_blocks_acquired = if (replica.grid.free_set.opened)
            replica.grid.free_set.count_acquired()
        else
            null;

        info = std.fmt.bufPrint(&info_storage, "" ++
            "{[view]:>4}V " ++
            "{[op_checkpoint]:>3}/{[commit_min]:_>3}/{[commit_max]:_>3}C " ++
            "{[journal_op_min]:>3}:{[journal_op_max]:_>3}Jo " ++
            "{[journal_faulty]:>2}/{[journal_dirty]:_>2}J! " ++
            "{[wal_op_min]:>3}:{[wal_op_max]:_>3}Wo " ++
            "<{[sync_op_min]:_>3}:{[sync_op_max]:_>3}> " ++
            "v{[release]}:{[release_max]} " ++
            "{[grid_blocks_acquired]?:>5}Ga " ++
            "{[grid_blocks_global]:>2}G! " ++
            "{[grid_blocks_repair]:>3}G?", .{
            .view = replica.view,
            .op_checkpoint = replica.op_checkpoint(),
            .commit_min = replica.commit_min,
            .commit_max = replica.commit_max,
            .journal_op_min = journal_op_min,
            .journal_op_max = journal_op_max,
            .journal_dirty = replica.journal.dirty.count,
            .journal_faulty = replica.journal.faulty.count,
            .wal_op_min = wal_op_min,
            .wal_op_max = wal_op_max,
            .sync_op_min = replica.superblock.working.vsr_state.sync_op_min,
            .sync_op_max = replica.superblock.working.vsr_state.sync_op_max,
            .release = replica.release.triple().patch,
            .release_max = replica.multiversion.releases_bundled().last().triple().patch,
            .grid_blocks_acquired = grid_blocks_acquired,
            .grid_blocks_global = replica.grid.read_global_queue.count(),
            .grid_blocks_repair = replica.grid.blocks_missing.faulty_blocks.count(),
        }) catch unreachable;

        if (replica.pipeline == .queue) {
            const queue = &replica.pipeline.queue;
            pipeline = std.fmt.bufPrint(&pipeline_storage, " {:>2}/{}Pp {:>2}/{}Rq", .{
                queue.prepare_queue.count,
                constants.pipeline_prepare_queue_max,
                queue.request_queue.count,
                constants.pipeline_request_queue_max,
            }) catch unreachable;
        }
    }

    const member_count = cluster.replica_count + cluster.standby_count;
    log.info("{[replica]: >2} {[event]c} {[role]c} {[statuses]s}" ++
        "  {[info]s}{[pipeline]s}", .{
        .replica = replica.replica,
        .event = @intFromEnum(event),
        .role = role,
        .statuses = statuses[0..member_count],
        .info = info,
        .pipeline = pipeline,
    });
}
|
|
1197
|
+
};
|
|
1198
|
+
}
|