tigerbeetle 0.0.34 → 0.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/client.rb +1 -1
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,1214 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const mem = std.mem;
|
|
4
|
+
|
|
5
|
+
const constants = @import("constants.zig");
|
|
6
|
+
const log = std.log.scoped(.message_bus);
|
|
7
|
+
|
|
8
|
+
const vsr = @import("vsr.zig");
|
|
9
|
+
|
|
10
|
+
const stdx = @import("stdx");
|
|
11
|
+
const maybe = stdx.maybe;
|
|
12
|
+
const RingBufferType = stdx.RingBufferType;
|
|
13
|
+
const MessagePool = @import("message_pool.zig").MessagePool;
|
|
14
|
+
const Message = MessagePool.Message;
|
|
15
|
+
const MessageBuffer = @import("./message_buffer.zig").MessageBuffer;
|
|
16
|
+
const QueueType = @import("./queue.zig").QueueType;
|
|
17
|
+
const Tracer = vsr.trace.Tracer;
|
|
18
|
+
|
|
19
|
+
pub fn MessageBusType(comptime IO: type) type {
|
|
20
|
+
// Slice points to a subslice of send_queue_buffer.
|
|
21
|
+
const SendQueue = RingBufferType(*Message, .slice);
|
|
22
|
+
|
|
23
|
+
/// Identity of the process that owns a message bus, tagged by `vsr.ProcessType`.
const ProcessID = union(vsr.ProcessType) {
    /// Replica index. `listen()` uses it to look up this replica's own address and
    /// `tick_connect()` uses it to decide which replicas to connect to.
    replica: u8,
    /// Client id. `init()` stores it as the bus `id` and truncates it to seed the PRNG.
    client: u128,
};
|
|
27
|
+
|
|
28
|
+
return struct {
|
|
29
|
+
pool: *MessagePool,
|
|
30
|
+
io: *IO,
|
|
31
|
+
|
|
32
|
+
process: ProcessID,
|
|
33
|
+
/// Prefix for log messages.
|
|
34
|
+
id: u128,
|
|
35
|
+
|
|
36
|
+
/// The file descriptor for the process on which to accept connections.
|
|
37
|
+
accept_fd: ?IO.socket_t = null,
|
|
38
|
+
/// Address the accept_fd is bound to, as reported by `getsockname`.
|
|
39
|
+
///
|
|
40
|
+
/// This allows passing port 0 as an address for the OS to pick an open port for us
|
|
41
|
+
/// in a TOCTOU immune way and logging the resulting port number.
|
|
42
|
+
accept_address: ?Address = null,
|
|
43
|
+
accept_completion: IO.Completion = undefined,
|
|
44
|
+
/// The connection reserved for the currently in progress accept operation.
|
|
45
|
+
/// This is non-null exactly when an accept operation is submitted.
|
|
46
|
+
accept_connection: ?*Connection = null,
|
|
47
|
+
|
|
48
|
+
/// The callback to be called when a message is received.
|
|
49
|
+
on_messages_callback: *const fn (message_bus: *MessageBus, buffer: *MessageBuffer) void,
|
|
50
|
+
|
|
51
|
+
/// SendQueue storage shared by all connections.
|
|
52
|
+
send_queue_buffer: []*Message,
|
|
53
|
+
/// This slice is allocated with a fixed size in the init function and never reallocated.
|
|
54
|
+
connections: []Connection,
|
|
55
|
+
/// Number of connections currently in use (i.e. connection.state != .free).
|
|
56
|
+
connections_used: u32 = 0,
|
|
57
|
+
connections_suspended: QueueType(Connection) = QueueType(Connection).init(.{
|
|
58
|
+
.name = null,
|
|
59
|
+
}),
|
|
60
|
+
resume_receive_completion: IO.Completion = undefined,
|
|
61
|
+
resume_receive_submitted: bool = false,
|
|
62
|
+
|
|
63
|
+
/// Map from replica index to the currently active connection for that replica, if any.
|
|
64
|
+
/// The entry for this process's own replica index (if the process is a replica) is always null.
|
|
65
|
+
replicas: []?*Connection,
|
|
66
|
+
replicas_addresses: []Address,
|
|
67
|
+
/// The number of outgoing `connect()` attempts for a given replica:
|
|
68
|
+
/// Reset to zero after a successful `on_connect()`.
|
|
69
|
+
replicas_connect_attempts: []u64,
|
|
70
|
+
|
|
71
|
+
/// Map from client id to the currently active connection for that client.
|
|
72
|
+
/// This is used to make lookup of client connections when sending messages
|
|
73
|
+
/// efficient and to ensure old client connections are dropped if a new one
|
|
74
|
+
/// is established.
|
|
75
|
+
clients: std.AutoHashMapUnmanaged(u128, *Connection) = .{},
|
|
76
|
+
|
|
77
|
+
/// Used to apply jitter when calculating exponential backoff:
|
|
78
|
+
/// Seeded with the process' replica index or client ID.
|
|
79
|
+
prng: stdx.PRNG,
|
|
80
|
+
|
|
81
|
+
trace: ?*Tracer,
|
|
82
|
+
|
|
83
|
+
comptime {
|
|
84
|
+
// Assert it is correct to use u32 to track sizes.
|
|
85
|
+
assert(constants.message_size_max < std.math.maxInt(u32));
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/// Settings consumed by `init()`.
pub const Options = struct {
    /// Addresses of the replicas; `init()` copies them into `replicas_addresses`.
    configuration: []const Address,
    /// I/O backend, stored on the bus as `io`.
    io: *IO,
    /// Optional tracer, stored on the bus as `trace`.
    trace: ?*Tracer,
    /// `init()` asserts that this is non-null and positive for a replica process
    /// and null for a client process.
    clients_limit: ?u32 = null,
};
|
|
94
|
+
const Address = std.net.Address;
|
|
95
|
+
const MessageBus = @This();
|
|
96
|
+
|
|
97
|
+
/// Builds a MessageBus for the replica or client process identified by `process_id`.
///
/// Allocates the shared send queue storage, the connection table and the per-replica
/// tables, all sized from `options.configuration` (and `options.clients_limit` for a
/// replica). Returns an error if an allocation fails; anything allocated before the
/// failure is freed again.
pub fn init(
    allocator: mem.Allocator,
    process_id: ProcessID,
    message_pool: *MessagePool,
    on_messages_callback: *const fn (message_bus: *MessageBus, buffer: *MessageBuffer) void,
    options: Options,
) !MessageBus {
    // A replica must be given a positive client limit; a client must not be given one.
    switch (process_id) {
        .replica => assert(options.clients_limit.? > 0),
        .client => assert(options.clients_limit == null),
    }

    const replica_count = options.configuration.len;

    const connections_max: u32 = switch (process_id) {
        // Upper bound on the connections the server holds open at any time.
        // The -1 is because a replica needs no connection to itself.
        .replica => @intCast(replica_count - 1 + options.clients_limit.?),
        .client => @intCast(replica_count),
    };

    const send_queue_max = switch (process_id) {
        .replica => constants.connection_send_queue_max_replica,
        .client => constants.connection_send_queue_max_client,
    };

    // A single allocation provides the send queue storage of every connection.
    const queue_slots = try allocator.alloc(*Message, connections_max * send_queue_max);
    @memset(queue_slots, undefined);
    errdefer allocator.free(queue_slots);

    const connection_table = try allocator.alloc(Connection, connections_max);
    errdefer allocator.free(connection_table);
    for (connection_table, 0..) |*slot, slot_index| {
        // Each connection gets its own `send_queue_max`-sized window of the shared storage.
        const queue_start = slot_index * send_queue_max;
        slot.* = .{
            .send_queue = .{
                .buffer = queue_slots[queue_start..][0..send_queue_max],
            },
        };
    }

    const replica_connections = try allocator.alloc(?*Connection, replica_count);
    errdefer allocator.free(replica_connections);
    @memset(replica_connections, null);

    const replica_addresses = try allocator.alloc(Address, replica_count);
    errdefer allocator.free(replica_addresses);
    stdx.copy_disjoint(.exact, Address, replica_addresses, options.configuration);

    const connect_attempts = try allocator.alloc(u64, replica_count);
    errdefer allocator.free(connect_attempts);
    @memset(connect_attempts, 0);

    // The PRNG is seeded with the replica index, or with the low 64 bits of the client id.
    const prng_seed: u64 = switch (process_id) {
        .replica => |index| index,
        .client => |id| @truncate(id),
    };

    // Log prefix: the replica index widened to u128, or the client id itself.
    const log_id: u128 = switch (process_id) {
        .replica => |index| index,
        .client => |id| id,
    };

    var bus: MessageBus = .{
        .pool = message_pool,
        .io = options.io,
        .process = process_id,
        .id = log_id,
        .on_messages_callback = on_messages_callback,
        .send_queue_buffer = queue_slots,
        .connections = connection_table,
        .replicas = replica_connections,
        .replicas_addresses = replica_addresses,
        .replicas_connect_attempts = connect_attempts,
        .prng = stdx.PRNG.from_seed(prng_seed),
        .trace = options.trace,
    };

    if (process_id == .replica) {
        // Pre-allocate enough memory to hold all possible connections in the client map.
        try bus.clients.ensureTotalCapacity(allocator, connections_max);
        errdefer bus.clients.deinit(allocator);

        return bus;
    }
    return bus;
}
|
|
186
|
+
|
|
187
|
+
/// Releases everything owned by the bus: the client map, the listening socket (if any),
/// every connection's socket, receive buffer and queued messages, and finally the
/// tables allocated by `init()`. `allocator` is used for every free performed here.
/// The bus is left `undefined` afterwards.
pub fn deinit(bus: *MessageBus, allocator: std.mem.Allocator) void {
    bus.clients.deinit(allocator);

    if (bus.accept_fd) |fd| {
        assert(bus.process == .replica);
        assert(bus.accept_address != null);
        bus.io.close_socket(fd);
    }

    const send_queue_max = switch (bus.process) {
        .replica => constants.connection_send_queue_max_replica,
        .client => constants.connection_send_queue_max_client,
    };

    // While tearing down each connection, also verify that the per-connection send queue
    // buffers tile `send_queue_buffer` exactly: equal-sized, adjacent, and in table order.
    var previous_queue: ?[]*Message = null;
    for (bus.connections) |*connection| {
        if (connection.fd) |fd| bus.io.close_socket(fd);

        if (connection.recv_buffer) |*buffer| buffer.deinit(bus.pool);
        connection.recv_buffer = null;

        // Drop the references held by messages that were still queued for sending.
        while (connection.send_queue.pop()) |message| bus.unref(message);

        const queue_buffer = connection.send_queue.buffer;
        assert(queue_buffer.len == send_queue_max);

        const expected_ptr = if (previous_queue) |previous|
            previous.ptr + previous.len
        else
            bus.send_queue_buffer.ptr;
        assert(queue_buffer.ptr == expected_ptr);

        previous_queue = queue_buffer;
    }

    const last_queue = previous_queue.?;
    assert(bus.send_queue_buffer.ptr + bus.send_queue_buffer.len ==
        last_queue.ptr + last_queue.len);

    allocator.free(bus.replicas_connect_attempts);
    allocator.free(bus.replicas_addresses);
    allocator.free(bus.replicas);
    allocator.free(bus.connections);
    allocator.free(bus.send_queue_buffer);
    bus.* = undefined;
}
|
|
228
|
+
|
|
229
|
+
/// Opens a TCP socket of the given address `family` through `io`, configured from
/// `constants`. The send buffer size depends on whether `process` is a replica or
/// a client. Any error from `io.open_socket_tcp` is returned to the caller.
fn init_tcp(io: *IO, process: vsr.ProcessType, family: u32) !IO.socket_t {
    const socket = try io.open_socket_tcp(family, .{
        .rcvbuf = constants.tcp_rcvbuf,
        .sndbuf = switch (process) {
            .replica => constants.tcp_sndbuf_replica,
            .client => constants.tcp_sndbuf_client,
        },
        // Keepalive parameters are only passed when keepalive is enabled in `constants`.
        .keepalive = if (constants.tcp_keepalive) .{
            .keepidle = constants.tcp_keepidle,
            .keepintvl = constants.tcp_keepintvl,
            .keepcnt = constants.tcp_keepcnt,
        } else null,
        .user_timeout_ms = constants.tcp_user_timeout_ms,
        .nodelay = constants.tcp_nodelay,
    });
    return socket;
}
|
|
245
|
+
|
|
246
|
+
/// Opens the replica's listening socket on its own configured address and records
/// both the socket and the address reported by `io.listen`.
///
/// Replica-only, and must not be called while a listening socket is already recorded.
/// On error the freshly opened socket is closed again and the bus is left unchanged.
pub fn listen(bus: *MessageBus) !void {
    assert(bus.process == .replica);
    assert(bus.accept_fd == null);
    assert(bus.accept_address == null);

    const own_address = bus.replicas_addresses[bus.process.replica];

    const listener_fd = try init_tcp(bus.io, .replica, own_address.any.family);
    errdefer bus.io.close_socket(listener_fd);

    const bound_address = try bus.io.listen(listener_fd, own_address, .{
        .backlog = constants.tcp_backlog,
    });

    // Only publish the results once both steps have succeeded.
    bus.accept_fd = listener_fd;
    bus.accept_address = bound_address;
}
|
|
262
|
+
|
|
263
|
+
/// Replica tick: first attempts outgoing replica connections, then makes sure an
/// accept operation is pending. Asserts that the process is a replica.
pub fn tick(bus: *MessageBus) void {
    assert(bus.process == .replica);

    bus.tick_connect();
    // Only replicas accept connections from other replicas and clients.
    bus.tick_accept();
}
|
|
268
|
+
|
|
269
|
+
/// Emits connection gauges to the tracer: one gauge per peer kind with the number of
/// connections currently in the `.connected` state, plus one gauge with the size of the
/// connection table. Does nothing when the bus has no tracer.
pub fn trace_gauge(bus: *MessageBus) void {
    const trace = bus.trace orelse return;

    const PeerTag = std.meta.Tag(vsr.Peer);
    var connected_by_peer = std.enums.EnumArray(PeerTag, u32).initFill(0);

    for (bus.connections) |*connection| {
        if (connection.state != .connected) continue;
        connected_by_peer.getPtr(connection.peer).* += 1;
    }

    var peer_iterator = connected_by_peer.iterator();
    while (peer_iterator.next()) |entry| {
        trace.gauge(
            .{ .message_bus_connections = .{ .peer = entry.key } },
            entry.value.*,
        );
    }

    trace.gauge(.message_bus_connections_max, bus.connections.len);
}
|
|
289
|
+
|
|
290
|
+
// Client counterpart of `tick()`: asserts a client process and never calls accept, so that
// Zig's lazy semantics keep dead accept code out of client libraries.
pub fn tick_client(bus: *MessageBus) void {
    assert(bus.process == .client);

    bus.tick_connect();
}
|
|
296
|
+
|
|
297
|
+
/// Calls `connect()` for every replica this process is responsible for dialing and
/// that currently has no connection recorded in `bus.replicas`.
fn tick_connect(bus: *MessageBus) void {
    const first_outgoing: u8 = switch (bus.process) {
        // Each replica is responsible for connecting to replicas that come
        // after it in the configuration. This ensures that replicas never try
        // to connect to each other at the same time.
        .replica => |own_index| own_index + 1,
        // The client connects to all replicas.
        .client => 0,
    };

    const outgoing = bus.replicas[first_outgoing..];
    for (outgoing, first_outgoing..) |*slot, replica_index| {
        if (slot.* != null) continue;
        bus.connect(@intCast(replica_index));
    }

    assert(bus.connections_used >= outgoing.len);
}
|
|
311
|
+
|
|
312
|
+
/// Replica-only step of `tick()`: checks the preconditions and delegates to `accept()`.
fn tick_accept(bus: *MessageBus) void {
    assert(bus.process == .replica);
    // Must listen before tick.
    assert(bus.accept_fd != null);

    bus.accept();
}
|
|
317
|
+
|
|
318
|
+
/// Submits an accept operation on the listening socket unless one is already pending
/// or every connection is in use. The first free connection is reserved for the
/// operation (moved to `.accepting`) and recorded in `bus.accept_connection`.
fn accept(bus: *MessageBus) void {
    assert(bus.process == .replica);
    assert(bus.accept_fd != null);

    // An accept operation is already in flight.
    if (bus.accept_connection != null) return;

    // All connections are currently in use, do nothing.
    if (bus.connections_used == bus.connections.len) return;
    assert(bus.connections_used < bus.connections.len);

    // At least one connection is unused, so the search below cannot come up empty.
    const reserved: *Connection = for (bus.connections) |*candidate| {
        if (candidate.state == .free) break candidate;
    } else unreachable;

    reserved.state = .accepting;
    bus.accept_connection = reserved;

    bus.io.accept(
        *MessageBus,
        bus,
        accept_callback,
        &bus.accept_completion,
        bus.accept_fd.?,
    );
}
|
|
340
|
+
|
|
341
|
+
/// Completion handler for the accept operation submitted by `accept()`.
///
/// Takes the reserved connection out of `bus.accept_connection`. On success the
/// connection becomes `.connected`, takes ownership of the new socket, is given a
/// receive buffer, and starts receiving. On failure the connection is returned to
/// `.free` and the error is logged.
fn accept_callback(
    bus: *MessageBus,
    _: *IO.Completion,
    result: IO.AcceptError!IO.socket_t,
) void {
    assert(bus.process == .replica);

    assert(bus.accept_connection != null);
    const connection: *Connection = bus.accept_connection.?;
    bus.accept_connection = null;

    assert(connection.peer == .unknown);
    assert(connection.fd == null);
    assert(connection.state == .accepting);
    defer assert(connection.state == .connected or connection.state == .free);

    const accepted_fd = result catch |err| {
        connection.state = .free;
        // TODO: some errors should probably be fatal
        log.warn("{}: on_accept: {}", .{ bus.id, err });
        return;
    };

    connection.state = .connected;
    connection.fd = accepted_fd;
    bus.connections_used += 1;

    bus.assert_connection_initial_state(connection);
    assert(connection.recv_buffer == null);
    connection.recv_buffer = MessageBuffer.init(bus.pool);
    bus.recv(connection);

    // Don't start send loop yet --- on accept, we don't know which peer this is.
    assert(connection.send_queue.empty());
    assert(connection.state == .connected);
}
|
|
375
|
+
|
|
376
|
+
/// Starts an outgoing connection to `replica`, which must not already have a connection
/// recorded in `bus.replicas`.
///
/// Uses a free connection if one exists. Otherwise a client or unknown connection is
/// dropped to make space (preferring a client, since an unknown peer may be a replica).
/// Shutting a connection down is not instantaneous, so in that case this simply returns
/// after starting the shutdown; the caller is expected to try again on the next tick().
fn connect(bus: *MessageBus, replica: u8) void {
    assert(bus.replicas[replica] == null);

    var available: ?*Connection = null;
    for (bus.connections) |*candidate| {
        if (candidate.state == .free) {
            available = candidate;
            break;
        }
    }
    const slot: *Connection = available orelse {
        bus.connect_reclaim_connection();
        return;
    };
    assert(slot.state == .free);

    // This will immediately add the connection to bus.replicas,
    // or else will return early if a socket file descriptor cannot be obtained:
    bus.connect_connection(slot, replica);

    const reserved = bus.replicas[replica] != null;
    switch (slot.state) {
        .connecting => assert(reserved),
        .free => assert(!reserved),
        else => unreachable,
    }
}
|
|
403
|
+
|
|
404
|
+
/// Starts shutting down one existing connection so that a later `connect` can find a
/// free slot. Must only be called when no connection is `.free`.
///
/// If a connection is already `.terminating`, nothing is done. Otherwise the first
/// connection whose peer is a client is terminated; failing that, the first established
/// connection whose peer is still unknown.
fn connect_reclaim_connection(bus: *MessageBus) void {
    for (bus.connections) |*connection| assert(connection.state != .free);

    // If there is already a connection being shut down, no need to kill another.
    for (bus.connections) |*connection| {
        if (connection.state == .terminating) return;
    }

    for (bus.connections) |*connection| {
        if (connection.peer == .client) {
            // Only log once a client connection has actually been found, so the log
            // never claims a client was disconnected when none exists.
            log.info("{}: connect_to_replica: no free connection, disconnecting a client", .{
                bus.id,
            });
            bus.terminate(connection, .shutdown);
            return;
        }
    }

    for (bus.connections) |*connection| {
        // A slot reserved for an in-flight accept also has an unknown peer, but it has
        // no fd yet and `terminate` asserts a non-null fd, so it must be skipped.
        if (connection.peer == .unknown and connection.state != .accepting) {
            log.info("{}: connect_to_replica: no free connection, disconnecting unknown peer", .{
                bus.id,
            });
            bus.terminate(connection, .shutdown);
            return;
        }
    }

    // We assert that the max number of connections is greater
    // than the number of replicas in init().
    unreachable;
}
|
|
436
|
+
|
|
437
|
+
/// Attempt to connect to a replica using the given free `connection`.
///
/// Once a socket has been obtained, the slot in `bus.replicas` is reserved immediately and
/// the connection enters `.connecting`; the actual connect is issued from
/// `connect_timeout_callback` after a backoff delay.
/// If the socket cannot be created, the failure is only logged and the connection is left
/// untouched in its unused state.
fn connect_connection(bus: *MessageBus, connection: *Connection, replica: u8) void {
    switch (bus.process) {
        .replica => |self_index| assert(replica > self_index),
        .client => {},
    }

    assert(connection.state == .free);
    assert(connection.fd == null);

    const address_family = bus.replicas_addresses[replica].any.family;
    const fd = init_tcp(bus.io, bus.process, address_family) catch |err| {
        log.err("{}: connect_to_replica: init_tcp error={s}", .{
            bus.id,
            @errorName(err),
        });
        return;
    };
    connection.fd = fd;
    connection.peer = .{ .replica = replica };
    connection.state = .connecting;
    bus.connections_used += 1;

    assert(bus.replicas[replica] == null);
    bus.replicas[replica] = connection;

    // The delay is computed from the attempt count before this attempt is recorded.
    const attempts_so_far = bus.replicas_connect_attempts[replica];
    const ms = vsr.exponential_backoff_with_jitter(
        &bus.prng,
        constants.connection_delay_min_ms,
        constants.connection_delay_max_ms,
        attempts_so_far,
    );
    bus.replicas_connect_attempts[replica] = attempts_so_far + 1;

    log.debug("{}: connect_to_replica: connecting to={} after={}ms", .{
        bus.id,
        connection.peer.replica,
        ms,
    });

    assert(!connection.recv_submitted);
    connection.recv_submitted = true;

    const delay_ns = @as(u63, @intCast(ms * std.time.ns_per_ms));
    bus.io.timeout(
        *MessageBus,
        bus,
        connect_timeout_callback,
        // We use `recv_completion` for the connection `timeout()` and `connect()` calls
        &connection.recv_completion,
        delay_ns,
    );
}
|
|
488
|
+
|
|
489
|
+
/// Runs when the backoff timer started by `connect_connection` fires.
///
/// If the connection was terminated in the meantime, termination is resumed. Otherwise the
/// asynchronous connect to the replica's address is submitted, reusing `recv_completion`.
fn connect_timeout_callback(
    bus: *MessageBus,
    completion: *IO.Completion,
    result: IO.TimeoutError!void,
) void {
    const connection: *Connection = @alignCast(
        @fieldParentPtr("recv_completion", completion),
    );

    // The timeout occupied the recv completion; release it.
    assert(connection.recv_submitted);
    connection.recv_submitted = false;

    if (connection.state == .terminating) {
        bus.terminate_join(connection);
        return;
    }
    assert(connection.state == .connecting);
    result catch unreachable;

    log.debug("{}: on_connect_with_exponential_backoff: to={}", .{
        bus.id,
        connection.peer.replica,
    });

    assert(!connection.recv_submitted);
    connection.recv_submitted = true;

    const socket = connection.fd.?;
    const address = bus.replicas_addresses[connection.peer.replica];
    bus.io.connect(
        *MessageBus,
        bus,
        connect_callback,
        // We use `recv_completion` for the connection `timeout()` and `connect()` calls
        &connection.recv_completion,
        socket,
        address,
    );
}
|
|
524
|
+
|
|
525
|
+
/// Completion handler for the connect submitted by `connect_timeout_callback`.
///
/// A connection terminated while connecting resumes its termination. On a connect error
/// the connection is terminated without a shutdown call. On success the attempt counter
/// for the replica is reset and both the recv and send loops are started.
fn connect_callback(
    bus: *MessageBus,
    completion: *IO.Completion,
    result: IO.ConnectError!void,
) void {
    const connection: *Connection = @alignCast(
        @fieldParentPtr("recv_completion", completion),
    );
    assert(connection.recv_submitted);
    connection.recv_submitted = false;

    if (connection.state == .terminating) {
        bus.terminate_join(connection);
        return;
    }
    assert(connection.state == .connecting);
    // The state is advanced before the result is inspected, as `terminate` below accepts
    // any non-free, non-terminating state.
    connection.state = .connected;

    if (result) |_| {
        log.info("{}: on_connect: connected to={}", .{ bus.id, connection.peer.replica });
        bus.replicas_connect_attempts[connection.peer.replica] = 0;

        bus.assert_connection_initial_state(connection);
        assert(connection.recv_buffer == null);
        connection.recv_buffer = MessageBuffer.init(bus.pool);
        bus.recv(connection);
        bus.send(connection);
        assert(connection.state == .connected);
    } else |err| {
        log.warn("{}: on_connect: error to={} {}", .{
            bus.id,
            connection.peer.replica,
            err,
        });
        bus.terminate(connection, .no_shutdown);
    }
}
|
|
563
|
+
|
|
564
|
+
/// Asserts the invariants of a connection that has just become `.connected` (via accept
/// or connect) and has not yet started any recv/send activity.
fn assert_connection_initial_state(bus: *MessageBus, connection: *Connection) void {
    assert(bus.connections_used > 0);

    // Only an accepted (unknown) or an outgoing (replica) connection can be brand new.
    switch (connection.peer) {
        .unknown, .replica => {},
        .client, .client_likely => unreachable,
    }
    assert(connection.state == .connected);
    assert(connection.fd != null);

    // Receive side is idle and has no buffer yet.
    assert(!connection.recv_submitted);
    assert(connection.recv_buffer == null);

    // Send side is idle with no partially sent message.
    assert(!connection.send_submitted);
    assert(connection.send_progress == 0);
}
|
|
577
|
+
|
|
578
|
+
/// The recv loop.
///
/// Submits one asynchronous receive into the connection's receive buffer; completion is
/// delivered to `recv_callback`. Kickstarted by `accept` and `connect`, and re-entered via
/// `recv_buffer_drain`.
fn recv(bus: *MessageBus, connection: *Connection) void {
    assert(connection.state == .connected);
    assert(connection.fd != null);
    assert(connection.recv_buffer != null);

    // Exactly one receive may be outstanding per connection.
    assert(!connection.recv_submitted);
    connection.recv_submitted = true;

    const socket = connection.fd.?;
    const destination = connection.recv_buffer.?.recv_slice();
    bus.io.recv(
        *MessageBus,
        bus,
        recv_callback,
        &connection.recv_completion,
        socket,
        destination,
    );
}
|
|
598
|
+
|
|
599
|
+
/// Completion handler for the receive submitted by `recv`.
///
/// Terminates the connection on a receive error or when the peer closed its side (zero
/// bytes). Otherwise the received bytes are committed to the receive buffer, the peer
/// identity is updated from each available header (replica process only), and the buffer
/// is drained.
fn recv_callback(
    bus: *MessageBus,
    completion: *IO.Completion,
    result: IO.RecvError!usize,
) void {
    const connection: *Connection = @alignCast(
        @fieldParentPtr("recv_completion", completion),
    );
    assert(connection.recv_submitted);
    connection.recv_submitted = false;

    if (connection.state == .terminating) {
        bus.terminate_join(connection);
        return;
    }
    assert(connection.state == .connected);

    const bytes_received = result catch |err| {
        // TODO: maybe don't need to close on *every* error
        log.warn("{}: on_recv: from={} {}", .{ bus.id, connection.peer, err });
        bus.terminate(connection, .shutdown);
        return;
    };

    // No bytes received means that the peer closed its side of the connection.
    if (bytes_received == 0) {
        log.info("{}: on_recv: from={} orderly shutdown", .{ bus.id, connection.peer });
        bus.terminate(connection, .no_shutdown);
        return;
    }
    assert(bytes_received <= constants.message_size_max);
    assert(connection.recv_buffer != null);
    connection.recv_buffer.?.recv_advance(@intCast(bytes_received));

    switch (bus.process) {
        // The client connects only to replicas and should set peer when connecting:
        .client => assert(connection.peer == .replica),

        // Replicas may forward messages from clients or from other replicas so we
        // may receive messages from a peer before we know who they are:
        // This has the same effect as an asymmetric network where, for a short time
        // bounded by the time it takes to ping, we can hear from a peer before we
        // can send back to them.
        .replica => {
            while (connection.recv_buffer.?.next_header()) |header| {
                if (!bus.recv_update_peer(connection, header.peer_type())) {
                    log.warn("{}: on_recv: invalid peer transition {any} -> {any}", .{
                        bus.id,
                        connection.peer,
                        header.peer_type(),
                    });
                    connection.recv_buffer.?.invalidate(.misdirected);
                    continue;
                }
                connection.recv_buffer.?.suspend_message(&header);
            }
        },
    }

    bus.recv_buffer_drain(connection);
}
|
|
655
|
+
|
|
656
|
+
/// Reconciles the peer indicated by an incoming header with the peer recorded for
/// `connection`, updating `bus.replicas` / `bus.clients` accordingly.
///
/// Returns `true` when the recorded peer is retained or successfully updated, and `false`
/// when the transition is rejected or names a replica index beyond `bus.replicas_addresses`.
/// When another connection is already registered for the same replica or client, that
/// older connection is terminated (unless it is already terminating) and replaced.
fn recv_update_peer(bus: *MessageBus, connection: *Connection, peer: vsr.Peer) bool {
    assert(bus.process == .replica);
    assert(bus.clients.capacity() > 0);

    assert(bus.connections_used > 0);

    assert(connection.state == .connected);
    assert(connection.fd != null);
    assert(connection.recv_buffer != null);

    switch (vsr.Peer.transition(connection.peer, peer)) {
        .update => {},
        .retain => return true,
        .reject => return false,
    }

    switch (peer) {
        .unknown => {},

        .replica => |replica_index| {
            if (replica_index >= bus.replicas_addresses.len) return false;

            // Allowed transitions:
            // * unknown        → replica
            // * client_likely  → replica
            assert(connection.peer == .unknown or connection.peer == .client_likely);

            // If there is a connection to this replica, terminate and replace it.
            if (bus.replicas[replica_index]) |previous| {
                assert(previous != connection);
                assert(previous.peer == .replica);
                assert(previous.peer.replica == replica_index);
                assert(previous.state != .free);
                if (previous.state != .terminating) bus.terminate(previous, .shutdown);
            }

            switch (connection.peer) {
                .unknown => {},
                // If this connection was misclassified to a client due to a forwarded
                // request message (see `peer_type` in message_header.zig), it may
                // reside in the clients map. If so, it must be popped and mapped to the
                // correct replica.
                .client_likely => |stale_client_id| {
                    if (bus.clients.get(stale_client_id)) |mapped| {
                        if (mapped == connection) {
                            assert(bus.clients.remove(stale_client_id));
                        }
                    }
                },
                .replica, .client => unreachable,
            }

            bus.replicas[replica_index] = connection;
            log.info("{}: set_and_verify_peer: connection from replica={}", .{
                bus.id,
                replica_index,
            });
        },

        .client => |client_id| {
            assert(client_id != 0);

            // Allowed transitions:
            // * unknown        → client
            // * client_likely  → client
            assert(connection.peer == .unknown or connection.peer == .client_likely);

            // If there is a connection to this client, terminate and replace it.
            const entry = bus.clients.getOrPutAssumeCapacity(client_id);
            if (entry.found_existing) {
                const previous = entry.value_ptr.*;
                assert(previous.state == .connected or previous.state == .terminating);
                if (connection.peer == .unknown) assert(previous != connection);

                switch (previous.peer) {
                    .client, .client_likely => |previous_id| {
                        assert(previous_id == client_id);
                    },
                    .unknown, .replica => unreachable,
                }

                // The existing entry may be this very connection (client_likely → client),
                // in which case there is nothing to terminate.
                if (previous != connection and previous.state != .terminating) {
                    bus.terminate(previous, .shutdown);
                }
            }

            entry.value_ptr.* = connection;
            log.info("{}: set_and_verify_peer connection from client={}", .{
                bus.id,
                client_id,
            });
        },

        .client_likely => |client_id| {
            assert(client_id != 0);
            switch (connection.peer) {
                .unknown => {
                    // If the peer transitions from unknown -> client_likely, either
                    // a replica or a client may be sending a request message. Instead
                    // of terminating an existing connection and replacing it, if one
                    // exists in the client map, we wait for it to get resolved to
                    // either a replica or a client.
                    const entry = bus.clients.getOrPutAssumeCapacity(client_id);
                    if (!entry.found_existing) {
                        entry.value_ptr.* = connection;
                        log.info("{}: set_and_verify_peer connection from " ++
                            "client_likely={}", .{ bus.id, client_id });
                    }
                },
                .replica, .client, .client_likely => unreachable,
            }
        },
    }

    connection.peer = peer;

    return true;
}
|
|
773
|
+
|
|
774
|
+
/// Attempt moving messages from the recv buffer into the replica for processing. Called
/// when a recv syscall completes, or when a replica signals readiness to consume
/// previously suspended messages.
///
/// Afterwards: an invalid buffer terminates the connection; a buffer that still holds a
/// message parks the connection on `bus.connections_suspended`; otherwise the recv loop
/// continues (or termination is resumed if the connection is terminating).
fn recv_buffer_drain(bus: *MessageBus, connection: *Connection) void {
    assert(connection.recv_buffer != null);

    if (connection.recv_buffer.?.has_message()) {
        bus.on_messages_callback(bus, &connection.recv_buffer.?);
    }

    if (connection.recv_buffer.?.invalid) |reason| {
        log.warn("{}: on_recv: from={} terminating connection: invalid {s}", .{
            bus.id,
            connection.peer,
            @tagName(reason),
        });
        bus.terminate(connection, .no_shutdown);
        return;
    }

    // The callback did not consume everything: wait to be resumed.
    if (connection.recv_buffer.?.has_message()) {
        maybe(connection.state == .terminating);
        bus.connections_suspended.push(connection);
        return;
    }

    switch (connection.state) {
        .terminating => bus.terminate_join(connection),
        else => bus.recv(connection),
    }
}
|
|
805
|
+
|
|
806
|
+
/// Try to send the message to the given replica.
/// If there is no connection recorded for that replica, the message is dropped and this
/// is noted at debug log level.
pub fn send_message_to_replica(bus: *MessageBus, replica: u8, message: *Message) void {
    // Messages sent by a replica to itself should never be passed to the message bus.
    if (bus.process == .replica) assert(replica != bus.process.replica);

    const connection = bus.replicas[replica] orelse {
        log.debug("{}: send_message_to_replica: no connection to={} header={}", .{
            bus.id,
            replica,
            message.header,
        });
        return;
    };
    bus.send_message(connection, message);
}
|
|
820
|
+
|
|
821
|
+
/// Try to send the message to the client with the given id.
/// If the client has no connection in `bus.clients`, the message is dropped; the drop is
/// only visible at debug log level.
pub fn send_message_to_client(bus: *MessageBus, client_id: u128, message: *Message) void {
    assert(bus.process == .replica);
    assert(bus.clients.capacity() > 0);

    const connection = bus.clients.get(client_id) orelse {
        log.debug(
            "{}: send_message_to_client: no connection to={}",
            .{ bus.id, client_id },
        );
        return;
    };
    bus.send_message(connection, message);
}
|
|
836
|
+
|
|
837
|
+
/// Add a message to the connection's send queue (taking a reference to it), starting a
/// send operation if none is currently submitted.
///
/// The message is silently ignored if the connection is terminating, and dropped with an
/// info log if the send queue is full.
fn send_message(bus: *MessageBus, connection: *Connection, message: *Message) void {
    assert(connection.peer != .unknown);

    switch (connection.state) {
        .terminating => return,
        .connected, .connecting => {},
        .free, .accepting => unreachable,
    }

    if (connection.send_queue.full()) {
        log.info("{}: send_message: to={} queue full, dropping command={s}", .{
            bus.id,
            connection.peer,
            @tagName(message.header.command),
        });
        return;
    }
    connection.send_queue.push_assume_capacity(message.ref());

    switch (connection.state) {
        // If the connection has not yet been established we can't send yet.
        // Instead on_connect() will call send().
        .connecting => assert(connection.peer == .replica),
        // If there is no send operation currently in progress, start one.
        else => {
            if (!connection.send_submitted) bus.send(connection);
        },
    }
}
|
|
865
|
+
|
|
866
|
+
/// Send loop.
///
/// Kickstarted by `connect` and loops onto itself until all enqueued messages are sent.
/// `accept` doesn't start the send loop because it doesn't know the identity of the peer.
///
/// First tries the synchronous fast path (`send_now`); whatever remains at the head of the
/// queue is then submitted asynchronously, with completion delivered to `send_callback`.
fn send(bus: *MessageBus, connection: *Connection) void {
    assert(connection.peer != .unknown);
    assert(connection.state == .connected);
    assert(connection.fd != null);
    assert(!connection.send_submitted);

    bus.send_now(connection);

    // Nothing more to send, break out of the send loop.
    const pending = connection.send_queue.head() orelse return;

    connection.send_submitted = true;
    const unsent = pending.buffer[connection.send_progress..pending.header.size];
    bus.io.send(
        *MessageBus,
        bus,
        send_callback,
        &connection.send_completion,
        connection.fd.?,
        unsent,
    );
}
|
|
890
|
+
|
|
891
|
+
// Optimization/fast path: try to immediately copy the send queue over to the in-kernel
// send buffer, falling back to asynchronous send if that's not possible.
//
// Stops as soon as `io.send_now` declines (returns null) or a message is only partially
// written; `send_progress` then records how much of the head message has gone out.
fn send_now(bus: *MessageBus, connection: *Connection) void {
    assert(connection.state == .connected);
    assert(connection.fd != null);
    assert(!connection.send_submitted);

    // Visit at most the number of messages queued on entry.
    var remaining = connection.send_queue.count;
    while (remaining > 0) : (remaining -= 1) {
        const message = connection.send_queue.head().?;
        assert(connection.send_progress < message.header.size);

        const unsent = message.buffer[connection.send_progress..message.header.size];
        const write_size = bus.io.send_now(connection.fd.?, unsent) orelse return;
        assert(write_size <= constants.message_size_max);

        connection.send_progress += @intCast(write_size);
        assert(connection.send_progress <= message.header.size);

        if (connection.send_progress != message.header.size) {
            // Partial write: leave the rest to the asynchronous path.
            assert(connection.send_progress < message.header.size);
            return;
        }

        // Fully sent: release the message and move on to the next one.
        _ = connection.send_queue.pop();
        bus.unref(message);
        connection.send_progress = 0;
    }
}
|
|
918
|
+
|
|
919
|
+
/// Completion handler for the asynchronous send submitted by `send`.
///
/// Resumes termination if the connection is terminating, terminates it on a send error,
/// and otherwise records progress, releases the head message once fully sent, and
/// re-enters the send loop.
fn send_callback(
    bus: *MessageBus,
    completion: *IO.Completion,
    result: IO.SendError!usize,
) void {
    const connection: *Connection = @alignCast(
        @fieldParentPtr("send_completion", completion),
    );
    assert(connection.send_submitted);
    connection.send_submitted = false;
    assert(connection.peer != .unknown);

    if (connection.state == .terminating) {
        bus.terminate_join(connection);
        return;
    }
    assert(connection.state == .connected);

    const write_size = result catch |err| {
        // TODO: maybe don't need to close on *every* error
        log.warn("{}: on_send: to={} {}", .{
            bus.id,
            connection.peer,
            err,
        });
        bus.terminate(connection, .shutdown);
        return;
    };
    assert(write_size <= constants.message_size_max);

    connection.send_progress += @intCast(write_size);
    const head_size = connection.send_queue.head().?.header.size;
    assert(connection.send_progress <= head_size);

    // If the message has been fully sent, move on to the next one.
    if (connection.send_progress == head_size) {
        connection.send_progress = 0;
        const sent = connection.send_queue.pop().?;
        bus.unref(sent);
    }

    bus.send(connection);
}
|
|
956
|
+
|
|
957
|
+
/// Clean up an active connection and reset it to its initial, unused, state.
/// This reset does not happen instantly as currently in progress operations
/// must first be stopped. The `how` arg allows the caller to specify if a
/// shutdown syscall should be made or not before proceeding to wait for
/// currently in progress operations to complete and close the socket.
/// I'll be back! (when the Connection is reused after being fully closed)
///
/// The connection must not be `.free` or already `.terminating`, and must have an fd.
fn terminate(
    bus: *MessageBus,
    connection: *Connection,
    how: enum { shutdown, no_shutdown },
) void {
    assert(connection.state != .free);
    assert(connection.fd != null);

    if (how == .shutdown) {
        // The shutdown syscall will cause currently in progress send/recv
        // operations to be gracefully closed while keeping the fd open.
        //
        // TODO: Investigate differences between shutdown() on Linux vs Darwin.
        // Especially how this interacts with our assumptions around pending I/O.
        bus.io.shutdown(connection.fd.?, .both) catch |err| switch (err) {
            error.SocketNotConnected => {
                // This should only happen if we for some reason decide to terminate
                // a connection while a connect operation is in progress.
                // This is fine though, we simply continue with the logic below and
                // wait for the connect operation to finish.

                // TODO: This currently happens in other cases if the
                // connection was closed due to an error. We need to intelligently
                // decide whether to shutdown or close directly based on the error
                // before these assertions may be re-enabled.

                //assert(connection.state == .connecting);
                //assert(connection.recv_submitted);
                //assert(!connection.send_submitted);
            },
            // Ignore all the remaining errors for now
            error.ConnectionAborted,
            error.ConnectionResetByPeer,
            error.BlockingOperationInProgress,
            error.NetworkSubsystemFailed,
            error.SystemResources,
            error.Unexpected,
            => {},
        };
    }

    assert(connection.state != .terminating);
    connection.state = .terminating;
    // Closes right away if nothing is outstanding; otherwise the pending callbacks will
    // call `terminate_join` again.
    bus.terminate_join(connection);
}
|
|
1009
|
+
|
|
1010
|
+
/// Proceeds to close a terminating connection once nothing is outstanding on it.
///
/// Returns without closing while a recv/send is still submitted or while the receive
/// buffer still holds a message; callers invoke this again as those conditions clear.
fn terminate_join(bus: *MessageBus, connection: *Connection) void {
    assert(connection.state == .terminating);

    // If a recv or send operation is currently submitted to the kernel,
    // submitting a close would cause a race. Therefore we must wait for
    // any currently submitted operation to complete.
    const io_pending = connection.recv_submitted or connection.send_submitted;
    if (io_pending) return;

    // Even if there's no active physical IO in progress, we want to wait until all
    // messages already received are consumed, to prevent graceful termination of
    // connection from dropping messages.
    if (connection.recv_buffer) |*pending_buffer| {
        if (pending_buffer.has_message()) return;
    }

    bus.terminate_close(connection);
}
|
|
1025
|
+
|
|
1026
|
+
/// Releases a terminating connection's resources and submits the asynchronous close of
/// its socket; `terminate_close_callback` finishes the reset.
///
/// Requires that no recv/send is submitted and that the receive buffer holds no message.
fn terminate_close(bus: *MessageBus, connection: *Connection) void {
    assert(connection.state == .terminating);
    assert(!connection.recv_submitted);
    assert(!connection.send_submitted);
    if (connection.recv_buffer) |receive_buffer| assert(!receive_buffer.has_message());
    assert(connection.fd != null);

    // Mark both directions as submitted for the duration of the close
    // (`terminate_close_callback` asserts both flags).
    connection.send_submitted = true;
    connection.recv_submitted = true;

    // We can free resources now that there is no longer any I/O in progress.
    while (connection.send_queue.pop()) |queued| {
        bus.unref(queued);
    }
    if (connection.recv_buffer) |*buffer| buffer.deinit(bus.pool);
    connection.recv_buffer = null;

    const closing_fd = connection.fd.?;
    connection.fd = null;

    // It's OK to use the send completion here as we know that no send
    // operation is currently in progress.
    bus.io.close(
        *MessageBus,
        bus,
        terminate_close_callback,
        &connection.send_completion,
        closing_fd,
    );
}
|
|
1053
|
+
|
|
1054
|
+
/// Completion handler for the close submitted by `terminate_close`.
///
/// Logs a close error (if any), removes the connection from `bus.clients` / `bus.replicas`
/// when it is still the registered one, decrements `bus.connections_used`, and resets the
/// connection to its default field values while keeping its send queue buffer.
fn terminate_close_callback(
    bus: *MessageBus,
    completion: *IO.Completion,
    result: IO.CloseError!void,
) void {
    const connection: *Connection = @alignCast(
        @fieldParentPtr("send_completion", completion),
    );
    assert(connection.state == .terminating);
    assert(connection.recv_submitted);
    assert(connection.send_submitted);
    assert(connection.recv_buffer == null);
    assert(connection.send_queue.empty());
    assert(connection.fd == null);

    result catch |err| {
        log.warn("{}: on_close: to={} {}", .{ bus.id, connection.peer, err });
    };

    // Reset the connection to its initial state.
    switch (connection.peer) {
        .unknown => {},
        .replica => |replica| {
            // A newer replica connection may have replaced this one:
            if (bus.replicas[replica] == connection) {
                bus.replicas[replica] = null;
            } else {
                // A newer replica connection may even leapfrog this connection and
                // then be terminated and set to null before we can get here:
                stdx.maybe(bus.replicas[replica] == null);
            }
        },
        .client, .client_likely => |client_id| {
            assert(bus.process == .replica);
            // A newer client connection may have replaced this one:
            if (bus.clients.get(client_id)) |registered| {
                if (registered == connection) {
                    assert(bus.clients.remove(client_id));
                }
            } else {
                // A newer client connection may even leapfrog this connection
                // and then be terminated and set to null before we can get
                // here.
            }
        },
    }

    bus.connections_used -= 1;

    // Read the buffer out first so the reset below does not read from the value it is
    // overwriting.
    const send_queue_buffer = connection.send_queue.buffer;
    connection.* = .{
        .send_queue = .{
            .buffer = send_queue_buffer,
        },
    };
}
|
|
1107
|
+
|
|
1108
|
+
/// Obtains a message from the bus's message pool by forwarding to `bus.pool.get_message`.
/// The return type is determined at compile time by `command`
/// (`MessagePool.GetMessageType(command)`).
pub fn get_message(
    bus: *MessageBus,
    comptime command: ?vsr.Command,
) MessagePool.GetMessageType(command) {
    return bus.pool.get_message(command);
}
|
|
1114
|
+
|
|
1115
|
+
/// Releases one reference to `message` by forwarding to `bus.pool.unref`.
///
/// `@TypeOf(message)` is one of:
/// - `*Message`
/// - `MessageType(command)` for any `command`.
pub fn unref(bus: *MessageBus, message: anytype) void {
    bus.pool.unref(message);
}
|
|
1121
|
+
|
|
1122
|
+
/// Reports whether `resume_receive` would schedule work: there is at least one suspended
/// connection and no resume is already submitted.
pub fn resume_needed(bus: *MessageBus) bool {
    return !bus.connections_suspended.empty() and !bus.resume_receive_submitted;
}
|
|
1127
|
+
|
|
1128
|
+
/// Schedules `ready_to_receive_callback` via a zero-length timeout so that suspended
/// connections get their receive buffers drained again. No-op unless `resume_needed()`.
pub fn resume_receive(bus: *MessageBus) void {
    const needed = bus.resume_needed();
    if (!needed) return;

    bus.resume_receive_submitted = true;
    bus.io.timeout(
        *MessageBus,
        bus,
        ready_to_receive_callback,
        &bus.resume_receive_completion,
        0, // Zero timeout means next tick.
    );
}
|
|
1140
|
+
|
|
1141
|
+
/// Completion callback for the timeout submitted by `resume_receive`.
///
/// Clears `resume_receive_submitted`, then drains the receive buffer of every
/// connection that was queued in `connections_suspended` when the callback began.
/// Both timeout errors are treated as impossible.
fn ready_to_receive_callback(
    bus: *MessageBus,
    completion: *IO.Completion,
    result: IO.TimeoutError!void,
) void {
    assert(completion == &bus.resume_receive_completion);
    result catch |err| switch (err) {
        error.Canceled, error.Unexpected => unreachable,
    };

    assert(bus.resume_receive_submitted);
    bus.resume_receive_submitted = false;
    // The suspended queue may or may not be empty at this point.
    maybe(bus.connections_suspended.empty());

    // Take a private copy of the queue and reset the shared one, so that this loop
    // only visits the connections present right now (avoids an infinite loop).
    var pending = bus.connections_suspended;
    bus.connections_suspended.reset();

    while (pending.pop()) |connection| {
        assert(connection.recv_buffer != null);
        const buffer = &connection.recv_buffer.?;
        assert(buffer.advance_size >= @sizeOf(vsr.Header));
        assert(buffer.has_message());
        bus.recv_buffer_drain(connection);
    }
}
|
|
1166
|
+
|
|
1167
|
+
/// Per-peer connection state for exchanging messages with a client or another replica.
const Connection = struct {
    /// Identity of the remote side, derived from the headers of the messages received
    /// on this connection. An unexpected change of peer (for example, caused by a
    /// misdirected message) results in the connection being terminated.
    peer: vsr.Peer = .unknown,

    /// Lifecycle of the connection.
    state: enum {
        /// Not in use; `peer` is `.unknown`.
        free,
        /// Reserved for an accept operation that is still in progress;
        /// `peer` is `.unknown`.
        accepting,
        /// The peer is a replica, and a connect operation has been started
        /// but has not completed yet.
        connecting,
        /// Fully connected. The peer may be a client, a replica, or still unknown.
        connected,
        /// Termination has begun, but cleanup has not finished yet.
        terminating,
    } = .free,

    /// Socket for this connection. Only guaranteed to be valid while `state` is
    /// `.connected`. It is reset to null during shutdown and is always null while the
    /// connection is unused (i.e. `state == .free`). Note that `peer == .unknown` alone
    /// does not imply an unused connection, since a connected peer may still be unknown.
    fd: ?IO.socket_t = null,

    /// Completion shared by all recv operations, and also by the initial connect
    /// when establishing a replica connection.
    recv_completion: IO.Completion = undefined,
    /// True exactly while `recv_completion` is submitted to the IO abstraction
    /// and its callback has not run yet.
    recv_submitted: bool = false,
    /// Receive-side message buffer; absent (null) by default.
    recv_buffer: ?MessageBuffer = null,

    /// Completion shared by all send operations.
    send_completion: IO.Completion = undefined,
    /// True exactly while `send_completion` is submitted to the IO abstraction
    /// and its callback has not run yet.
    send_submitted: bool = false,
    /// How many bytes of the current message have been sent so far.
    send_progress: u32 = 0,
    /// Messages queued for sending to the client or replica peer.
    send_queue: SendQueue,
    /// Queue link used by `connections_suspended`.
    link: QueueType(Connection).Link = .{},
};
|
|
1213
|
+
};
|
|
1214
|
+
}
|