tigerbeetle 0.0.36 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,1019 @@
|
|
|
1
|
+
//! Cluster-wide synchronized clock, aggregating timing information from all replicas.
|
|
2
|
+
//!
|
|
3
|
+
//! Time plays a central role in TigerBeetle's data model. Because it is so important, TigerBeetle
|
|
4
|
+
//! defines its own time. In other words, we don't use time to drive consensus, we use consensus to
|
|
5
|
+
//! drive time!
|
|
6
|
+
//!
|
|
7
|
+
//! Time is important for the domain of accounting (e.g., pending transfers can expire with time),
|
|
8
|
+
//! but it can't be supplied by the client, as its clock can be unreliable. For this reason,
|
|
9
|
+
//! TigerBeetle needs to expose a "time service" to the state machine logic.
|
|
10
|
+
//!
|
|
11
|
+
//! Additionally, TigerBeetle needs to assign some kind of a sequence number to every event in the
|
|
12
|
+
//! system, to make it easy to say whether A happened before B or vice versa.
|
|
13
|
+
//!
|
|
14
|
+
//! Finally, to maintain indices, the LSM tree could benefit from a compact synthetic primary key.
|
|
15
|
+
//!
|
|
16
|
+
//! Time solves _all_ of these problems at once: each object in TigerBeetle gets tagged with a u64
|
|
17
|
+
//! nanosecond-precision creation timestamp. These timestamps are unique across all objects (an
|
|
18
|
+
//! Account and a Transfer can never have the same timestamp), consistent with linearization order
|
|
19
|
+
//! of the events (earlier events get smaller timestamps), and closely match the real wall-clock
|
|
20
|
+
//! time. Timestamps are used as internal synthetic primary keys instead of user-supplied random
|
|
21
|
+
//! u128 ids because they are smaller and also expose temporal locality.
|
|
22
|
+
//!
|
|
23
|
+
//! Implementation:
|
|
24
|
+
//!
|
|
25
|
+
//! The ultimate source of timestamps is each replica's operating system. This time is backed by a
|
|
26
|
+
//! replica-local drifty hardware clock which is periodically synchronized through NTP with high
|
|
27
|
+
//! quality clocks elsewhere. Using system time directly as a source of TigerBeetle timestamps
|
|
28
|
+
//! doesn't work:
|
|
29
|
+
//!
|
|
30
|
+
//! First, system time differs across replicas. To solve this problem, only the primary assigns
|
|
31
|
+
//! timestamps. Specifically, when the primary converts a request to a prepare, it assigns its
|
|
32
|
+
//! current time to the prepare. The state machine then assigns `prepare_timestamp + object_index`
|
|
33
|
+
//! as the creation timestamp for each object in a batch.
|
|
34
|
+
//!
|
|
35
|
+
//! Second, system time is not monotonic: due to NTP it can easily go backwards. To solve this
|
|
36
|
+
//! problem, the primary just takes the max between the current time and the previous timestamp
|
|
37
|
+
//! used. Notably, this ends up preserving monotonicity across restarts --- it is when replaying
|
|
38
|
+
//! past prepares from the WAL that a replica learns about the latest timestamp before restart.
|
|
39
|
+
//!
|
|
40
|
+
//! Third, replica's system time lacks high availability: if a primary is isolated from NTP servers
|
|
41
|
+
//! its local clock can drift significantly. Another problematic scenario is an operator error
|
|
42
|
+
//! which incorrectly adjusts the primary's local clock to be far in the future, which, due to
|
|
43
|
+
//! the monotonicity requirement, could render the cluster completely unusable.
|
|
44
|
+
//!
|
|
45
|
+
//! To solve the last problem, the primary aggregates clock information from the entire cluster and
|
|
46
|
+
//! calculates a timestamp value which is consistent with clocks on at least half of the replicas.
|
|
47
|
+
//!
|
|
48
|
+
//! Sketch of the algorithm:
|
|
49
|
+
//!
|
|
50
|
+
//! Assume you have six different clocks. Each clock shows a different time. Most are close, but
|
|
51
|
+
//! there could be outliers. How do you estimate the "true" time?
|
|
52
|
+
//!
|
|
53
|
+
//! The key insight is to think in intervals, rather than points. If a clock shows time t and
|
|
54
|
+
//! claims error margin Δ, it means the true time is in the [t-Δ;t+Δ] interval. If you have two
|
|
55
|
+
//! clocks, you can intersect their intervals to narrow down the true time interval. If the
|
|
56
|
+
//! intervals are disjoint, that means that at least one of the clocks is malfunctioning. This gives
|
|
57
|
+
//! an algorithm for identifying cluster time --- collect clock measurements from all replicas
|
|
58
|
+
//! together with the respective error margins and find an interval which is consistent with at
|
|
59
|
+
//! least half of the clocks.
|
|
60
|
+
//!
|
|
61
|
+
//! The first problem with the above plan is that clocks' error margins are not known. To solve
|
|
62
|
+
//! this, flip the problem around and find the smallest error margin that still allows for half of
|
|
63
|
+
//! the clocks' intervals to intersect. If this minimal error margin still ends up too large,
|
|
64
|
+
//! declare that the clocks are unsynchronized and wait for NTP to fix things up.
|
|
65
|
+
//!
|
|
66
|
+
//! The second problem with the plan is that a replica can only read its own clock. To learn other
|
|
67
|
+
//! replicas' clocks, the following algorithm is used:
|
|
68
|
+
//!
|
|
69
|
+
//! - A sends a ping message to B, including A's current time.
|
|
70
|
+
//! - B replies with a pong message, which includes a copy of the original ping timestamp, as well
|
|
71
|
+
//! as B's current time.
|
|
72
|
+
//! - When A receives a pong, it uses the attached ping time to estimate the network delay and infer
|
|
73
|
+
//! the clock offset from that.
|
|
74
|
+
//!
|
|
75
|
+
//! Further reading:
|
|
76
|
+
//!
|
|
77
|
+
//! [Three Clocks are Better than One](https://tigerbeetle.com/blog/2021-08-30-three-clocks-are-better-than-one)
|
|
78
|
+
//!
|
|
79
|
+
//! And watching:
|
|
80
|
+
//!
|
|
81
|
+
//! [Detecting Clock Sync Failure in Highly Available Systems](https://youtu.be/7R-Iz6sJG6Q?si=9sD2TpfD29AxUjOY)
|
|
82
|
+
const std = @import("std");
|
|
83
|
+
const assert = std.debug.assert;
|
|
84
|
+
const fmt = std.fmt;
|
|
85
|
+
|
|
86
|
+
const stdx = @import("stdx");
|
|
87
|
+
const log = stdx.log.scoped(.clock);
|
|
88
|
+
const constants = @import("../constants.zig");
|
|
89
|
+
const ratio = stdx.PRNG.ratio;
|
|
90
|
+
const Instant = stdx.Instant;
|
|
91
|
+
const Time = @import("../time.zig").Time;
|
|
92
|
+
const TimeSim = @import("../testing/time.zig").TimeSim;
|
|
93
|
+
const Tracer = @import("../trace.zig").Tracer;
|
|
94
|
+
|
|
95
|
+
// The limits below are configured in milliseconds in `constants` and converted once here to
// nanoseconds.

/// `constants.clock_offset_tolerance_max_ms`, expressed in nanoseconds.
const clock_offset_tolerance_max: u64 =
    constants.clock_offset_tolerance_max_ms * std.time.ns_per_ms;

/// `constants.clock_epoch_max_ms`, expressed in nanoseconds.
const epoch_max: u64 = constants.clock_epoch_max_ms * std.time.ns_per_ms;

/// `constants.clock_synchronization_window_min_ms`, expressed in nanoseconds.
const window_min: u64 = constants.clock_synchronization_window_min_ms * std.time.ns_per_ms;

/// `constants.clock_synchronization_window_max_ms`, expressed in nanoseconds.
/// `learn()` rejects samples that arrive later than this after the window was opened.
const window_max: u64 = constants.clock_synchronization_window_max_ms * std.time.ns_per_ms;
|
|
100
|
+
|
|
101
|
+
const Marzullo = @import("marzullo.zig").Marzullo;
|
|
102
|
+
|
|
103
|
+
pub const Clock = @This();
|
|
104
|
+
|
|
105
|
+
/// A single clock measurement of one clock source, as stored per source in `Epoch.sources`.
const Sample = struct {
    /// The relative difference between our wall clock reading and that of the remote clock source.
    clock_offset: i64,

    /// Half of the round trip time measured for the ping/pong exchange that produced this sample
    /// (`learn()` computes it as `(m2 - m0) / 2`). Zero for a replica's sample of itself.
    one_way_delay: u64,
};
|
|
110
|
+
|
|
111
|
+
/// A set of per-source clock samples together with the local monotonic and wall clock readings
/// captured when the set was (re)started. `Clock` keeps two of these: `epoch` and `window`.
const Epoch = struct {
    /// The best clock offset sample per remote clock source (with minimum one way delay) collected
    /// over the course of a window period of several seconds.
    sources: []?Sample,

    /// The total number of samples learned while synchronizing this epoch.
    samples: usize,

    /// The monotonic clock timestamp when this epoch began. We use this to measure elapsed time.
    monotonic: u64,

    /// The wall clock timestamp when this epoch began. We add the elapsed monotonic time to this
    /// plus the synchronized clock offset to arrive at a synchronized realtime timestamp. We
    /// capture this realtime when starting the epoch, before we take any samples, to guard against
    /// any jumps in the system's realtime clock from impacting our measurements.
    realtime: i64,

    /// Once we have enough source clock offset samples in agreement, the epoch is synchronized.
    /// We then have lower and upper bounds on the true cluster time, and can install this epoch
    /// for subsequent clock readings. This epoch is then valid for several seconds, while clock
    /// drift has not had enough time to accumulate into any significant clock skew, and while we
    /// collect samples for the next epoch to refresh and replace this one.
    synchronized: ?Marzullo.Interval,

    /// A guard to prevent synchronizing too often without having learned any new samples.
    learned: bool = false,

    /// Returns the monotonic time (in nanoseconds) that has passed since this epoch began.
    fn elapsed(epoch: *Epoch, clock: *Clock) u64 {
        const now_ns = clock.monotonic().ns;
        return now_ns - epoch.monotonic;
    }

    /// Discards all samples and restarts the epoch at the clock's current monotonic and realtime
    /// readings. The `sources` slice itself is kept; only its contents are cleared.
    fn reset(epoch: *Epoch, clock: *Clock) void {
        @memset(epoch.sources, null);

        // A replica always has zero clock offset and network delay to its own system time
        // reading, so its own slot is pre-filled and counted as the first sample:
        epoch.sources[clock.replica] = .{ .clock_offset = 0, .one_way_delay = 0 };
        epoch.samples = 1;

        epoch.monotonic = clock.monotonic().ns;
        epoch.realtime = clock.realtime();

        epoch.synchronized = null;
        epoch.learned = false;
    }

    /// Returns the number of clock sources that currently have a sample.
    fn sources_sampled(epoch: *Epoch) usize {
        var sampled_count: usize = 0;
        for (epoch.sources) |source| {
            sampled_count += @intFromBool(source != null);
        }
        return sampled_count;
    }
};
|
|
165
|
+
|
|
166
|
+
/// The index of the replica using this clock to provide synchronized time.
replica: u8,

/// Minimal number of distinct clock sources required for synchronization.
quorum: u8,

/// The underlying time source for this clock (system time or deterministic time).
time: Time,

/// An epoch from which the clock can read synchronized clock timestamps within safe bounds.
/// At least `constants.clock_synchronization_window_min_ms` is needed for this to be ready to use.
epoch: Epoch,

/// The next epoch (collecting samples and being synchronized) to replace the current epoch.
window: Epoch,

/// A static allocation to convert window samples into tuple bounds for Marzullo's
/// algorithm.
marzullo_tuples: []Marzullo.Tuple,

/// A kill switch to revert to unsynchronized realtime.
synchronization_disabled: bool,

/// The optional tracer handed to `init()`.
trace: ?*Tracer,
|
|
189
|
+
|
|
190
|
+
/// Creates a clock for replica `options.replica` in a cluster of `options.replica_count` clock
/// sources.
///
/// Allocates the sample buffers for the current epoch and for the next window, plus the scratch
/// buffer of Marzullo tuples, and then resets both epochs so that the clock starts out
/// unsynchronized with a freshly opened window.
///
/// Returns an allocation error if any of the three buffers cannot be allocated; buffers that
/// were already allocated are freed again in that case. On success the caller owns the returned
/// clock and releases it with `deinit()`.
pub fn init(
    allocator: std.mem.Allocator,
    time: Time,
    tracer: ?*Tracer,
    options: struct {
        /// The size of the cluster, i.e. the number of clock sources (including this
        /// replica).
        replica_count: u8,
        replica: u8,
        quorum: u8,
    },
) !Clock {
    assert(options.replica_count > 0);
    assert(options.replica < options.replica_count);
    assert(options.quorum > 0);
    assert(options.quorum <= options.replica_count);
    if (options.replica_count > 1) assert(options.quorum > 1);

    // Widen before doing any arithmetic: `replica_count * 2` would otherwise be evaluated as a
    // `u8` and overflow for 128 or more sources.
    const source_count: usize = options.replica_count;

    var epoch: Epoch = undefined;
    epoch.sources = try allocator.alloc(?Sample, source_count);
    errdefer allocator.free(epoch.sources);

    var window: Epoch = undefined;
    window.sources = try allocator.alloc(?Sample, source_count);
    errdefer allocator.free(window.sources);

    // There are two Marzullo tuple bounds (lower and upper) per source clock offset sample:
    const marzullo_tuples = try allocator.alloc(Marzullo.Tuple, source_count * 2);
    errdefer allocator.free(marzullo_tuples);

    var self = Clock{
        .replica = options.replica,
        .quorum = options.quorum,
        .time = time,
        .epoch = epoch,
        .window = window,
        .marzullo_tuples = marzullo_tuples,
        // A cluster of one cannot synchronize.
        .synchronization_disabled = options.replica_count == 1,

        .trace = tracer,
    };

    // Reset the current epoch to be unsynchronized,
    self.epoch.reset(&self);
    // and open a new epoch window to start collecting samples...
    self.window.reset(&self);

    return self;
}
|
|
240
|
+
|
|
241
|
+
/// Frees the three buffers allocated by `init()`: the sample slots of the current epoch and of
/// the window, and the Marzullo tuple scratch buffer. `allocator` should be the allocator that
/// was passed to `init()`.
pub fn deinit(self: *Clock, allocator: std.mem.Allocator) void {
    // Sample slots, one per clock source, for each of the two epochs.
    allocator.free(self.epoch.sources);
    allocator.free(self.window.sources);

    // Scratch space for Marzullo's algorithm.
    allocator.free(self.marzullo_tuples);
}
|
|
246
|
+
|
|
247
|
+
/// Called by `Replica.on_pong()` with:
/// * the index of the `replica` that has replied to our ping with a pong,
/// * our monotonic timestamp `m0` embedded in the ping we sent, carried over into this pong,
/// * the remote replica's `realtime()` timestamp `t1`, and
/// * our monotonic timestamp `m2` as captured by our `Replica.on_pong()` handler.
///
/// Records a clock offset sample for `replica` in the current window, keeping whichever sample
/// `minimum_one_way_delay()` selects between the stored one and the new one. Samples are
/// dropped (with a log message) when `m0 > m2`, when the ping predates the current window, or
/// when more than `window_max` has elapsed since the window was opened. Does nothing when
/// synchronization is disabled.
pub fn learn(self: *Clock, replica: u8, m0: u64, t1: i64, m2: u64) void {
    assert(replica != self.replica);

    if (self.synchronization_disabled) return;

    // Our m0 and m2 readings should always be monotonically increasing if not equal.
    // Crucially, it is possible for a very fast network to have m0 == m2, especially where
    // `constants.tick_ms` is at a more coarse granularity. We must therefore tolerate RTT=0 or
    // otherwise we would have a liveness bug simply because we would be throwing away perfectly
    // good clock samples.
    // Only m0 > m2 is rejected; this should never happen, so treat it as a bad sample:
    if (m0 > m2) {
        log.warn("{}: learn: m0={} > m2={}", .{ self.replica, m0, m2 });
        return;
    }

    // The window was reset between a ping and the corresponding pong.
    const window_start: u64 = self.window.monotonic;
    if (m0 < window_start) {
        log.debug("{}: learn: m0={} < window.monotonic={}", .{
            self.replica,
            m0,
            window_start,
        });
        return;
    }
    assert(m2 >= window_start); // Guaranteed by monotonicity of our local Time.

    // Time since the window was opened, measured on our monotonic clock.
    const elapsed: u64 = m2 - window_start;
    if (elapsed > window_max) {
        log.warn("{}: learn: elapsed={} > window_max={}", .{
            self.replica,
            elapsed,
            window_max,
        });
        return;
    }

    // Assume a symmetric path at first: half of the round trip is spent in each direction.
    const one_way_delay: u64 = (m2 - m0) / 2;

    // Our own wall clock at the moment the pong arrived, derived from the window's starting
    // realtime plus elapsed monotonic time.
    const t2: i64 = self.window.realtime + @as(i64, @intCast(elapsed));

    // The remote reading `t1`, advanced by the one way delay, compared against `t2`.
    const clock_offset: i64 = t1 + @as(i64, @intCast(one_way_delay)) - t2;

    const asymmetric_delay = self.estimate_asymmetric_delay(
        replica,
        one_way_delay,
        clock_offset,
    );
    const clock_offset_corrected = clock_offset + asymmetric_delay;

    log.debug("{}: learn: replica={} m0={} t1={} m2={} t2={} one_way_delay={} " ++
        "asymmetric_delay={} clock_offset={}", .{
        self.replica,
        replica,
        m0,
        t1,
        m2,
        t2,
        one_way_delay,
        asymmetric_delay,
        clock_offset_corrected,
    });

    // The less network delay, the more likely we have an accurate clock offset measurement:
    const sample = Sample{
        .clock_offset = clock_offset_corrected,
        .one_way_delay = one_way_delay,
    };
    self.window.sources[replica] = minimum_one_way_delay(self.window.sources[replica], sample);
    self.window.samples += 1;

    // We decouple calls to `synchronize()` so that it's not triggered by these network events.
    // Otherwise, excessive duplicate network packets would burn the CPU.
    self.window.learned = true;
}
|
|
328
|
+
|
|
329
|
+
/// Returns the current reading of the underlying `time`'s monotonic clock.
///
/// Callers:
/// * `Replica.on_ping_timeout()`, to provide `m0` when we decide to send a ping.
/// * `Replica.on_pong()`, to provide `m2` when we receive a pong.
/// * `Replica.on_commit_message_timeout()`, to allow backups to discard
///   duplicate/misdirected heartbeats.
pub fn monotonic(self: *Clock) Instant {
    const instant_now = self.time.monotonic();
    return instant_now;
}
|
|
336
|
+
|
|
337
|
+
/// Returns the current reading of the underlying `time`'s realtime clock, unclamped.
///
/// Called by `Replica.on_ping()` when responding to a ping with a pong.
/// This should never be used by the state machine, only for measuring clock offsets.
pub fn realtime(self: *Clock) i64 {
    const realtime_now = self.time.realtime();
    return realtime_now;
}
|
|
342
|
+
|
|
343
|
+
/// Returns the system time clamped to be within our synchronized lower and upper bounds, or
/// `null` if the current epoch has not been synchronized.
///
/// Called by `Replica.on_request()` when the primary wants to timestamp a batch. If the primary's
/// clock is not synchronized with the cluster, it must wait until it is.
///
/// When synchronization is disabled, the unclamped `realtime()` is returned instead.
///
/// This is complementary to NTP and allows clusters with very accurate time to make use of it,
/// while providing guard rails for when NTP is partitioned or unable to correct quickly enough.
pub fn realtime_synchronized(self: *Clock) ?i64 {
    if (self.synchronization_disabled) return self.realtime();

    const interval = self.epoch.synchronized orelse return null;

    // Project the epoch's realtime reading forward by the monotonic time elapsed since then,
    // and allow the system time to deviate from that only within the synchronized interval.
    const elapsed: i64 = @intCast(self.epoch.elapsed(self));
    const system_now = self.realtime();
    const epoch_now = self.epoch.realtime + elapsed;
    return std.math.clamp(
        system_now,
        epoch_now + interval.lower_bound,
        epoch_now + interval.upper_bound,
    );
}
|
|
362
|
+
|
|
363
|
+
/// Returns twice the median one way delay sampled from the other replicas in the current window,
/// or `null` if fewer than `quorum` other replicas have a sample.
///
/// For an even number of samples, the upper of the two middle values is used as the median.
pub fn round_trip_time_median_ns(self: *const Clock) ?u64 {
    // +1 to allow for the standby.
    var delays = stdx.BoundedArrayType(u64, constants.replicas_max + 1){};
    for (self.window.sources, 0..) |source, replica_index| {
        // Our own entry is not a network measurement.
        if (replica_index == self.replica) continue;
        const sampled = source orelse continue;
        delays.push(sampled.one_way_delay);
    }

    if (delays.count() < self.quorum) return null;

    std.mem.sort(u64, delays.slice(), {}, std.sort.asc(u64));
    const median_index = @divFloor(delays.count(), 2);
    return delays.get(median_index) * 2;
}
|
|
383
|
+
|
|
384
|
+
/// Advances the underlying `time` by one tick and, unless synchronization is disabled, attempts
/// to synchronize the current window and expires the epoch once it is `epoch_max` old.
pub fn tick(self: *Clock) void {
    self.time.tick();
    if (self.synchronization_disabled) return;

    self.synchronize();

    // Expire the current epoch if successive windows failed to synchronize:
    // Gradual clock drift prevents us from using an epoch for more than a few seconds.
    const epoch_expired = self.epoch.elapsed(self) >= epoch_max;
    if (!epoch_expired) return;

    log.err(
        "{}: no agreement on cluster time (partitioned or too many clock faults)",
        .{self.replica},
    );
    self.epoch.reset(self);
}
|
|
399
|
+
|
|
400
|
+
/// Estimates the asymmetric delay for a sample compared to the previous window, according to
/// Algorithm 1 from Section 4.2,
/// "A System for Clock Synchronization in an Internet of Things".
///
/// Persistent asymmetric delay cannot be estimated, because these two situations are
/// indistinguishable:
/// - A and B have synchronized clocks and a 50ms symmetrical delay.
/// - B's clock is 50ms ahead, A → B delay is 0ms, B → A delay is 100ms.
///
/// In both of these cases, A and B observe that a ping-pong round trip takes 100ms and that
/// a pong's timestamp is 50ms ahead of ping's timestamp.
///
/// The model here is instead of a one-time delay --- a particular ping or pong message got delayed
/// because it had a large prepare message in front of it in the send queue, a network packet got
/// lost, or a pigeon got eaten by a cat.
///
/// Such a delay affected either the ping (forward path) or the pong (reverse path). Assuming that
/// the minimum RTT seen before is a no-delay situation, the magnitude of the delay for the current
/// sample can be estimated as RTT - min(RTT), and the direction (forward/reverse) distinguished by
/// comparing unadjusted clock offsets.
///
/// The previous window (the epoch's sample for `replica`) is used to determine min(RTT).
///
/// Returns the correction to add to `clock_offset`: negative for a forward-path delay, positive
/// for a reverse-path delay, and 0 when there is no previous sample, when the delay did not
/// increase, or when the offsets agree to within the error margin.
fn estimate_asymmetric_delay(
    self: *Clock,
    replica: u8,
    one_way_delay: u64,
    clock_offset: i64,
) i64 {
    // Note that `one_way_delay` may be 0 for very fast networks.

    const error_margin = 10 * std.time.ns_per_ms;

    // Without a sample from the previous window there is no baseline to compare against.
    const previous = self.epoch.sources[replica] orelse return 0;

    // This sample is at least as fast as the baseline: assume that it was not delayed.
    if (one_way_delay <= previous.one_way_delay) return 0;

    if (clock_offset > previous.clock_offset + error_margin) {
        // The asymmetric error is on the forward network path.
        const delay_excess: i64 = @intCast(one_way_delay - previous.one_way_delay);
        return -delay_excess;
    }

    if (clock_offset < previous.clock_offset - error_margin) {
        // The asymmetric error is on the reverse network path.
        const delay_excess: i64 = @intCast(one_way_delay - previous.one_way_delay);
        return delay_excess;
    }

    return 0;
}
|
|
448
|
+
|
|
449
|
+
/// Attempts to synchronize the current window and, on success, promotes it to be the new epoch.
///
/// Does nothing until the window is at least `window_min` old and has learned a new sample since
/// the previous attempt. A window that reaches `window_max` without synchronizing is reset.
fn synchronize(self: *Clock) void {
    assert(self.window.synchronized == null);

    // Wait until the window has enough accurate samples:
    const elapsed = self.window.elapsed(self);
    if (elapsed < window_min) return;

    if (elapsed >= window_max) {
        // We took too long to synchronize the window, expire stale samples...
        const sources_sampled = self.window.sources_sampled();
        const partitioned = sources_sampled <= @divTrunc(self.window.sources.len, 2);
        if (partitioned) {
            log.warn("{}: synchronization failed, partitioned (sources={} samples={})", .{
                self.replica,
                sources_sampled,
                self.window.samples,
            });
        } else {
            log.warn("{}: synchronization failed, no agreement (sources={} samples={})", .{
                self.replica,
                sources_sampled,
                self.window.samples,
            });
        }
        self.window.reset(self);
        return;
    }

    if (!self.window.learned) return;
    // Do not reset `learned` any earlier than this (before we have attempted to synchronize).
    self.window.learned = false;

    // Starting with the most clock offset tolerance, while we have a quorum, find the best smallest
    // interval with the least clock offset tolerance, halving the tolerance at each step.
    // A round with tolerance=0 is always the last one, and the number of rounds is capped to
    // avoid runaway loops.
    var tolerance: u64 = clock_offset_tolerance_max;
    var round: usize = 0;
    while (round < 64) : (round += 1) {
        const interval = Marzullo.smallest_interval(self.window_tuples(tolerance));
        if (interval.sources_true < self.quorum) break;

        // The new interval may reduce the number of `sources_true` while also decreasing error. In
        // other words, provided we maintain a quorum, we prefer tighter tolerance bounds.
        self.window.synchronized = interval;

        if (tolerance == 0) break;
        tolerance /= 2;
    }

    // Wait for more accurate samples or until we timeout the window for lack of quorum:
    const window_interval = self.window.synchronized orelse return;

    // Transitioning from not being synchronized to being synchronized - log out a message for the
    // operator, as the counterpoint to `no agreement on cluster time`.
    if (self.epoch.synchronized == null) {
        log.info("{}: synchronized: accuracy={}", .{
            self.replica,
            fmt.fmtDurationSigned(window_interval.upper_bound - window_interval.lower_bound),
        });
    }

    // Swap roles: the synchronized window becomes the epoch, and the old epoch is reset and
    // reused as the next window.
    var recycled_window = self.epoch;
    recycled_window.reset(self);
    self.epoch = self.window;
    self.window = recycled_window;

    self.after_synchronization();
}
|
|
517
|
+
|
|
518
|
+
/// Reports the result of a successful synchronization: logs the synchronized interval, and, if
/// the system time lies outside of the synchronized bounds, records the distance to the nearest
/// bound on the trace gauge (when tracing is enabled) and logs it.
///
/// Requires `self.epoch.synchronized` to be set. Only reports; the clock's own fields are not
/// modified here.
fn after_synchronization(self: *Clock) void {
    const new_interval = self.epoch.synchronized.?;

    log.debug("{}: synchronized: truechimers={}/{} clock_offset={}..{} accuracy={}", .{
        self.replica,
        new_interval.sources_true,
        self.epoch.sources.len,
        fmt.fmtDurationSigned(new_interval.lower_bound),
        fmt.fmtDurationSigned(new_interval.upper_bound),
        fmt.fmtDurationSigned(new_interval.upper_bound - new_interval.lower_bound),
    });

    const elapsed: i64 = @intCast(self.epoch.elapsed(self));
    const system = self.realtime();
    const lower = self.epoch.realtime + elapsed + new_interval.lower_bound;
    const upper = self.epoch.realtime + elapsed + new_interval.upper_bound;
    const cluster = std.math.clamp(system, lower, upper);

    // The system time is already within the synchronized bounds: nothing to report.
    if (system == cluster) return;

    if (system < lower) {
        const delta = lower - system;
        if (self.trace) |trace| trace.gauge(.clock_delta_ns, delta);

        // Sub-millisecond deviations are only logged at debug level.
        if (delta < std.time.ns_per_ms) {
            log.debug("{}: system time is {} behind", .{
                self.replica,
                fmt.fmtDurationSigned(delta),
            });
        } else {
            log.warn(
                "{}: system time is {} behind, clamping system time to cluster time",
                .{
                    self.replica,
                    fmt.fmtDurationSigned(delta),
                },
            );
        }
        return;
    }

    // Otherwise the system time is above the upper bound.
    const delta = system - upper;
    if (self.trace) |trace| trace.gauge(.clock_delta_ns, delta);

    if (delta < std.time.ns_per_ms) {
        log.debug("{}: system time is {} ahead", .{
            self.replica,
            fmt.fmtDurationSigned(delta),
        });
    } else {
        log.warn("{}: system time is {} ahead, clamping system time to cluster time", .{
            self.replica,
            fmt.fmtDurationSigned(delta),
        });
    }
}
|
|
571
|
+
|
|
572
|
+
/// Fills `self.marzullo_tuples` with a lower and an upper bound tuple for every sampled source in
/// the current window, and returns the filled prefix.
///
/// Each bound is the sample's clock offset widened by its one way delay plus `tolerance`.
/// Our own sample must be present, with zero clock offset and zero one way delay.
fn window_tuples(self: *Clock, tolerance: u64) []Marzullo.Tuple {
    const sample_own = self.window.sources[self.replica].?;
    assert(sample_own.clock_offset == 0);
    assert(sample_own.one_way_delay == 0);

    var count: usize = 0;
    for (self.window.sources, 0..) |sampled, source| {
        const sample = sampled orelse continue;
        const margin: i64 = @intCast(sample.one_way_delay + tolerance);

        self.marzullo_tuples[count] = Marzullo.Tuple{
            .source = @intCast(source),
            .offset = sample.clock_offset - margin,
            .bound = .lower,
        };
        self.marzullo_tuples[count + 1] = Marzullo.Tuple{
            .source = @intCast(source),
            .offset = sample.clock_offset + margin,
            .bound = .upper,
        };
        count += 2;
    }
    return self.marzullo_tuples[0..count];
}
|
|
596
|
+
|
|
597
|
+
/// Returns whichever of the two samples has the smaller one way delay.
///
/// If only one sample is present, it is returned; if neither is, the result is `null`.
/// On a tie, `b` is returned (we assume B is the newer sample).
fn minimum_one_way_delay(a: ?Sample, b: ?Sample) ?Sample {
    const sample_a = a orelse return b;
    const sample_b = b orelse return a;
    return if (sample_a.one_way_delay < sample_b.one_way_delay) a else b;
}
|
|
604
|
+
|
|
605
|
+
const testing = std.testing;
|
|
606
|
+
const OffsetType = @import("../testing/time.zig").OffsetType;
|
|
607
|
+
|
|
608
|
+
/// Test harness pairing a simulated time source with the clock of replica 0 in a 3-replica
/// cluster (quorum of 2). Replicas 1 and 2 are emulated by feeding identical samples to
/// `Clock.learn()` every `learn_interval` ticks.
const ClockUnitTestContainer = struct {
    time: TimeSim,
    clock: Clock,
    /// Emulated round trip time of every ping/pong exchange.
    rtt: u64 = 300 * std.time.ns_per_ms,
    /// Emulated one way delay (half of `rtt`).
    owd: u64 = 150 * std.time.ns_per_ms,
    /// Number of ticks between emulated pongs.
    learn_interval: u64 = 5,

    /// Initializes the container in place.
    /// The clock is constructed from `self.time.time()`, so `self` must already be at its final
    /// address (presumably the returned time handle refers back to `self.time`; verify against
    /// `TimeSim`).
    pub fn init(
        self: *ClockUnitTestContainer,
        allocator: std.mem.Allocator,
        offset_type: OffsetType,
        offset_coefficient_A: i64,
        offset_coefficient_B: i64,
    ) !void {
        self.* = .{
            .time = .{
                .resolution = std.time.ns_per_s / 2,
                .offset_type = offset_type,
                .offset_coefficient_A = offset_coefficient_A,
                .offset_coefficient_B = offset_coefficient_B,
            },
            .clock = try Clock.init(allocator, self.time.time(), null, .{
                .replica_count = 3,
                .replica = 0,
                .quorum = 2,
            }),
        };
    }

    /// Ticks the time source until `tick_stop` ticks have passed, attempting to synchronize after
    /// every tick. Every `learn_interval` ticks, replicas 1 and 2 both "reply" with a pong that
    /// was pinged `rtt` ago and timestamped `owd` ago.
    pub fn run_till_tick(self: *ClockUnitTestContainer, tick_stop: u64) void {
        while (self.time.ticks < tick_stop) {
            self.clock.time.tick();

            if (self.time.ticks % self.learn_interval == 0) {
                const on_pong_time = self.clock.monotonic().ns;
                const m0 = on_pong_time - self.rtt;
                const t1: i64 = @intCast(on_pong_time - self.owd);

                for ([_]u8{ 1, 2 }) |replica| {
                    self.clock.learn(replica, m0, t1, on_pong_time);
                }
            }

            self.clock.synchronize();
        }
    }

    /// A tick at which to check the clock, and the offset expected at that tick.
    const AssertionPoint = struct {
        tick: u64,
        expected_offset: i64,
    };

    /// Returns the three assertion points appropriate for the configured offset type.
    pub fn ticks_to_perform_assertions(self: *ClockUnitTestContainer) [3]AssertionPoint {
        const owd_offset: i64 = @intCast(self.owd);
        const coefficient_b = self.time.offset_coefficient_B;

        return switch (self.time.offset_type) {
            .linear => points: {
                // For the first (OWD/drift per tick) ticks, the offset < OWD. This means that the
                // Marzullo interval is [0,0] (the offset and OWD are 0 for a replica w.r.t.
                // itself). Therefore the offset of `clock.realtime_synchronized` will be the
                // analytically prescribed offset at the start of the window.
                // Beyond this, the offset > OWD and the Marzullo interval will be from replica 1
                // and replica 2. The `clock.realtime_synchronized` will be clamped to the lower
                // bound. Therefore the `clock.realtime_synchronized` will be offset by the OWD.
                const threshold = self.owd /
                    @as(u64, @intCast(self.time.offset_coefficient_A));
                break :points [3]AssertionPoint{
                    .{
                        .tick = threshold,
                        .expected_offset = self.time.offset(threshold - self.learn_interval),
                    },
                    .{ .tick = threshold + 100, .expected_offset = owd_offset },
                    .{ .tick = threshold + 200, .expected_offset = owd_offset },
                };
            },
            .periodic => [3]AssertionPoint{
                .{
                    .tick = @intCast(@divTrunc(coefficient_b, 4)),
                    .expected_offset = owd_offset,
                },
                .{
                    .tick = @intCast(@divTrunc(coefficient_b, 2)),
                    .expected_offset = 0,
                },
                .{
                    .tick = @intCast(@divTrunc(coefficient_b * 3, 4)),
                    .expected_offset = -owd_offset,
                },
            },
            .step => [3]AssertionPoint{
                .{ .tick = @intCast(coefficient_b - 10), .expected_offset = 0 },
                .{ .tick = @intCast(coefficient_b + 10), .expected_offset = -owd_offset },
                .{ .tick = @intCast(coefficient_b + 10), .expected_offset = -owd_offset },
            },
            .non_ideal => unreachable, // use ideal clocks for the unit tests
        };
    }
};
|
|
718
|
+
|
|
719
|
+
test "ideal clocks get clamped to cluster time" {
    // Silence all clock logs.
    const level = std.testing.log_level;
    std.testing.log_level = std.log.Level.err;
    defer std.testing.log_level = level;

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    const allocator = arena.allocator();

    // One scenario per ideal offset type. Each scenario runs against its own container.
    const Scenario = struct {
        offset_type: OffsetType,
        offset_coefficient_A: i64,
        offset_coefficient_B: i64,
    };
    const scenarios = [_]Scenario{
        // Constant drift:
        .{
            .offset_type = OffsetType.linear,
            .offset_coefficient_A = std.time.ns_per_ms, // loses 1ms per tick
            .offset_coefficient_B = 0,
        },
        // Periodic drift:
        .{
            .offset_type = OffsetType.periodic,
            .offset_coefficient_A = std.time.ns_per_s, // loses up to 1s
            .offset_coefficient_B = 200, // period of 200 ticks
        },
        // Jumping clock:
        .{
            .offset_type = OffsetType.step,
            .offset_coefficient_A = -5 * std.time.ns_per_day, // jumps 5 days ahead.
            .offset_coefficient_B = 49, // after 49 ticks
        },
    };

    for (scenarios) |scenario| {
        var container: ClockUnitTestContainer = undefined;
        try container.init(
            allocator,
            scenario.offset_type,
            scenario.offset_coefficient_A,
            scenario.offset_coefficient_B,
        );

        const assertion_points = container.ticks_to_perform_assertions();
        for (assertion_points) |point| {
            container.run_till_tick(point.tick);
            // The expected offset is measured between the monotonic clock and the synchronized
            // realtime clock.
            try testing.expectEqual(
                point.expected_offset,
                @as(i64, @intCast(container.clock.monotonic().ns)) -
                    container.clock.realtime_synchronized().?,
            );
        }
    }
}
|
|
782
|
+
|
|
783
|
+
const PacketSimulatorOptions = @import("../testing/packet_simulator.zig").PacketSimulatorOptions;
|
|
784
|
+
const PacketSimulatorType = @import("../testing/packet_simulator.zig").PacketSimulatorType;
|
|
785
|
+
const Path = @import("../testing/packet_simulator.zig").Path;
|
|
786
|
+
const Command = @import("../vsr.zig").Command;
|
|
787
|
+
/// Simulates a cluster of clocks, each with its own non-ideal simulated time source, exchanging
/// pings and pongs over a simulated packet network.
const ClockSimulator = struct {
    /// A ping carries only `m0` (`t1 == null`); the pong echoes `m0` and adds the responder's
    /// realtime timestamp `t1`.
    const Packet = struct {
        m0: u64,
        t1: ?i64,
    };

    const PacketSimulator = PacketSimulatorType(Packet);

    const Options = struct {
        /// Number of ticks between pings sent by each clock.
        ping_timeout: u32,
        clock_count: u8,
        network_options: PacketSimulatorOptions,
    };

    allocator: std.mem.Allocator,
    options: Options,
    ticks: u64 = 0,
    network: PacketSimulator,
    times: []TimeSim,
    clocks: []Clock,
    prng: stdx.PRNG,

    /// Allocates the network, the time sources and the clocks. Each time source receives a random
    /// amplitude and phase drawn from a PRNG seeded with `options.network_options.seed`.
    /// The caller must call `deinit()` on the result.
    pub fn init(allocator: std.mem.Allocator, options: Options) !ClockSimulator {
        var network = try PacketSimulator.init(allocator, options.network_options, .{
            .packet_command = &packet_command,
            .packet_clone = &packet_clone,
            .packet_deinit = &packet_deinit,
            .packet_deliver = &packet_deliver,
        });
        errdefer network.deinit(allocator);

        const times = try allocator.alloc(TimeSim, options.clock_count);
        errdefer allocator.free(times);

        const clocks = try allocator.alloc(Clock, options.clock_count);
        errdefer allocator.free(clocks);

        var prng = stdx.PRNG.from_seed(options.network_options.seed);

        for (clocks, 0..) |*clock, replica| {
            // On failure, tear down the clocks initialized by the previous iterations.
            errdefer for (clocks[0..replica]) |*c| c.deinit(allocator);

            const amplitude_seconds = @as(i64, @intCast(prng.int_inclusive(u64, 10))) - 10;
            const amplitude = amplitude_seconds * std.time.ns_per_s;

            const phase_base: i64 = @intCast(prng.range_inclusive(u64, 100, 1000));
            const phase_noise: i64 =
                @intFromFloat(std.Random.init(&prng, stdx.PRNG.fill).floatNorm(f64) * 50);
            const phase = phase_base + phase_noise;

            times[replica] = .{
                .resolution = std.time.ns_per_s / 2, // delta_t = 0.5s
                .offset_type = OffsetType.non_ideal,
                .offset_coefficient_A = amplitude,
                .offset_coefficient_B = phase,
                .offset_coefficient_C = 10,
            };

            clock.* = try Clock.init(allocator, times[replica].time(), null, .{
                .replica_count = options.clock_count,
                .replica = @intCast(replica),
                .quorum = @divFloor(options.clock_count, 2) + 1,
            });
            errdefer clock.deinit(allocator);
        }
        errdefer for (clocks) |*clock| clock.deinit(allocator);

        return ClockSimulator{
            .allocator = allocator,
            .options = options,
            .network = network,
            .times = times,
            .clocks = clocks,
            .prng = prng,
        };
    }

    /// Releases the clocks, both slices and the network, in that order.
    pub fn deinit(self: *ClockSimulator) void {
        for (self.clocks) |*clock| clock.deinit(self.allocator);
        self.allocator.free(self.clocks);
        self.allocator.free(self.times);
        self.network.deinit(self.allocator);
    }

    /// Advances the simulation by one tick: ticks the network and every clock, then has each
    /// clock whose tick count is a multiple of `ping_timeout` ping every other clock.
    pub fn tick(self: *ClockSimulator) void {
        self.ticks += 1;
        self.network.tick();
        for (self.clocks) |*clock| clock.tick();

        for (self.clocks, self.times) |*clock, *time| {
            if (time.ticks % self.options.ping_timeout != 0) continue;

            const m0 = clock.monotonic().ns;
            for (0..self.clocks.len) |target| {
                if (target == clock.replica) continue;

                self.network.submit_packet(
                    .{
                        .m0 = m0,
                        .t1 = null,
                    },
                    .{
                        .source = clock.replica,
                        .target = @intCast(target),
                    },
                );
            }
        }
    }

    fn packet_command(_: *PacketSimulator, _: Packet) Command {
        return .ping; // Value doesn't matter.
    }

    /// Packets hold no resources, so a clone is a plain copy.
    fn packet_clone(_: *PacketSimulator, packet: Packet) Packet {
        return packet;
    }

    /// Packets hold no resources, so there is nothing to release.
    fn packet_deinit(_: *PacketSimulator, _: Packet) void {}

    /// Delivers a packet to the clock at `path.target`: a ping is answered with a pong carrying
    /// the target's realtime timestamp, and a pong is fed to the target's `learn()`.
    fn packet_deliver(packet_simulator: *PacketSimulator, packet: Packet, path: Path) void {
        const self: *ClockSimulator = @fieldParentPtr("network", packet_simulator);
        const target = &self.clocks[path.target];

        const t1 = packet.t1 orelse {
            // A ping: reply with a pong.
            self.network.submit_packet(
                .{
                    .m0 = packet.m0,
                    .t1 = target.realtime(),
                },
                .{
                    // send the packet back to where it came from.
                    .source = path.target,
                    .target = path.source,
                },
            );
            return;
        };

        // A pong: learn from the completed round trip.
        target.learn(
            path.source,
            packet.m0,
            t1,
            target.monotonic().ns,
        );
    }
};
|
|
931
|
+
|
|
932
|
+
// Runs a 3-clock `ClockSimulator` over a lossy, clogging, partitioning network for `ticks_max`
// ticks, and gathers statistics on clock offsets and on the pairwise error between synchronized
// clocks. The statistics are only logged; nothing is asserted about their values.
test "clock: fuzz test" {
    // Silence all clock logs.
    const level = std.testing.log_level;
    std.testing.log_level = std.log.Level.err;
    defer std.testing.log_level = level;

    const ticks_max: u64 = 1_000_000;
    const clock_count: u8 = 3;
    const SystemTime = @import("../testing/time.zig").TimeSim;
    var system_time = SystemTime{
        .resolution = constants.tick_ms * std.time.ns_per_ms,
        .offset_type = .linear,
        .offset_coefficient_A = 0,
        .offset_coefficient_B = 0,
    };
    // The seed is taken from the simulated time source's realtime reading.
    const seed: u64 = @intCast(system_time.time().realtime());
    var min_sync_error: u64 = 1_000_000_000;
    var max_sync_error: u64 = 0;
    var max_clock_offset: u64 = 0;
    var min_clock_offset: u64 = 1_000_000_000;
    var simulator = try ClockSimulator.init(std.testing.allocator, .{
        .network_options = .{
            .node_count = clock_count,
            .client_count = 0,
            .seed = seed,

            .one_way_delay_mean = .ms(250),
            .one_way_delay_min = .ms(100),
            .packet_loss_probability = ratio(10, 100),
            .path_maximum_capacity = 20,
            .path_clog_duration_mean = .ms(200),
            .path_clog_probability = ratio(2, 100),
            .packet_replay_probability = ratio(2, 100),

            .partition_mode = .isolate_single,
            .partition_probability = ratio(25, 100),
            .unpartition_probability = ratio(5, 100),
            .partition_stability = 100,
            .unpartition_stability = 10,
        },
        .clock_count = clock_count,
        .ping_timeout = 20,
    });
    defer simulator.deinit();

    var clock_ticks_without_synchronization: [clock_count]u32 = @splat(0);
    while (simulator.ticks < ticks_max) {
        simulator.tick();

        for (simulator.clocks, 0..) |*clock, index| {
            // `@abs` yields the unsigned magnitude directly, without the overflow that negating
            // the minimum signed value would cause.
            const offset = simulator.times[index].offset(simulator.ticks);
            const abs_offset: u64 = @abs(offset);
            max_clock_offset = @max(max_clock_offset, abs_offset);
            min_clock_offset = @min(min_clock_offset, abs_offset);

            const synced_time = clock.realtime_synchronized() orelse {
                clock_ticks_without_synchronization[index] += 1;
                continue;
            };

            // Compare against every other clock that is currently synchronized.
            for (simulator.clocks, 0..) |*other_clock, other_clock_index| {
                if (index == other_clock_index) continue;
                const other_clock_sync_time = other_clock.realtime_synchronized() orelse {
                    continue;
                };
                const err: i64 = synced_time - other_clock_sync_time;
                const abs_err: u64 = @abs(err);
                max_sync_error = @max(max_sync_error, abs_err);
                min_sync_error = @min(min_sync_error, abs_err);
            }
        }
    }

    log.info("seed={}, max ticks={}, clock count={}\n", .{
        seed,
        ticks_max,
        clock_count,
    });
    log.info("absolute clock offsets with respect to test time:\n", .{});
    log.info("maximum={}\n", .{fmt.fmtDurationSigned(@as(i64, @intCast(max_clock_offset)))});
    log.info("minimum={}\n", .{fmt.fmtDurationSigned(@as(i64, @intCast(min_clock_offset)))});
    log.info("\nabsolute synchronization errors between clocks:\n", .{});
    log.info("maximum={}\n", .{fmt.fmtDurationSigned(@as(i64, @intCast(max_sync_error)))});
    log.info("minimum={}\n", .{fmt.fmtDurationSigned(@as(i64, @intCast(min_sync_error)))});
    log.info("clock ticks without synchronization={d}\n", .{
        clock_ticks_without_synchronization,
    });
}
|