tigerbeetle 0.0.40 → 0.17.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +0 -25
- data/README.md +670 -80
- data/docs/migration.md +201 -0
- data/sig/tigerbeetle.rbs +271 -0
- data/src/ext/tigerbeetle/extconf.rb +47 -0
- data/src/ext/tigerbeetle/lib/aarch64-linux-gnu.2.27/libtb_client.so +0 -0
- data/src/ext/tigerbeetle/lib/aarch64-linux-musl/libtb_client.so +0 -0
- data/src/ext/tigerbeetle/lib/aarch64-macos/libtb_client.dylib +0 -0
- data/src/ext/tigerbeetle/lib/x86_64-linux-gnu.2.27/libtb_client.so +0 -0
- data/src/ext/tigerbeetle/lib/x86_64-linux-musl/libtb_client.so +0 -0
- data/src/ext/tigerbeetle/lib/x86_64-macos/libtb_client.dylib +0 -0
- data/src/ext/tigerbeetle/lib/x86_64-windows/tb_client.dll +0 -0
- data/src/ext/tigerbeetle/rb_tb_gen.h +458 -0
- data/{ext/tb_client/tigerbeetle/src/clients/rust/assets → src/ext/tigerbeetle}/tb_client.h +18 -16
- data/src/ext/tigerbeetle/tigerbeetle.c +310 -0
- data/src/tigerbeetle/bindings.rb +347 -0
- data/src/tigerbeetle/client.rb +129 -0
- data/src/tigerbeetle/completion_dispatcher.rb +108 -0
- data/src/tigerbeetle/id.rb +40 -0
- data/src/tigerbeetle/tb.rb +3 -0
- data/src/tigerbeetle/version.rb +3 -0
- data/src/tigerbeetle.rb +39 -0
- metadata +33 -350
- data/CHANGELOG.md +0 -162
- data/ext/tb_client/extconf.rb +0 -41
- data/ext/tb_client/tigerbeetle/LICENSE +0 -177
- data/ext/tb_client/tigerbeetle/build.zig +0 -2296
- data/ext/tb_client/tigerbeetle/src/aof.zig +0 -1000
- data/ext/tb_client/tigerbeetle/src/build/fetch.zig +0 -112
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +0 -808
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +0 -1283
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +0 -1704
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +0 -341
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +0 -1450
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +0 -1659
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +0 -406
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +0 -1092
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +0 -286
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +0 -158
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +0 -229
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +0 -110
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +0 -386
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +0 -34
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +0 -281
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +0 -312
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +0 -138
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +0 -466
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +0 -157
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +0 -90
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +0 -203
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +0 -79
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +0 -542
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +0 -109
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +0 -86
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +0 -370
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +0 -386
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +0 -167
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +0 -126
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +0 -996
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +0 -748
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +0 -3238
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +0 -1718
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +0 -190
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +0 -104
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +0 -75
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +0 -522
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +0 -267
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +0 -3
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +0 -379
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +0 -131
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +0 -63
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +0 -588
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +0 -73
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +0 -106
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +0 -305
- data/ext/tb_client/tigerbeetle/src/config.zig +0 -296
- data/ext/tb_client/tigerbeetle/src/constants.zig +0 -790
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +0 -202
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +0 -72
- data/ext/tb_client/tigerbeetle/src/direction.zig +0 -120
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +0 -158
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +0 -156
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +0 -252
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +0 -313
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +0 -87
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +0 -63
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +0 -47
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +0 -28
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +0 -61
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +0 -169
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +0 -46
- data/ext/tb_client/tigerbeetle/src/ewah.zig +0 -445
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +0 -128
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +0 -171
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +0 -179
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +0 -662
- data/ext/tb_client/tigerbeetle/src/io/common.zig +0 -155
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +0 -1093
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +0 -1880
- data/ext/tb_client/tigerbeetle/src/io/test.zig +0 -1005
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +0 -1598
- data/ext/tb_client/tigerbeetle/src/io.zig +0 -34
- data/ext/tb_client/tigerbeetle/src/iops.zig +0 -134
- data/ext/tb_client/tigerbeetle/src/list.zig +0 -236
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +0 -848
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +0 -179
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +0 -424
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +0 -420
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +0 -2114
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +0 -185
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +0 -1146
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +0 -1102
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +0 -200
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +0 -1495
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +0 -739
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +0 -166
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +0 -754
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +0 -1294
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +0 -510
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +0 -1241
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -628
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +0 -247
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +0 -116
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +0 -543
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +0 -938
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +0 -293
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +0 -359
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +0 -99
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +0 -17
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +0 -962
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +0 -617
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +0 -84
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +0 -1500
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -149
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -7
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +0 -865
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +0 -607
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +0 -843
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +0 -90
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +0 -40
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +0 -629
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +0 -933
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +0 -534
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +0 -469
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +0 -1219
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +0 -936
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +0 -343
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +0 -2195
- data/ext/tb_client/tigerbeetle/src/queue.zig +0 -390
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +0 -201
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +0 -1356
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +0 -496
- data/ext/tb_client/tigerbeetle/src/repl.zig +0 -1034
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +0 -973
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +0 -1866
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +0 -304
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +0 -227
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +0 -658
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +0 -466
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +0 -1058
- data/ext/tb_client/tigerbeetle/src/scripts.zig +0 -105
- data/ext/tb_client/tigerbeetle/src/shell.zig +0 -1195
- data/ext/tb_client/tigerbeetle/src/stack.zig +0 -260
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +0 -911
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +0 -2079
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +0 -4872
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +0 -288
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +0 -3128
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +0 -82
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +0 -157
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +0 -292
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +0 -65
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +0 -1414
- data/ext/tb_client/tigerbeetle/src/stdx/huge_page_allocator.zig +0 -115
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +0 -92
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +0 -677
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +0 -336
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +0 -511
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +0 -112
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +0 -1163
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +0 -142
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +0 -361
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +0 -275
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +0 -295
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +0 -436
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +0 -48
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +0 -402
- data/ext/tb_client/tigerbeetle/src/storage.zig +0 -489
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +0 -180
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +0 -146
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +0 -53
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +0 -61
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +0 -76
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +0 -110
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +0 -412
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +0 -331
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -458
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +0 -1198
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +0 -128
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +0 -181
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +0 -144
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +0 -97
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +0 -317
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +0 -126
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +0 -533
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +0 -154
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +0 -389
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +0 -1247
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +0 -249
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +0 -98
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +0 -212
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +0 -26
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +0 -579
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +0 -39
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +0 -214
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +0 -34
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +0 -785
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +0 -543
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +0 -181
- data/ext/tb_client/tigerbeetle/src/tidy.zig +0 -1449
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +0 -227
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +0 -1069
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +0 -1422
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +0 -1658
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +0 -518
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +0 -36
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +0 -646
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +0 -958
- data/ext/tb_client/tigerbeetle/src/time.zig +0 -236
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +0 -745
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +0 -462
- data/ext/tb_client/tigerbeetle/src/trace.zig +0 -556
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +0 -321
- data/ext/tb_client/tigerbeetle/src/vopr.zig +0 -1785
- data/ext/tb_client/tigerbeetle/src/vortex.zig +0 -101
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +0 -473
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +0 -208
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +0 -43
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +0 -768
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +0 -532
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +0 -338
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +0 -1019
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +0 -279
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +0 -1381
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +0 -315
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +0 -1460
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +0 -757
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +0 -797
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +0 -2586
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +0 -308
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +0 -1777
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +0 -715
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +0 -185
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +0 -333
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +0 -12356
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +0 -416
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +0 -165
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +0 -2928
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +0 -1075
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +0 -1603
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -484
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +0 -405
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -355
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +0 -29
- data/ext/tb_client/tigerbeetle/src/vsr.zig +0 -1727
- data/lib/tb_client/shared_lib.rb +0 -66
- data/lib/tb_client.rb +0 -282
- data/lib/tigerbeetle/account.rb +0 -38
- data/lib/tigerbeetle/account_balance.rb +0 -23
- data/lib/tigerbeetle/account_filter.rb +0 -31
- data/lib/tigerbeetle/atomic_counter.rb +0 -14
- data/lib/tigerbeetle/client.rb +0 -214
- data/lib/tigerbeetle/converters/account.rb +0 -63
- data/lib/tigerbeetle/converters/account_balance.rb +0 -31
- data/lib/tigerbeetle/converters/account_filter.rb +0 -32
- data/lib/tigerbeetle/converters/base.rb +0 -35
- data/lib/tigerbeetle/converters/create_accounts_result.rb +0 -21
- data/lib/tigerbeetle/converters/create_transfers_result.rb +0 -21
- data/lib/tigerbeetle/converters/query_filter.rb +0 -33
- data/lib/tigerbeetle/converters/time.rb +0 -23
- data/lib/tigerbeetle/converters/transfer.rb +0 -64
- data/lib/tigerbeetle/converters/uint_128.rb +0 -24
- data/lib/tigerbeetle/converters.rb +0 -12
- data/lib/tigerbeetle/error.rb +0 -4
- data/lib/tigerbeetle/id.rb +0 -30
- data/lib/tigerbeetle/platforms.rb +0 -9
- data/lib/tigerbeetle/query_filter.rb +0 -31
- data/lib/tigerbeetle/request.rb +0 -7
- data/lib/tigerbeetle/transfer.rb +0 -40
- data/lib/tigerbeetle/version.rb +0 -4
- data/lib/tigerbeetle.rb +0 -13
- data/tigerbeetle.gemspec +0 -60
|
@@ -1,1247 +0,0 @@
|
|
|
1
|
-
//! In-memory storage, with simulated faults and latency.
|
|
2
|
-
//!
|
|
3
|
-
//!
|
|
4
|
-
//! Fault Injection
|
|
5
|
-
//!
|
|
6
|
-
//! Storage injects faults that a fully-connected cluster can (i.e. should be able to) recover from.
|
|
7
|
-
//! Each zone can tolerate a different pattern of faults.
|
|
8
|
-
//!
|
|
9
|
-
//! - superblock:
|
|
10
|
-
//! - One read/write fault is permitted per area (section, free set, …).
|
|
11
|
-
//! - An additional fault is permitted at the target of a pending write during a crash.
|
|
12
|
-
//!
|
|
13
|
-
//! - wal_headers, wal_prepares:
|
|
14
|
-
//! - Read/write faults are distributed between replicas according to ClusterFaultAtlas, to ensure
|
|
15
|
-
//! that at least one replica will have a valid copy to help others repair.
|
|
16
|
-
//! (See: generate_faulty_wal_areas()).
|
|
17
|
-
//! - When a replica crashes, it may fault the WAL outside of ClusterFaultAtlas.
|
|
18
|
-
//! - When replica_count=1, its WAL can only be corrupted by a crash, never a read/write.
|
|
19
|
-
//! (When replica_count=1, there are no other replicas to assist with repair).
|
|
20
|
-
//!
|
|
21
|
-
//! - grid:
|
|
22
|
-
//! - Similarly to prepares and headers, ClusterFaultAtlas ensures that at least one replica will
|
|
23
|
-
//! have a block.
|
|
24
|
-
//! - When replica_count≤2, grid faults are disabled.
|
|
25
|
-
//!
|
|
26
|
-
const std = @import("std");
|
|
27
|
-
const assert = std.debug.assert;
|
|
28
|
-
const panic = std.debug.panic;
|
|
29
|
-
const math = std.math;
|
|
30
|
-
const mem = std.mem;
|
|
31
|
-
const Ratio = stdx.PRNG.Ratio;
|
|
32
|
-
const Duration = stdx.Duration;
|
|
33
|
-
const Instant = stdx.Instant;
|
|
34
|
-
|
|
35
|
-
const QueueType = @import("../queue.zig").QueueType;
|
|
36
|
-
const IOPSType = @import("../iops.zig").IOPSType;
|
|
37
|
-
const constants = @import("../constants.zig");
|
|
38
|
-
const vsr = @import("../vsr.zig");
|
|
39
|
-
const superblock = @import("../vsr/superblock.zig");
|
|
40
|
-
const FreeSet = @import("../vsr/free_set.zig").FreeSet;
|
|
41
|
-
const schema = @import("../lsm/schema.zig");
|
|
42
|
-
const stdx = @import("stdx");
|
|
43
|
-
const maybe = stdx.maybe;
|
|
44
|
-
const fuzz = @import("./fuzz.zig");
|
|
45
|
-
const GridChecker = @import("./cluster/grid_checker.zig").GridChecker;
|
|
46
|
-
|
|
47
|
-
const log = std.log.scoped(.storage);
|
|
48
|
-
|
|
49
|
-
pub const Storage = struct {
|
|
50
|
-
/// Options for fault injection during fuzz testing
|
|
51
|
-
pub const Options = struct {
|
|
52
|
-
size: u64,
|
|
53
|
-
/// Seed for the storage PRNG.
|
|
54
|
-
seed: u64 = 0,
|
|
55
|
-
|
|
56
|
-
/// Required when `fault_atlas` is set.
|
|
57
|
-
replica_index: ?u8 = null,
|
|
58
|
-
|
|
59
|
-
/// Minimum duration it may take to read data.
|
|
60
|
-
read_latency_min: Duration = .{ .ns = 0 },
|
|
61
|
-
/// Mean duration it may take to read data. Must be >= read_latency_min.
|
|
62
|
-
read_latency_mean: Duration = .{ .ns = 0 },
|
|
63
|
-
/// Minimum duration it may take to write data.
|
|
64
|
-
write_latency_min: Duration = .{ .ns = 0 },
|
|
65
|
-
/// Mean duration it may take to write data. Must be >= write_latency_min.
|
|
66
|
-
write_latency_mean: Duration = .{ .ns = 0 },
|
|
67
|
-
|
|
68
|
-
/// Probability that a read will corrupt a sector, if the target memory is within
|
|
69
|
-
/// a faulty area of this replica.
|
|
70
|
-
read_fault_probability: Ratio = Ratio.zero(),
|
|
71
|
-
/// Probability that a write will corrupt a sector, if the target memory is within
|
|
72
|
-
/// a faulty area of this replica.
|
|
73
|
-
write_fault_probability: Ratio = Ratio.zero(),
|
|
74
|
-
/// Probability that a write will misdirect to the wrong sector, if the target memory
|
|
75
|
-
/// is within a faulty area of this replica.
|
|
76
|
-
write_misdirect_probability: Ratio = Ratio.zero(),
|
|
77
|
-
/// Probability that a crash will corrupt a sector of a pending write's target,
|
|
78
|
-
/// if the target memory is within a faulty area of this replica.
|
|
79
|
-
crash_fault_probability: Ratio = Ratio.zero(),
|
|
80
|
-
|
|
81
|
-
/// Enable/disable automatic read/write faults.
|
|
82
|
-
/// Does not impact crash faults or manual faults.
|
|
83
|
-
fault_atlas: ?*const ClusterFaultAtlas = null,
|
|
84
|
-
|
|
85
|
-
/// Accessed by the Grid for extra verification of grid coherence.
|
|
86
|
-
grid_checker: ?*GridChecker = null,
|
|
87
|
-
|
|
88
|
-
iops_read_max: u64 = constants.iops_read_max,
|
|
89
|
-
iops_write_max: u64 = constants.iops_write_max,
|
|
90
|
-
};
|
|
91
|
-
|
|
92
|
-
/// See usage in Journal.write_sectors() for details.
|
|
93
|
-
/// TODO: allow testing in both modes.
|
|
94
|
-
pub const synchronicity: enum {
|
|
95
|
-
always_synchronous,
|
|
96
|
-
always_asynchronous,
|
|
97
|
-
} = .always_asynchronous;
|
|
98
|
-
|
|
99
|
-
pub const Read = struct {
|
|
100
|
-
callback: *const fn (read: *Storage.Read) void,
|
|
101
|
-
buffer: []u8,
|
|
102
|
-
zone: vsr.Zone,
|
|
103
|
-
/// Relative offset within the zone.
|
|
104
|
-
offset: u64,
|
|
105
|
-
/// Instant at which this read is considered "completed" and the callback should be called.
|
|
106
|
-
ready_at: Instant,
|
|
107
|
-
stack_trace: StackTrace,
|
|
108
|
-
|
|
109
|
-
fn less_than(_: void, a: *Read, b: *Read) math.Order {
|
|
110
|
-
return math.order(a.ready_at.ns, b.ready_at.ns);
|
|
111
|
-
}
|
|
112
|
-
};
|
|
113
|
-
|
|
114
|
-
pub const Write = struct {
|
|
115
|
-
callback: *const fn (write: *Storage.Write) void,
|
|
116
|
-
buffer: []const u8,
|
|
117
|
-
zone: vsr.Zone,
|
|
118
|
-
/// Relative offset within the zone.
|
|
119
|
-
offset: u64,
|
|
120
|
-
ready_at: Instant,
|
|
121
|
-
stack_trace: StackTrace,
|
|
122
|
-
|
|
123
|
-
fn less_than(_: void, a: *Write, b: *Write) math.Order {
|
|
124
|
-
return math.order(a.ready_at.ns, b.ready_at.ns);
|
|
125
|
-
}
|
|
126
|
-
};
|
|
127
|
-
|
|
128
|
-
pub const NextTick = struct {
|
|
129
|
-
link: QueueType(NextTick).Link = .{},
|
|
130
|
-
source: NextTickSource,
|
|
131
|
-
callback: *const fn (next_tick: *NextTick) void,
|
|
132
|
-
};
|
|
133
|
-
|
|
134
|
-
pub const NextTickSource = enum { lsm, vsr };
|
|
135
|
-
|
|
136
|
-
pub const Tracer = vsr.trace.Tracer;
|
|
137
|
-
|
|
138
|
-
/// See `Storage.overlays`.
|
|
139
|
-
const overlays_count_max = 2;
|
|
140
|
-
|
|
141
|
-
const OverlayBuffers = [overlays_count_max][constants.message_size_max]u8;
|
|
142
|
-
|
|
143
|
-
allocator: mem.Allocator,
|
|
144
|
-
|
|
145
|
-
size: u64,
|
|
146
|
-
options: Options,
|
|
147
|
-
prng: stdx.PRNG,
|
|
148
|
-
|
|
149
|
-
/// `memory` always contains the pristine data as-written -- it does not include storage faults.
|
|
150
|
-
memory: []align(constants.sector_size) u8,
|
|
151
|
-
/// Set bits correspond to sectors that have ever been written to.
|
|
152
|
-
memory_written: std.DynamicBitSetUnmanaged,
|
|
153
|
-
/// Set bits correspond to faulty sectors. The underlying sectors of `memory` are left clean.
|
|
154
|
-
faults: std.DynamicBitSetUnmanaged,
|
|
155
|
-
|
|
156
|
-
/// Overlays take precedence over the (pristine) data in `memory`.
|
|
157
|
-
///
|
|
158
|
-
/// Each misdirected write creates two overlays.
|
|
159
|
-
/// When a misdirected write is triggered:
|
|
160
|
-
/// - The intended target is overlaid with its old data.
|
|
161
|
-
/// - The intended target's `memory` is set to the `write.buffer` data.
|
|
162
|
-
/// - The mistaken target is overlaid with the `write.buffer` data.
|
|
163
|
-
/// - The mistaken target's `memory` is left untouched.
|
|
164
|
-
///
|
|
165
|
-
/// The reason for all of this is:
|
|
166
|
-
/// - By keeping `memory` pristine, we can trivially disable both sides of the misdirected-write
|
|
167
|
-
/// fault by flipping the `faulty` flag.
|
|
168
|
-
/// - By tracking the overlays separately, they can be repaired separately.
|
|
169
|
-
///
|
|
170
|
-
/// Other notes:
|
|
171
|
-
/// - We allow for (at most) one misdirect fault per Storage for the time being, for simplicity
|
|
172
|
-
/// and because double-faults are not covered by our fault model. This will hopefully match
|
|
173
|
-
/// physical disks – misdirected faults are an order of magnitude less frequent than bit rot,
|
|
174
|
-
/// which in turn is an order of magnitude less frequent than LSEs.
|
|
175
|
-
/// - In order to keep things interesting:
|
|
176
|
-
/// - misdirections are always within the same zone,
|
|
177
|
-
/// - the entire write is misdirected (rather than only some of the sectors), and
|
|
178
|
-
/// - the misdirected write lands on a convenient offset.
|
|
179
|
-
/// Thanks to rigorous checksums, misdirections that break these rules just manifest as
|
|
180
|
-
/// corruptions, and corruption is already well-tested (see `faults`). The goal here is to
|
|
181
|
-
/// test how TigerBeetle handles well-formed but incorrectly-located data.
|
|
182
|
-
/// TODO: Support cross-zone misdirects to help find cases where we don't check `command`.
|
|
183
|
-
overlays: IOPSType(struct { zone: vsr.Zone, offset: u64, size: u32 }, overlays_count_max) = .{},
|
|
184
|
-
overlay_buffers: *align(constants.sector_size) OverlayBuffers,
|
|
185
|
-
|
|
186
|
-
/// Whether to enable faults (when false, this supersedes `faulty_wal_areas` &c).
|
|
187
|
-
/// This is used to disable faults during the replica's first startup.
|
|
188
|
-
faulty: bool = true,
|
|
189
|
-
|
|
190
|
-
reads: std.PriorityQueue(*Storage.Read, void, Storage.Read.less_than),
|
|
191
|
-
writes: std.PriorityQueue(*Storage.Write, void, Storage.Write.less_than),
|
|
192
|
-
|
|
193
|
-
ticks: u64 = 0,
|
|
194
|
-
next_tick_queue: QueueType(NextTick) = QueueType(NextTick).init(.{
|
|
195
|
-
.name = "storage_next_tick",
|
|
196
|
-
}),
|
|
197
|
-
|
|
198
|
-
pub fn init(allocator: mem.Allocator, options: Storage.Options) !Storage {
|
|
199
|
-
assert(options.size <= constants.storage_size_limit_max);
|
|
200
|
-
assert(options.write_latency_mean.ns >= options.write_latency_min.ns);
|
|
201
|
-
assert(options.read_latency_mean.ns >= options.read_latency_min.ns);
|
|
202
|
-
if (options.fault_atlas != null) assert(options.replica_index != null);
|
|
203
|
-
|
|
204
|
-
const prng = stdx.PRNG.from_seed(options.seed);
|
|
205
|
-
const sector_count = @divExact(options.size, constants.sector_size);
|
|
206
|
-
const memory = try allocator.alignedAlloc(u8, constants.sector_size, options.size);
|
|
207
|
-
errdefer allocator.free(memory);
|
|
208
|
-
|
|
209
|
-
var memory_written = try std.DynamicBitSetUnmanaged.initEmpty(allocator, sector_count);
|
|
210
|
-
errdefer memory_written.deinit(allocator);
|
|
211
|
-
|
|
212
|
-
var faults = try std.DynamicBitSetUnmanaged.initEmpty(allocator, sector_count);
|
|
213
|
-
errdefer faults.deinit(allocator);
|
|
214
|
-
|
|
215
|
-
const overlay_buffers_alloc =
|
|
216
|
-
try allocator.alignedAlloc(u8, constants.sector_size, @sizeOf(OverlayBuffers));
|
|
217
|
-
const overlay_buffers = std.mem.bytesAsValue(OverlayBuffers, overlay_buffers_alloc);
|
|
218
|
-
errdefer allocator.destroy(overlay_buffers);
|
|
219
|
-
|
|
220
|
-
var reads = std.PriorityQueue(*Storage.Read, void, Storage.Read.less_than)
|
|
221
|
-
.init(allocator, {});
|
|
222
|
-
errdefer reads.deinit();
|
|
223
|
-
|
|
224
|
-
try reads.ensureTotalCapacity(options.iops_read_max);
|
|
225
|
-
|
|
226
|
-
var writes = std.PriorityQueue(*Storage.Write, void, Storage.Write.less_than)
|
|
227
|
-
.init(allocator, {});
|
|
228
|
-
errdefer writes.deinit();
|
|
229
|
-
|
|
230
|
-
try writes.ensureTotalCapacity(options.iops_write_max);
|
|
231
|
-
|
|
232
|
-
return Storage{
|
|
233
|
-
.allocator = allocator,
|
|
234
|
-
.size = options.size,
|
|
235
|
-
.options = options,
|
|
236
|
-
.prng = prng,
|
|
237
|
-
.memory = memory,
|
|
238
|
-
.memory_written = memory_written,
|
|
239
|
-
.faults = faults,
|
|
240
|
-
.overlay_buffers = overlay_buffers,
|
|
241
|
-
.reads = reads,
|
|
242
|
-
.writes = writes,
|
|
243
|
-
};
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
pub fn deinit(storage: *Storage, allocator: mem.Allocator) void {
|
|
247
|
-
storage.writes.deinit();
|
|
248
|
-
storage.reads.deinit();
|
|
249
|
-
allocator.destroy(storage.overlay_buffers);
|
|
250
|
-
storage.faults.deinit(allocator);
|
|
251
|
-
storage.memory_written.deinit(allocator);
|
|
252
|
-
allocator.free(storage.memory);
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
/// Cancel every in-progress read and write, and drop all queued next-tick callbacks.
///
/// Each cancelled write has a `crash_fault_probability` chance of leaving one of the sectors it
/// targeted marked as faulty. No completion callbacks are invoked.
pub fn reset(storage: *Storage) void {
    log.debug("Reset: {} pending reads, {} pending writes, {} pending next_ticks", .{
        storage.reads.count(),
        storage.writes.count(),
        storage.next_tick_queue.count(),
    });

    while (storage.writes.removeOrNull()) |pending_write| {
        if (!storage.prng.chance(storage.options.crash_fault_probability)) continue;

        // Corrupt a single sector, picked at random from the range the write targeted.
        // TODO: inject more realistic and varied storage faults as described above.
        const targeted = SectorRange.from_zone(
            pending_write.zone,
            pending_write.offset,
            pending_write.buffer.len,
        );
        const victim = targeted.random(&storage.prng);
        storage.fault_sector(pending_write.zone, victim);
    }

    // Cancelled reads are simply discarded.
    while (storage.reads.removeOrNull()) |_| {}
    storage.next_tick_queue.reset();

    assert(storage.writes.count() == 0);
    assert(storage.reads.count() == 0);
    assert(storage.next_tick_queue.count() == 0);
}
|
|
278
|
-
|
|
279
|
-
/// Compile-time upper bound on the size of a grid of a testing Storage: the number of grid
/// blocks that fit in a data file of `constants.storage_size_limit_max` bytes.
pub const grid_blocks_max = grid_blocks_for_storage_size(constants.storage_size_limit_max);
|
|
282
|
-
|
|
283
|
-
/// Runtime bound on the size of the grid of a testing Storage, in blocks, derived from
/// `storage.size`.
pub fn grid_blocks(storage: *const Storage) u64 {
    const blocks = grid_blocks_for_storage_size(storage.size);
    return blocks;
}
|
|
287
|
-
|
|
288
|
-
/// How many grid blocks fit in the Storage of the specified size.
///
/// The bytes left after `superblock.data_file_size_min` are divided into whole groups of
/// `FreeSet.shard_bits` blocks; a trailing partial group is not counted.
/// `size` must not exceed `constants.storage_size_limit_max`.
fn grid_blocks_for_storage_size(size: u64) u64 {
    assert(size <= constants.storage_size_limit_max);

    const shard_size = constants.block_size * FreeSet.shard_bits;
    const grid_size_max = size - superblock.data_file_size_min;
    const shard_count = @divFloor(grid_size_max, shard_size);
    return shard_count * FreeSet.shard_bits;
}
|
|
297
|
-
|
|
298
|
-
/// Returns the number of bytes that have been written to, assuming that (the simulated)
/// `fallocate()` creates a sparse file.
///
/// Computed as the number of sectors marked written, times `constants.sector_size`.
pub fn size_used(storage: *const Storage) usize {
    const sectors_written = storage.memory_written.count();
    return sectors_written * constants.sector_size;
}
|
|
303
|
-
|
|
304
|
-
/// Copy state from `origin` to `storage`:
///
/// - ticks
/// - memory (only the sectors which `origin` has written)
/// - occupied memory
/// - faulty sectors
/// - reads in-progress
/// - writes in-progress
///
/// Both instances must have an identical size.
pub fn copy(storage: *Storage, origin: *const Storage) void {
    assert(storage.size == origin.size);

    storage.ticks = origin.ticks;

    var written = origin.memory_written.iterator(.{});
    while (written.next()) |sector| {
        const sector_start = sector * constants.sector_size;
        stdx.copy_disjoint(
            .exact,
            u8,
            storage.memory[sector_start..][0..constants.sector_size],
            origin.memory[sector_start..][0..constants.sector_size],
        );
    }

    // Toggling a set with itself clears it; toggling the now-empty set with `origin`'s set
    // then leaves an exact copy.
    storage.memory_written.toggleSet(storage.memory_written);
    storage.memory_written.toggleSet(origin.memory_written);
    storage.faults.toggleSet(storage.faults);
    storage.faults.toggleSet(origin.faults);

    // Replace the pending I/O queues with `origin`'s entries (the same Read/Write pointers).
    // NOTE(review): `catch unreachable` presumably relies on both queues having had the same
    // capacity reserved in `init()` — confirm.
    storage.reads.items.len = 0;
    for (origin.reads.items) |pending_read| {
        storage.reads.add(pending_read) catch unreachable;
    }

    storage.writes.items.len = 0;
    for (origin.writes.items) |pending_write| {
        storage.writes.add(pending_write) catch unreachable;
    }
}
|
|
343
|
-
|
|
344
|
-
/// Complete at most one pending I/O, then run every queued next-tick callback.
///
/// The read or the write at the head of its queue completes if its `ready_at` is not later
/// than the current tick instant; when both are due, the earlier one wins and a read wins a tie.
/// Returns whether an I/O completed or at least one next-tick callback ran.
pub fn step(storage: *Storage) bool {
    const never = std.math.maxInt(u64);
    const now_ns = storage.tick_instant().ns;
    const read_due_ns = if (storage.reads.peek()) |read| read.ready_at.ns else never;
    const write_due_ns = if (storage.writes.peek()) |write| write.ready_at.ns else never;

    var advanced = false;
    if (read_due_ns <= now_ns and read_due_ns <= write_due_ns) {
        const read = storage.reads.remove();
        storage.read_sectors_finish(read);
        advanced = true;
    } else if (write_due_ns <= now_ns and write_due_ns <= read_due_ns) {
        const write = storage.writes.remove();
        storage.write_sectors_finish(write);
        advanced = true;
    }

    // Process the queues in a single loop, since their callbacks may append to each other.
    while (storage.next_tick_queue.pop()) |next_tick| {
        advanced = true;
        next_tick.callback(next_tick);
    }

    return advanced;
}
|
|
372
|
-
|
|
373
|
-
/// Call `step()` until it reports no progress, then advance the clock by one tick.
pub fn run(storage: *Storage) void {
    while (true) {
        const advanced = storage.step();
        if (!advanced) break;
    }
    storage.tick();
}
|
|
377
|
-
|
|
378
|
-
/// Advance the simulated clock by one tick. No I/O is completed here; see `step()`.
pub fn tick(storage: *Storage) void {
    storage.ticks = storage.ticks + 1;
}
|
|
381
|
-
|
|
382
|
-
/// Queue `callback` to be invoked with `next_tick` the next time the next-tick queue is drained
/// by `step()`.
///
/// `next_tick` is overwritten here and is linked into the queue by pointer.
/// `source` tags the entry so that `reset_next_tick_lsm()` can drop `.lsm` entries selectively.
pub fn on_next_tick(
    storage: *Storage,
    source: NextTickSource,
    callback: *const fn (next_tick: *Storage.NextTick) void,
    next_tick: *Storage.NextTick,
) void {
    next_tick.* = .{ .source = source, .callback = callback };
    storage.next_tick_queue.push(next_tick);
}
|
|
395
|
-
|
|
396
|
-
/// Drop every queued next-tick entry whose source is `.lsm`, keeping all other entries in
/// their original relative order.
pub fn reset_next_tick_lsm(storage: *Storage) void {
    // Drain a copy of the queue and rebuild the real queue from the entries that survive.
    var drained = storage.next_tick_queue;
    storage.next_tick_queue.reset();

    while (drained.pop()) |next_tick| {
        if (next_tick.source == .lsm) continue;
        storage.next_tick_queue.push(next_tick);
    }
}
|
|
404
|
-
|
|
405
|
-
/// Submit a simulated asynchronous read of `buffer.len` bytes at `offset_in_zone` within `zone`.
///
/// * Validates the buffer and offset against the zone via `zone.verify_iop()`.
/// * For the superblock and WAL zones, asserts that every targeted sector has been written.
///
/// `read` is overwritten and queued by pointer; `callback` is invoked by a later `step()`, once
/// the randomly drawn read latency has elapsed.
pub fn read_sectors(
    storage: *Storage,
    callback: *const fn (read: *Storage.Read) void,
    read: *Storage.Read,
    buffer: []u8,
    zone: vsr.Zone,
    offset_in_zone: u64,
) void {
    zone.verify_iop(buffer, offset_in_zone);
    assert(zone != .grid_padding);

    switch (zone) {
        .superblock,
        .wal_headers,
        .wal_prepares,
        => {
            var targeted = SectorRange.from_zone(zone, offset_in_zone, buffer.len);
            while (targeted.next()) |sector| {
                assert(storage.memory_written.isSet(sector));
            }
        },
        .grid_padding => unreachable,
        .client_replies, .grid => {
            // ClientReplies/Grid repairs can read blocks that have not ever been written.
            // (The former case is possible if we sync to a new superblock and someone requests
            // a client reply that we haven't repaired yet.)
        },
    }

    const ready_at = storage.tick_instant().add(storage.read_latency());
    read.* = .{
        .callback = callback,
        .buffer = buffer,
        .zone = zone,
        .offset = offset_in_zone,
        .ready_at = ready_at,
        .stack_trace = StackTrace.capture(),
    };

    // init() reserves queue capacity for `options.iops_read_max` concurrent reads.
    storage.reads.add(read) catch unreachable;
}
|
|
446
|
-
|
|
447
|
-
/// Complete `read`: fill its buffer from the backing memory, layer simulated faults on top of
/// the caller's copy, and invoke the read's callback.
///
/// The bytes in `storage.memory` are not modified here; only `read.buffer` and the set of
/// faulty sectors are.
fn read_sectors_finish(storage: *Storage, read: *Storage.Read) void {
    const memory_offset = read.zone.offset(read.offset);
    stdx.copy_disjoint(
        .exact,
        u8,
        read.buffer,
        storage.memory[memory_offset..][0..read.buffer.len],
    );

    // Possibly mark one more sector within the read's range as faulty.
    if (storage.prng.chance(storage.options.read_fault_probability)) {
        if (storage.pick_faulty_sector(read.zone, read.offset, read.buffer.len)) |sector| {
            storage.fault_sector(read.zone, sector);
        }
    }

    const eligibility = storage.read_sectors_fault_eligible(read);

    var sectors = SectorRange.from_zone(read.zone, read.offset, read.buffer.len);
    // `next()` advances `sectors.min`, so remember where the range started.
    const sector_first = sectors.min;
    while (sectors.next()) |sector| {
        const buffer_offset = (sector - sector_first) * constants.sector_size;
        const sector_bytes = read.buffer[buffer_offset..][0..constants.sector_size];

        if (eligibility != .none and storage.faults.isSet(sector)) {
            // Rather than corrupting the entire sector, inject a localized error.
            // (In some cases this will just corrupt sector padding.)
            // Inject the fault at a deterministic position (by using the pristine bytes as
            // consistent seed) so that read-retries don't resolve the corruption.
            const seed: u64 = @bitCast(sector_bytes[0..@sizeOf(u64)].*);
            var seeded_prng = stdx.PRNG.from_seed(seed);
            const byte_index = seeded_prng.index(sector_bytes);
            sector_bytes[byte_index] ^= seeded_prng.bit(u8);
        }

        // A sector that was never written reads back as random bytes.
        if (!storage.memory_written.isSet(sector)) {
            storage.prng.fill(sector_bytes);
        }
    }

    // Apply misdirected data.
    if (eligibility == .corrupt_or_misdirect) {
        var overlays = storage.overlays.iterate();
        while (overlays.next()) |overlay| {
            const covers_read = overlay.zone == read.zone and overlay.offset == read.offset;
            if (!covers_read) continue;

            log.debug("{}: read_sectors_finish: apply misdirect " ++
                "zone={s} offset={} size={}", .{
                storage.options.replica_index.?,
                @tagName(overlay.zone),
                overlay.offset,
                overlay.size,
            });

            // Replace the start of the buffer with the overlay's bytes, clamped to the
            // shorter of the overlay and the read.
            const overlay_index = storage.overlays.index(overlay);
            const overlay_buffer = &storage.overlay_buffers[overlay_index];
            const overlay_size = @min(overlay.size, read.buffer.len);
            stdx.copy_disjoint(.inexact, u8, read.buffer, overlay_buffer[0..overlay_size]);
        }
    }

    read.callback(read);
}
|
|
513
|
-
|
|
514
|
-
/// Decide which kinds of simulated fault may be applied to the result of `read`:
///
/// - `.none`: return the data untouched.
/// - `.corrupt`: faulty sectors may be corrupted, but misdirect overlays are not applied.
/// - `.corrupt_or_misdirect`: both kinds of fault may be applied.
///
/// Reads outside the `.wal_prepares` zone are only restricted by `storage.faulty`.
fn read_sectors_fault_eligible(storage: *const Storage, read: *const Storage.Read) enum {
    none,
    corrupt,
    corrupt_or_misdirect,
} {
    if (!storage.faulty) return .none;
    if (read.zone != .wal_prepares) return .corrupt_or_misdirect;

    const header_slot = @divExact(read.offset, constants.message_size_max);
    const header_offset = vsr.sector_floor(header_slot * @sizeOf(vsr.Header));

    // Don't fault a WAL prepare if the corresponding WAL header write was misdirected,
    // to avoid a double-fault which the journal interprets as a torn prepare.
    // TODO If in our fault tracking we distinguish between "torn writes" injected by
    // reset() and simulated LSE's/bitrot, then we could allow the former in this case.
    var overlays = storage.overlays.iterate_const();
    while (overlays.next()) |overlay| {
        if (overlay.zone == .wal_headers and overlay.offset == header_offset) {
            return .none;
        }
    }

    // Don't misdirect a WAL prepare if the corresponding WAL header doesn't match or is
    // corrupt, to avoid a double-fault in which the journal tries to `fix` the old
    // prepare.
    const wal_header = &storage.wal_headers()[header_slot];
    const wal_prepare = &storage.wal_prepares()[header_slot];
    if (wal_header.checksum != wal_prepare.header.checksum) return .corrupt;

    const header_sector =
        @divFloor(vsr.Zone.wal_headers.start() + header_offset, constants.sector_size);
    if (storage.faults.isSet(header_sector)) return .corrupt;

    return .corrupt_or_misdirect;
}
|
|
558
|
-
|
|
559
|
-
/// Submit a simulated asynchronous write of `buffer` at `offset_in_zone` within `zone`.
///
/// Asserts that the write does not overlap any write already pending in the same zone.
/// `write` is overwritten and queued by pointer; the bytes reach the backing memory and
/// `callback` is invoked by a later `step()`, once the randomly drawn write latency has elapsed.
pub fn write_sectors(
    storage: *Storage,
    callback: *const fn (write: *Storage.Write) void,
    write: *Storage.Write,
    buffer: []const u8,
    zone: vsr.Zone,
    offset_in_zone: u64,
) void {
    zone.verify_iop(buffer, offset_in_zone);
    maybe(zone == .grid_padding); // Padding is zeroed during format.

    // Verify that there are no concurrent overlapping writes.
    for (storage.writes.items) |pending| {
        if (pending.zone != zone) continue;

        const overlapping = offset_in_zone + buffer.len > pending.offset and
            pending.offset + pending.buffer.len > offset_in_zone;
        assert(!overlapping);
    }

    const ready_at = storage.tick_instant().add(storage.write_latency());
    write.* = .{
        .callback = callback,
        .buffer = buffer,
        .zone = zone,
        .offset = offset_in_zone,
        .ready_at = ready_at,
        .stack_trace = StackTrace.capture(),
    };

    // init() reserves queue capacity for `options.iops_write_max` concurrent writes.
    storage.writes.add(write) catch unreachable;
}
|
|
589
|
-
|
|
590
|
-
/// Complete `write`: retire overlays it overwrites, possibly record a new misdirect, clear
/// the faults on the written sectors, possibly inject a new fault, copy the bytes into the
/// backing memory, and invoke the write's callback.
fn write_sectors_finish(storage: *Storage, write: *Storage.Write) void {
    assert(storage.overlays.total() >= 2);

    // Clean up old misdirects if they are overwritten.
    var stale_overlays = storage.overlays.iterate();
    while (stale_overlays.next()) |overlay| {
        const overwritten = overlay.zone == write.zone and overlay.offset == write.offset;
        if (overwritten) storage.overlays.release(overlay);
    }

    // Apply a new misdirect.
    // The conditions short-circuit in this order, which also fixes the order in which the
    // PRNG is consumed.
    const misdirect = storage.overlays.available() >= 2 and
        storage.pick_faulty_sector(write.zone, write.offset, write.buffer.len) != null and
        storage.prng.chance(storage.options.write_misdirect_probability);
    const misdirect_offset = if (misdirect) storage.pick_faulty_chunk_offset(write) else null;
    if (misdirect_offset) |mistaken_offset| {
        assert(mistaken_offset != write.offset);

        // A misdirect takes two overlays: one at the mistaken location and one at the
        // intended location.
        const overlay_mistaken = storage.overlays.acquire().?;
        const overlay_intended = storage.overlays.acquire().?;

        const mistaken_index = storage.overlays.index(overlay_mistaken);
        const intended_index = storage.overlays.index(overlay_intended);

        log.debug("{}: write_sectors_finish: misdirect zone={s} offset={}->{} size={}", .{
            storage.options.replica_index.?,
            @tagName(write.zone),
            write.offset,
            mistaken_offset,
            write.buffer.len,
        });

        const size: u32 = @intCast(write.buffer.len);
        overlay_mistaken.* = .{ .zone = write.zone, .offset = mistaken_offset, .size = size };
        overlay_intended.* = .{ .zone = write.zone, .offset = write.offset, .size = size };

        const mistaken_buffer = &storage.overlay_buffers[mistaken_index];
        const intended_buffer = &storage.overlay_buffers[intended_index];
        // The intended location's contents, captured before this write is applied below.
        const intended_old_bytes =
            storage.memory[write.zone.offset(write.offset)..][0..write.buffer.len];

        // The mistaken location shows the new data; the intended location keeps its old data.
        stdx.copy_disjoint(.inexact, u8, mistaken_buffer, write.buffer);
        stdx.copy_disjoint(.inexact, u8, intended_buffer, intended_old_bytes);
    }

    // A write repairs the sectors it covers and marks them as written.
    var sectors = SectorRange.from_zone(write.zone, write.offset, write.buffer.len);
    while (sectors.next()) |sector| {
        storage.faults.unset(sector);
        storage.memory_written.set(sector);
    }

    // Possibly fault one of the sectors that was just written.
    if (storage.prng.chance(storage.options.write_fault_probability)) {
        if (storage.pick_faulty_sector(write.zone, write.offset, write.buffer.len)) |sector| {
            storage.fault_sector(write.zone, sector);
        }
    }

    const memory_offset = write.zone.offset(write.offset);
    stdx.copy_disjoint(
        .exact,
        u8,
        storage.memory[memory_offset..][0..write.buffer.len],
        write.buffer,
    );

    write.callback(write);
}
|
|
662
|
-
|
|
663
|
-
/// Draw a random latency for a read from the configured read minimum and mean.
fn read_latency(storage: *Storage) Duration {
    const min = storage.options.read_latency_min;
    const mean = storage.options.read_latency_mean;
    return storage.latency(min, mean);
}
|
|
669
|
-
|
|
670
|
-
/// Draw a random latency for a write from the configured write minimum and mean.
fn write_latency(storage: *Storage) Duration {
    const min = storage.options.write_latency_min;
    const mean = storage.options.write_latency_mean;
    return storage.latency(min, mean);
}
|
|
676
|
-
|
|
677
|
-
/// The current simulated time: the number of elapsed ticks converted to nanoseconds.
fn tick_instant(storage: *const Storage) Instant {
    const elapsed_ms = storage.ticks * constants.tick_ms;
    return .{ .ns = elapsed_ms * std.time.ns_per_ms };
}
|
|
682
|
-
|
|
683
|
-
/// Sample an exponentially distributed latency with the given `mean`, clamped from below to
/// `min`. Consumes randomness from `storage.prng`.
fn latency(storage: *Storage, min: Duration, mean: Duration) Duration {
    const sampled_ns = fuzz.random_int_exponential(&storage.prng, u64, mean.ns);
    return .{ .ns = @max(min.ns, sampled_ns) };
}
|
|
686
|
-
|
|
687
|
-
/// Ask the fault atlas for a sector within the given range that may be faulted on this replica.
///
/// Returns null when no fault atlas is configured, or when the atlas returns null.
/// When an atlas is configured, `options.replica_index` must be set.
fn pick_faulty_sector(
    storage: *Storage,
    zone: vsr.Zone,
    offset_in_zone: u64,
    size: u64,
) ?usize {
    if (storage.options.fault_atlas) |atlas| {
        return atlas.faulty_sector(
            &storage.prng,
            storage.options.replica_index.?,
            zone,
            offset_in_zone,
            size,
        );
    }
    return null;
}
|
|
702
|
-
|
|
703
|
-
/// Ask the fault atlas for an offset in `write.zone` to which `write` may be misdirected.
///
/// Returns null when no fault atlas is configured, or when the atlas proposes the write's own
/// offset. When an atlas is configured, `options.replica_index` must be set.
fn pick_faulty_chunk_offset(storage: *Storage, write: *const Write) ?u64 {
    const atlas = storage.options.fault_atlas orelse return null;
    const offset = atlas.faulty_chunk_offset(
        &storage.prng,
        storage.options.replica_index.?,
        write.zone,
        write.buffer.len,
    );

    // Don't misdirect to the same offset.
    if (offset == write.offset) return null;
    return offset;
}
|
|
714
|
-
|
|
715
|
-
/// Mark `sector` (an index from the start of the storage) as faulty.
///
/// When `options.replica_index` is set, also logs the fault together with the zone-specific
/// slot or address that the sector belongs to. `zone` is used for logging only and must not be
/// `.grid_padding` when a replica index is set.
fn fault_sector(storage: *Storage, zone: vsr.Zone, sector: usize) void {
    storage.faults.set(sector);

    // Everything below is logging only.
    const replica_index = storage.options.replica_index orelse return;
    const offset = sector * constants.sector_size - zone.offset(0);
    switch (zone) {
        .superblock => {
            log.debug(
                "{}: corrupting sector at zone={} offset={}",
                .{ replica_index, zone, offset },
            );
        },
        .wal_prepares, .client_replies => {
            comptime assert(constants.message_size_max % constants.sector_size == 0);
            const slot = @divFloor(offset, constants.message_size_max);
            log.debug(
                "{}: corrupting sector at zone={} offset={} slot={}",
                .{ replica_index, zone, offset, slot },
            );
        },
        .wal_headers => {
            // One sector holds several headers, so a whole run of slots is affected.
            comptime assert(constants.sector_size % @sizeOf(vsr.Header) == 0);
            const headers_in_sector = @divExact(constants.sector_size, @sizeOf(vsr.Header));
            const slot_min = @divFloor(offset, @sizeOf(vsr.Header));
            const slot_max = slot_min + headers_in_sector;
            log.debug(
                "{}: corrupting sector at zone={} offset={} slots={}...{}",
                .{ replica_index, zone, offset, slot_min, slot_max },
            );
        },
        .grid_padding => unreachable,
        .grid => {
            comptime assert(constants.block_size % @sizeOf(vsr.Header) == 0);
            // Grid addresses are 1-based.
            const address = @divFloor(offset, constants.block_size) + 1;
            log.debug(
                "{}: corrupting sector at zone={} offset={} address={}",
                .{ replica_index, zone, offset, address },
            );
        },
    }
}
|
|
756
|
-
|
|
757
|
-
/// Return a read-only view of the backing memory covered by `area`.
pub fn area_memory(
    storage: *const Storage,
    area: Area,
) []align(constants.sector_size) const u8 {
    const range = area.sectors();
    const byte_min = range.min * constants.sector_size;
    const byte_max = range.max * constants.sector_size;
    return @alignCast(storage.memory[byte_min..byte_max]);
}
|
|
766
|
-
|
|
767
|
-
/// Returns whether any sector in the area is corrupt, or whether a misdirect overlay is
/// recorded at exactly the area's zone and offset.
pub fn area_faulty(storage: *const Storage, area: Area) bool {
    const range = area.sectors();

    var faulty = false;
    var sector = range.min;
    while (sector < range.max) : (sector += 1) {
        if (storage.faults.isSet(sector)) {
            faulty = true;
            break;
        }
    }

    var misdirected = false;
    var overlays = storage.overlays.iterate_const();
    while (overlays.next()) |overlay| {
        if (overlay.zone == area and overlay.offset == area.offset_in_zone()) {
            misdirected = true;
            break;
        }
    }

    return faulty or misdirected;
}
|
|
784
|
-
|
|
785
|
-
/// Return a view of the header of superblock copy `copy_`, pointing directly into the backing
/// memory (no faults are applied).
pub fn superblock_header(
    storage: *const Storage,
    copy_: u8,
) *const superblock.SuperBlockHeader {
    const copy_offset = @as(usize, copy_) * superblock.superblock_copy_size;
    const offset = vsr.Zone.superblock.offset(copy_offset);
    const bytes = storage.memory[offset..][0..@sizeOf(superblock.SuperBlockHeader)];
    return @alignCast(mem.bytesAsValue(superblock.SuperBlockHeader, bytes));
}
|
|
794
|
-
|
|
795
|
-
/// View the whole WAL headers zone of the backing memory as a slice of prepare headers.
pub fn wal_headers(storage: *const Storage) []const vsr.Header.Prepare {
    const zone_start = vsr.Zone.wal_headers.offset(0);
    const zone_size = vsr.Zone.wal_headers.size().?;
    const zone_bytes = storage.memory[zone_start..][0..zone_size];
    return @alignCast(mem.bytesAsSlice(vsr.Header.Prepare, zone_bytes));
}
|
|
803
|
-
|
|
804
|
-
/// The on-disk layout of one message slot: a header of the given `command` followed by the
/// body bytes. The struct is exactly `constants.message_size_max` bytes with no padding, which
/// is checked at compile time.
fn MessageRawType(comptime command: vsr.Command) type {
    const body_size = constants.message_size_max - @sizeOf(vsr.Header);

    return extern struct {
        const MessageRaw = @This();
        header: vsr.Header.Type(command),
        body: [body_size]u8,

        comptime {
            assert(@sizeOf(MessageRaw) == constants.message_size_max);
            assert(stdx.no_padding(MessageRaw));
        }
    };
}
|
|
816
|
-
|
|
817
|
-
/// View the whole WAL prepares zone of the backing memory as a slice of raw prepare messages,
/// one per slot.
pub fn wal_prepares(storage: *const Storage) []const MessageRawType(.prepare) {
    const zone_start = vsr.Zone.wal_prepares.offset(0);
    const zone_size = vsr.Zone.wal_prepares.size().?;
    const zone_bytes = storage.memory[zone_start..][0..zone_size];
    return @alignCast(mem.bytesAsSlice(MessageRawType(.prepare), zone_bytes));
}
|
|
825
|
-
|
|
826
|
-
/// View the whole client replies zone of the backing memory as a slice of raw reply messages,
/// one per slot.
pub fn client_replies(storage: *const Storage) []const MessageRawType(.reply) {
    const zone_start = vsr.Zone.client_replies.offset(0);
    const zone_size = vsr.Zone.client_replies.size().?;
    const zone_bytes = storage.memory[zone_start..][0..zone_size];
    return @alignCast(mem.bytesAsSlice(MessageRawType(.reply), zone_bytes));
}
|
|
834
|
-
|
|
835
|
-
/// Return the grid block stored at `address` (1-based), or null if the block's first sector
/// has never been written.
///
/// Asserts that the header of a written block records the same address.
pub fn grid_block(
    storage: *const Storage,
    address: u64,
) ?*align(constants.sector_size) const [constants.block_size]u8 {
    assert(address > 0);

    const block_offset = vsr.Zone.grid.offset((address - 1) * constants.block_size);
    const block_sector = @divExact(block_offset, constants.sector_size);
    if (!storage.memory_written.isSet(block_sector)) return null;

    const block_buffer = storage.memory[block_offset..][0..constants.block_size];
    const block_header = schema.header_from_block(@alignCast(block_buffer));
    assert(block_header.address == address);

    return @alignCast(block_buffer);
}
|
|
852
|
-
|
|
853
|
-
/// Log (at debug level) the offset, zone and captured stack trace of every pending read and
/// write.
pub fn log_pending_io(storage: *const Storage) void {
    for (storage.reads.items) |pending| {
        log.debug("Pending read: {} {}\n{}", .{
            pending.offset,
            pending.zone,
            pending.stack_trace,
        });
    }
    for (storage.writes.items) |pending| {
        log.debug("Pending write: {} {}\n{}", .{
            pending.offset,
            pending.zone,
            pending.stack_trace,
        });
    }
}
|
|
861
|
-
|
|
862
|
-
/// Panic if any read is pending in `zone`. Every offending read is logged (at error level)
/// before panicking, so that all of them are reported rather than just the first.
pub fn assert_no_pending_reads(storage: *const Storage, zone: vsr.Zone) void {
    var pending_found = false;
    for (storage.reads.items) |read| {
        if (read.zone != zone) continue;

        log.err("Pending read: {} {}\n{}", .{ read.offset, read.zone, read.stack_trace });
        pending_found = true;
    }

    if (pending_found) panic("Pending reads in zone: {}", .{zone});
}
|
|
876
|
-
|
|
877
|
-
/// Panic if any write is pending in `zone`. Every offending write is logged (at error level)
/// before panicking, so that all of them are reported rather than just the first.
pub fn assert_no_pending_writes(storage: *const Storage, zone: vsr.Zone) void {
    var pending_found = false;
    for (storage.writes.items) |write| {
        if (write.zone != zone) continue;

        log.err("Pending write: {} {}\n{}", .{
            write.offset,
            write.zone,
            write.stack_trace,
        });
        pending_found = true;
    }

    if (pending_found) panic("Pending writes in zone: {}", .{zone});
}
|
|
896
|
-
|
|
897
|
-
/// Verify that the storage:
/// - contains the given index block
/// - contains every value block referenced by the index block
///
/// Each block's header must record the expected address, checksum and block type. Any
/// mismatch, or a block that has not been written, is an assertion failure.
pub fn verify_table(storage: *const Storage, index_address: u64, index_checksum: u128) void {
    assert(index_address > 0);

    const index_block = storage.grid_block(index_address).?;
    const index_header = schema.header_from_block(index_block);
    assert(index_header.address == index_address);
    assert(index_header.checksum == index_checksum);
    assert(index_header.block_type == .index);

    const index_schema = schema.TableIndex.from(index_block);
    const value_addresses = index_schema.value_addresses_used(index_block);
    const value_checksums = index_schema.value_checksums_used(index_block);
    for (value_addresses, value_checksums) |value_address, value_checksum| {
        const value_block = storage.grid_block(value_address).?;
        const value_header = schema.header_from_block(value_block);

        assert(value_header.address == value_address);
        assert(value_header.checksum == value_checksum.value);
        assert(value_header.block_type == .value);
    }
}
|
|
922
|
-
|
|
923
|
-
/// Reconfigure the storage so that no new faults are injected and all I/O latency is fixed:
/// every latency minimum and mean becomes 1ms and every fault probability becomes zero.
///
/// Only `storage.options` is changed; sectors already marked faulty and overlays already
/// recorded are left as they are.
pub fn transition_to_liveness_mode(storage: *Storage) void {
    const options = &storage.options;

    // Latencies.
    options.write_latency_mean = .ms(1);
    options.write_latency_min = .ms(1);
    options.read_latency_mean = .ms(1);
    options.read_latency_min = .ms(1);

    // Fault probabilities.
    options.read_fault_probability = Ratio.zero();
    options.write_fault_probability = Ratio.zero();
    options.write_misdirect_probability = Ratio.zero();
    options.crash_fault_probability = Ratio.zero();
}
|
|
933
|
-
};
|
|
934
|
-
|
|
935
|
-
/// Identifies a single unit of storage within a zone: one superblock copy, one sector of WAL
/// headers, one WAL prepare slot, one client reply slot, or one grid block.
///
/// `.grid_padding` carries no payload and has neither an offset nor a sector range.
pub const Area = union(vsr.Zone) {
    superblock: struct { copy: u8 },
    wal_headers: struct { sector: usize },
    wal_prepares: struct { slot: usize },
    client_replies: struct { slot: usize },
    grid_padding,
    /// Grid addresses are 1-based.
    grid: struct { address: u64 },

    /// Byte offset of the start of the area, relative to the start of its zone.
    fn offset_in_zone(area: Area) u64 {
        return switch (area) {
            .superblock => |unit| vsr.superblock.superblock_copy_size * @as(u64, unit.copy),
            .wal_headers => |unit| constants.sector_size * unit.sector,
            .wal_prepares => |unit| constants.message_size_max * unit.slot,
            .client_replies => |unit| constants.message_size_max * unit.slot,
            .grid_padding => unreachable,
            .grid => |unit| constants.block_size * (unit.address - 1),
        };
    }

    /// The range of storage sectors covered by the area.
    fn sectors(area: Area) SectorRange {
        const size: usize = switch (area) {
            .superblock => vsr.superblock.superblock_copy_size,
            .wal_headers => constants.sector_size,
            .wal_prepares => constants.message_size_max,
            .client_replies => constants.message_size_max,
            .grid_padding => unreachable,
            .grid => constants.block_size,
        };
        return SectorRange.from_zone(area, area.offset_in_zone(), size);
    }
};
|
|
965
|
-
|
|
966
|
-
/// A half-open range `[min, max)` of sector indexes, counted from the start of the storage.
/// Doubles as an iterator: `next()` consumes the range from its low end.
const SectorRange = struct {
    min: usize, // inclusive sector index
    max: usize, // exclusive sector index

    /// The sectors covered by `size` bytes starting at `offset_in_zone` within `zone`.
    fn from_zone(
        zone: vsr.Zone,
        offset_in_zone: u64,
        size: usize,
    ) SectorRange {
        const offset_in_storage = zone.offset(offset_in_zone);
        return from_offset(offset_in_storage, size);
    }

    /// The sectors covered by `size` bytes starting at `offset_in_storage`.
    /// Both the start and the end of the byte range must be sector-aligned (`@divExact`).
    fn from_offset(offset_in_storage: u64, size: usize) SectorRange {
        const sector_min: usize = @divExact(offset_in_storage, constants.sector_size);
        const sector_max: usize = @divExact(offset_in_storage + size, constants.sector_size);
        return .{ .min = sector_min, .max = sector_max };
    }

    /// Pick a sector uniformly at random from the range, which must not be empty.
    fn random(range: SectorRange, prng: *stdx.PRNG) usize {
        const sector_last = range.max - 1;
        return prng.range_inclusive(usize, range.min, sector_last);
    }

    /// Return the lowest remaining sector and advance past it, or null once exhausted.
    fn next(range: *SectorRange) ?usize {
        if (range.min == range.max) return null;

        const sector = range.min;
        range.min += 1;
        return sector;
    }

    /// The overlap of `a` and `b`, or null if they share no sector.
    fn intersect(a: SectorRange, b: SectorRange) ?SectorRange {
        const disjoint = a.max <= b.min or b.max <= a.min;
        if (disjoint) return null;

        return SectorRange{
            .min = @max(a.min, b.min),
            .max = @min(a.max, b.max),
        };
    }
};
|
|
1005
|
-
|
|
1006
|
-
/// Per-replica map of which storage chunks may be faulted.
///
/// The cluster can only recover if every header/prepare/block stays valid (not faulty) on a
/// majority of replicas. Faulting the same WAL message on a majority of replicas would make
/// recovery impossible, so faults are confined to designated areas, and those areas differ
/// from replica to replica.
pub const ClusterFaultAtlas = struct {
    /// Selects which zones may receive faults.
    pub const Options = struct {
        faulty_superblock: bool,
        faulty_wal_headers: bool,
        faulty_wal_prepares: bool,
        faulty_client_replies: bool,
        faulty_grid: bool,
    };

    const ReplicaSet = stdx.BitSetType(constants.replicas_max);
    const headers_per_sector = @divExact(constants.sector_size, @sizeOf(vsr.Header));
    const members_max = constants.members_max;

    /// One bitset per member; a set bit marks a chunk that may be faulted.
    /// These bitsets are consulted for both the `wal_headers` and the `wal_prepares` zones.
    faulty_wal_header_sectors: [members_max]std.DynamicBitSetUnmanaged,
    faulty_client_reply_slots: [members_max]std.DynamicBitSetUnmanaged,
    /// Bit 0 corresponds to address 1.
    faulty_grid_blocks: [members_max]std.DynamicBitSetUnmanaged,

    /// Allocates the per-member fault bitsets and, for every enabled zone, marks each chunk as
    /// faulty on `quorums.replication - 1` distinct replicas chosen via `prng`.
    ///
    /// On allocation failure, any bitsets already allocated are released before the error is
    /// returned. On success, the caller releases the atlas with `deinit`.
    pub fn init(
        allocator: std.mem.Allocator,
        replica_count: u8,
        prng: *stdx.PRNG,
        options: Options,
    ) !ClusterFaultAtlas {
        if (replica_count == 1) {
            // With a single replica, WAL/Grid faults are unrecoverable, so they must be disabled.
            maybe(options.faulty_superblock);
            assert(!options.faulty_wal_headers);
            assert(!options.faulty_wal_prepares);
            assert(!options.faulty_client_replies);
            assert(!options.faulty_grid);
        }

        // The WAL header and WAL prepare faulty areas are currently coupled, so the two options
        // must agree.
        assert(options.faulty_wal_headers == options.faulty_wal_prepares);

        // Bitset lengths, one entry per zone.
        const zone_bitset_sizes = [3]u32{
            @divExact(constants.journal_size_headers, constants.sector_size), // WAL headers.
            constants.clients_max, // Client replies.
            Storage.grid_blocks_max, // Grid.
        };

        // Every zone's bitsets live in one flat, zone-major array so that a single errdefer can
        // release exactly those which were successfully allocated.
        var bitsets: [3 * members_max]std.DynamicBitSetUnmanaged = undefined;
        var bitsets_allocated: u32 = 0;
        errdefer for (bitsets[0..bitsets_allocated]) |*bitset| bitset.deinit(allocator);

        for (&bitsets, 0..) |*bitset, bitset_index| {
            const zone_index = @divFloor(bitset_index, members_max);
            bitset.* = try std.DynamicBitSetUnmanaged.initEmpty(
                allocator,
                zone_bitset_sizes[zone_index],
            );
            bitsets_allocated += 1;
        }

        var atlas = ClusterFaultAtlas{
            .faulty_wal_header_sectors = bitsets[0 * members_max ..][0..members_max].*,
            .faulty_client_reply_slots = bitsets[1 * members_max ..][0..members_max].*,
            .faulty_grid_blocks = bitsets[2 * members_max ..][0..members_max].*,
        };

        const quorums = vsr.quorums(replica_count);
        const faults_max = quorums.replication - 1;
        assert(faults_max < replica_count);
        assert(faults_max < quorums.replication);
        assert(faults_max < quorums.view_change);
        assert(faults_max > 0 or replica_count == 1);

        const ZoneFaults = struct {
            enabled: bool,
            chunks: *[members_max]std.DynamicBitSetUnmanaged,
        };
        const zones = [_]ZoneFaults{
            .{
                .enabled = options.faulty_wal_headers,
                .chunks = &atlas.faulty_wal_header_sectors,
            },
            .{
                .enabled = options.faulty_client_replies,
                .chunks = &atlas.faulty_client_reply_slots,
            },
            .{
                .enabled = options.faulty_grid,
                .chunks = &atlas.faulty_grid_blocks,
            },
        };

        for (zones) |zone| {
            if (!zone.enabled) continue;

            for (0..zone.chunks[0].bit_length) |chunk| {
                // Keep drawing replicas until this chunk is faulty on `faults_max` of them.
                var replicas_faulty: ReplicaSet = .{};
                while (replicas_faulty.count() < faults_max) {
                    const replica_index = prng.int_inclusive(u8, replica_count - 1);
                    const replica_chunks = &zone.chunks[replica_index];

                    // Never corrupt all chunks of a particular replica.
                    // (For the WAL, this can cause error.WALInvalid).
                    if (replica_chunks.count() + 1 >= replica_chunks.capacity()) continue;

                    replica_chunks.set(chunk);
                    replicas_faulty.set(replica_index);
                }
            }
        }

        return atlas;
    }

    /// Releases every bitset owned by the atlas.
    pub fn deinit(atlas: *ClusterFaultAtlas, allocator: std.mem.Allocator) void {
        for (&atlas.faulty_grid_blocks) |*bitset| bitset.deinit(allocator);
        for (&atlas.faulty_client_reply_slots) |*bitset| bitset.deinit(allocator);
        for (&atlas.faulty_wal_header_sectors) |*bitset| bitset.deinit(allocator);
    }

    /// Returns the chunk size (in bytes) and the per-member fault bitsets for `zone`, or null
    /// when the zone never receives atlas-driven faults.
    fn zone_chunks(atlas: *const ClusterFaultAtlas, zone: vsr.Zone) ?struct {
        chunk_size: u32,
        faulty: *const [members_max]std.DynamicBitSetUnmanaged,
    } {
        return switch (zone) {
            .wal_headers => .{
                .chunk_size = constants.sector_size,
                .faulty = &atlas.faulty_wal_header_sectors,
            },
            // Prepares share the WAL header bitsets: one prepares chunk spans the messages whose
            // headers fit in a single header sector.
            .wal_prepares => .{
                .chunk_size = constants.message_size_max * headers_per_sector,
                .faulty = &atlas.faulty_wal_header_sectors,
            },
            .client_replies => .{
                .chunk_size = constants.message_size_max,
                .faulty = &atlas.faulty_client_reply_slots,
            },
            .grid => .{
                .chunk_size = constants.block_size,
                .faulty = &atlas.faulty_grid_blocks,
            },

            // Don't inject additional read/write/misdirect faults into superblock headers.
            // This prevents the quorum from being lost like so:
            // - copy₀: B (ok)
            // - copy₁: B (torn write)
            // - copy₂: A (corrupt)
            // - copy₃: A (ok)
            // TODO Use hash-chaining to safely load copy₀, so that we can inject a superblock
            // fault.
            .superblock => null,

            // We assert that the padding is never read, so there's no need to fault it.
            .grid_padding => unreachable,
        };
    }

    /// Given a write of `size` bytes to `zone`, finds an interesting offset within the same zone
    /// to target: the zone-relative byte offset of a chunk that is marked faulty for
    /// `replica_index`. The search starts at a pseudo-random chunk and wraps around.
    ///
    /// Returns null if the zone is not faultable, if the write is larger than one chunk, or if
    /// the replica has no faulty chunk in the zone.
    ///
    /// (If we want to drop the same-zone condition, an alternate implementation strategy is: on
    /// random writes, perform the write successfully, but save the target zone/offset/size. Then
    /// on a future random write, misdirect to a compatible saved location.)
    fn faulty_chunk_offset(
        atlas: *const ClusterFaultAtlas,
        prng: *stdx.PRNG,
        replica_index: u8,
        zone: vsr.Zone,
        size: u64,
    ) ?u64 {
        const chunks = atlas.zone_chunks(zone) orelse return null;

        if (size > chunks.chunk_size) {
            // When formatting the WAL, we may write many chunks simultaneously (to avoid a storm
            // of tiny writes).
            assert(zone == .wal_headers or zone == .wal_prepares);
            assert(size % constants.sector_size == 0);
            return null;
        }

        const replica_faults = &chunks.faulty[replica_index];
        const chunk_count = replica_faults.bit_length;
        const chunk_first = prng.int_inclusive(usize, chunk_count - 1);
        for (0..chunk_count) |step| {
            const chunk_index = (chunk_first + step) % chunk_count;
            if (!replica_faults.isSet(chunk_index)) continue;

            // The chunk size of zone=wal_prepares is a multiple of the message_size_max, but
            // misdirects in the wal_prepares zone always land on the first message of a chunk.
            return chunk_index * chunks.chunk_size;
        }
        return null;
    }

    /// Picks a pseudo-random sector inside `[offset_in_zone, offset_in_zone + size)` that lies
    /// within the first run of consecutive chunks marked faulty for `replica_index`.
    ///
    /// Returns null if the zone is not faultable or if none of the chunks touched by the request
    /// are faulty.
    fn faulty_sector(
        atlas: *const ClusterFaultAtlas,
        prng: *stdx.PRNG,
        replica_index: u8,
        zone: vsr.Zone,
        offset_in_zone: u64,
        size: u64,
    ) ?usize {
        const chunks = atlas.zone_chunks(zone) orelse return null;
        const replica_faults = &chunks.faulty[replica_index];

        // Scan the chunks overlapping the request; record where the first faulty run begins and
        // how many chunks it spans.
        var run_start: ?usize = null;
        var run_length: usize = 0;

        var chunk: usize = @divFloor(offset_in_zone, chunks.chunk_size);
        while (chunk * chunks.chunk_size < offset_in_zone + size) : (chunk += 1) {
            if (replica_faults.isSet(chunk)) {
                if (run_start == null) run_start = chunk;
                run_length += 1;
            } else if (run_start != null) {
                // The first faulty run has ended.
                break;
            }
        }

        const start = run_start orelse return null;

        const faulty_range = SectorRange.from_zone(
            zone,
            chunks.chunk_size * start,
            chunks.chunk_size * run_length,
        );
        const request_range = SectorRange.from_zone(zone, offset_in_zone, size);

        // The faulty run begins at a chunk which overlaps the request, so the intersection is
        // never empty.
        return faulty_range.intersect(request_range).?.random(prng);
    }
};
|
|
1217
|
-
|
|
1218
|
-
/// A self-contained snapshot of a stack trace: it owns its address buffer, so it can be stored
/// and copied by value, then printed later.
const StackTrace = struct {
    /// Captured return addresses; only the first `index` entries are meaningful.
    addresses: [64]usize,
    /// Number of captured addresses, as reported by `std.debug.captureStackTrace`.
    index: usize,

    /// Captures the stack trace of the caller, keeping at most `addresses.len` frames.
    fn capture() StackTrace {
        var snapshot: StackTrace = .{ .addresses = undefined, .index = 0 };

        // Record the addresses straight into the snapshot's own buffer.
        var trace = std.builtin.StackTrace{
            .instruction_addresses = &snapshot.addresses,
            .index = 0,
        };
        std.debug.captureStackTrace(null, &trace);

        snapshot.index = trace.index;
        return snapshot;
    }

    /// Formats the snapshot by delegating to the formatter of `std.builtin.StackTrace`.
    /// The format string and options are ignored.
    pub fn format(
        self: StackTrace,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = fmt;
        _ = options;

        // `std.builtin.StackTrace` takes a mutable slice, but `self` is passed by value and is
        // immutable, so view a local copy instead.
        var addresses_copy = self.addresses;
        const trace = std.builtin.StackTrace{
            .instruction_addresses = &addresses_copy,
            .index = self.index,
        };
        return writer.print("{}", .{trace});
    }
};
|