tigerbeetle 0.0.34 → 0.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/client.rb +1 -1
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,754 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const mem = std.mem;
|
|
3
|
+
const math = std.math;
|
|
4
|
+
const assert = std.debug.assert;
|
|
5
|
+
const log = std.log.scoped(.manifest);
|
|
6
|
+
|
|
7
|
+
const stdx = @import("stdx");
|
|
8
|
+
const constants = @import("../constants.zig");
|
|
9
|
+
const growth_factor = constants.lsm_growth_factor;
|
|
10
|
+
|
|
11
|
+
const vsr = @import("../vsr.zig");
|
|
12
|
+
const table_count_max_tree = @import("tree.zig").table_count_max;
|
|
13
|
+
const table_count_max_for_level = @import("tree.zig").table_count_max_for_level;
|
|
14
|
+
const snapshot_latest = @import("tree.zig").snapshot_latest;
|
|
15
|
+
const schema = @import("schema.zig");
|
|
16
|
+
|
|
17
|
+
const TreeConfig = @import("tree.zig").TreeConfig;
|
|
18
|
+
const Direction = @import("../direction.zig").Direction;
|
|
19
|
+
const ManifestLogType = @import("manifest_log.zig").ManifestLogType;
|
|
20
|
+
const ManifestLevelType = @import("manifest_level.zig").ManifestLevelType;
|
|
21
|
+
const NodePool = @import("node_pool.zig").NodePoolType(constants.lsm_manifest_node_size, 16);
|
|
22
|
+
const TableInfo = schema.ManifestNode.TableInfo;
|
|
23
|
+
const Tracer = vsr.trace.Tracer;
|
|
24
|
+
|
|
25
|
+
/// Returns the in-memory descriptor type for one table of a tree whose keys are `Table.Key`.
///
/// The descriptor converts to and from the on-disk `schema.ManifestNode.TableInfo` via
/// `encode()`/`decode()`.
pub fn TreeTableInfoType(comptime Table: type) type {
    const Key = Table.Key;

    return struct {
        const TreeTableInfo = @This();

        /// Checksum of the table's index block.
        checksum: u128,
        /// Address of the table's index block.
        address: u64,

        /// Lowest snapshot that can see this table (the bound is inclusive).
        /// - Set to the current snapshot tick when the table is created.
        snapshot_min: u64,

        /// Highest snapshot that can see this table (the bound is inclusive).
        /// - maxInt(u64) while the table is a live compaction output.
        /// - Lowered to the current snapshot tick once the table is consumed as a
        ///   compaction input.
        snapshot_max: u64 = math.maxInt(u64),

        key_min: Key, // Inclusive.
        key_max: Key, // Inclusive.

        /// Number of values stored in the table. Tables are not always full, so knowing the
        /// count up front helps with compaction pacing.
        value_count: u32,

        /// Reports whether a query at `snapshot` may read this table.
        ///
        /// Each query targets one snapshot, and that snapshot selects the set of tables which
        /// are consulted to answer it: exactly those tables whose
        /// [snapshot_min, snapshot_max] interval contains the snapshot. Both ends are inclusive:
        /// - New tables are inserted with `snapshot_min = compaction.snapshot + 1`.
        /// - Tables are retired by setting `snapshot_max = compaction.snapshot`.
        ///
        /// Output tables of an ongoing compaction are added to the manifest before they are
        /// written to disk, so prefetch must not query them yet. Prefetch keeps reading the
        /// compaction's input tables until the half-bar of compaction completes; then
        /// `tree.prefetch_snapshot_max` is advanced (to the compaction's `compaction_op`), which
        /// at once hides the old (input) tables and reveals the new (output) tables.
        pub fn visible(table: *const TreeTableInfo, snapshot: u64) bool {
            assert(table.address != 0);
            assert(table.snapshot_min <= table.snapshot_max);
            assert(snapshot <= snapshot_latest);

            const at_or_after_min = table.snapshot_min <= snapshot;
            const at_or_before_max = snapshot <= table.snapshot_max;
            return at_or_after_min and at_or_before_max;
        }

        /// Reports whether the table is hidden from `snapshot_latest` and from every snapshot
        /// in `snapshots`.
        pub fn invisible(table: *const TreeTableInfo, snapshots: []const u64) bool {
            // A table that was never deleted is still visible to the latest snapshot, so the
            // snapshot list does not need to be scanned at all.
            if (table.visible(snapshot_latest)) return false;

            for (snapshots) |snapshot| {
                if (table.visible(snapshot)) return false;
            }

            assert(table.snapshot_max < math.maxInt(u64));
            return true;
        }

        /// Field-by-field equality of two descriptors.
        pub fn equal(table: *const TreeTableInfo, other: *const TreeTableInfo) bool {
            if (table.checksum != other.checksum) return false;
            if (table.address != other.address) return false;
            if (table.snapshot_min != other.snapshot_min) return false;
            if (table.snapshot_max != other.snapshot_max) return false;
            if (table.key_min != other.key_min) return false;
            if (table.key_max != other.key_max) return false;
            return table.value_count == other.value_count;
        }

        /// Converts an on-disk `TableInfo` into the in-memory descriptor.
        ///
        /// The keys are read from the leading `@sizeOf(Key)` bytes of the padded key fields;
        /// the remaining padding bytes are asserted to be zero.
        pub fn decode(table: *const TableInfo) TreeTableInfo {
            assert(table.tree_id > 0);
            assert(stdx.zeroed(&table.reserved));
            assert(table.value_count > 0);

            const key_size = @sizeOf(Key);
            const key_min: Key = mem.bytesAsValue(Key, table.key_min[0..key_size]).*;
            const key_max: Key = mem.bytesAsValue(Key, table.key_max[0..key_size]).*;

            assert(key_min <= key_max);
            assert(stdx.zeroed(table.key_min[key_size..]));
            assert(stdx.zeroed(table.key_max[key_size..]));

            return .{
                .checksum = table.checksum,
                .address = table.address,
                .snapshot_min = table.snapshot_min,
                .snapshot_max = table.snapshot_max,
                .key_min = key_min,
                .key_max = key_max,
                .value_count = table.value_count,
            };
        }

        /// Converts the in-memory descriptor into an on-disk `TableInfo`, labelled with the
        /// tree, level and event supplied in `options`.
        pub fn encode(table: *const TreeTableInfo, options: struct {
            tree_id: u16,
            level: u6,
            event: schema.ManifestNode.Event,
        }) TableInfo {
            assert(options.tree_id > 0);
            assert(table.value_count > 0);

            // Start from all-zero buffers so that the bytes beyond the key stay zeroed.
            var key_min_padded: TableInfo.KeyPadded = @splat(0);
            var key_max_padded: TableInfo.KeyPadded = @splat(0);
            stdx.copy_disjoint(.inexact, u8, &key_min_padded, mem.asBytes(&table.key_min));
            stdx.copy_disjoint(.inexact, u8, &key_max_padded, mem.asBytes(&table.key_max));

            return .{
                .checksum = table.checksum,
                .address = table.address,
                .snapshot_min = table.snapshot_min,
                .snapshot_max = table.snapshot_max,
                .tree_id = options.tree_id,
                .key_min = key_min_padded,
                .key_max = key_max_padded,
                .value_count = table.value_count,
                .label = .{
                    .level = options.level,
                    .event = options.event,
                },
            };
        }
    };
}
|
|
153
|
+
|
|
154
|
+
pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
|
|
155
|
+
const Key = Table.Key;
|
|
156
|
+
|
|
157
|
+
return struct {
|
|
158
|
+
const Manifest = @This();
|
|
159
|
+
|
|
160
|
+
pub const TreeTableInfo = TreeTableInfoType(Table);
|
|
161
|
+
pub const LevelIterator = Level.Iterator;
|
|
162
|
+
pub const TableInfoReference = Level.TableInfoReference;
|
|
163
|
+
pub const KeyRange = Level.KeyRange;
|
|
164
|
+
pub const ManifestLog = ManifestLogType(Storage);
|
|
165
|
+
pub const Level =
|
|
166
|
+
ManifestLevelType(NodePool, Key, TreeTableInfo, table_count_max_tree);
|
|
167
|
+
|
|
168
|
+
/// Pairs a single table reference (`table_a`) with a `CompactionRange` (`range_b`).
/// NOTE(review): presumably `table_a` is the table chosen from level A and `range_b` the
/// overlapping range in level B — confirm against the code that constructs this struct.
const CompactionTableRange = struct {
|
|
169
|
+
table_a: TableInfoReference,
|
|
170
|
+
range_b: CompactionRange,
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
/// A key interval spanning two levels, together with references to the level B tables that
/// fall inside it.
pub const CompactionRange = struct {
|
|
174
|
+
/// The minimum key across both levels.
|
|
175
|
+
key_min: Key,
|
|
176
|
+
/// The maximum key across both levels.
|
|
177
|
+
key_max: Key,
|
|
178
|
+
// References to tables in level B that intersect with the chosen table in level A.
// The bounded array is parameterized by `constants.lsm_growth_factor`.
|
|
179
|
+
tables: stdx.BoundedArrayType(TableInfoReference, constants.lsm_growth_factor),
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
node_pool: *NodePool,
|
|
183
|
+
config: TreeConfig,
|
|
184
|
+
/// manifest_log is lazily initialized rather than passed into init() because the Forest
|
|
185
|
+
/// needs it for @fieldParentPtr().
|
|
186
|
+
manifest_log: ?*ManifestLog = null,
|
|
187
|
+
|
|
188
|
+
levels: [constants.lsm_levels]Level,
|
|
189
|
+
|
|
190
|
+
// TODO Set this at startup when reading in the manifest.
|
|
191
|
+
// This should be the greatest TableInfo.snapshot_min/snapshot_max (if deleted) or
|
|
192
|
+
// registered snapshot seen so far.
|
|
193
|
+
snapshot_max: u64 = 1,
|
|
194
|
+
|
|
195
|
+
tracer: *Tracer,
|
|
196
|
+
|
|
197
|
+
/// Initializes `manifest` in place and then initializes each level in order.
///
/// `manifest_log` and `snapshot_max` keep their declared defaults. If a level fails to
/// initialize, the levels that were already initialized are deinitialized before the error is
/// returned.
pub fn init(
    manifest: *Manifest,
    allocator: mem.Allocator,
    node_pool: *NodePool,
    config: TreeConfig,
    tracer: *Tracer,
) !void {
    manifest.* = .{
        .node_pool = node_pool,
        .config = config,
        .levels = undefined,
        .tracer = tracer,
    };

    // Counts the levels that are fully initialized, so that the error path below only tears
    // down those and never touches a level that is still undefined.
    var initialized_count: usize = 0;
    errdefer {
        for (manifest.levels[0..initialized_count]) |*level| {
            level.deinit(allocator, node_pool);
        }
    }

    for (&manifest.levels) |*level| {
        try level.init(allocator, node_pool);
        initialized_count += 1;
    }
}
|
|
218
|
+
|
|
219
|
+
/// Deinitializes every level, passing `allocator` and the manifest's node pool to each.
pub fn deinit(manifest: *Manifest, allocator: mem.Allocator) void {
    const node_pool = manifest.node_pool;
    for (&manifest.levels) |*level| {
        level.deinit(allocator, node_pool);
    }
}
|
|
222
|
+
|
|
223
|
+
/// Resets every level, then reassigns the manifest from its own `node_pool`, `config`,
/// `levels` and `tracer`.
///
/// Fields left out of the struct literal revert to their declared defaults; of the fields
/// declared above this means `manifest_log` becomes null and `snapshot_max` becomes 1.
pub fn reset(manifest: *Manifest) void {
    const node_pool = manifest.node_pool;
    for (&manifest.levels) |*level| {
        level.reset(node_pool);
    }

    manifest.* = .{
        .node_pool = node_pool,
        .config = manifest.config,
        .levels = manifest.levels,
        .tracer = manifest.tracer,
    };
}
|
|
233
|
+
|
|
234
|
+
/// Attaches `manifest_log` to the manifest.
///
/// Must be called while the manifest has no log attached and before `manifest_log` is opened;
/// both conditions are asserted.
pub fn open_commence(manifest: *Manifest, manifest_log: *ManifestLog) void {
    // The log is attached exactly once.
    assert(manifest.manifest_log == null);
    // Attaching happens ahead of the log being opened.
    assert(!manifest_log.opened);

    manifest.manifest_log = manifest_log;
}
|
|
240
|
+
|
|
241
|
+
/// Inserts `table` into the in-memory level `level` and appends a matching `.insert` event to
/// the manifest log.
///
/// When `constants.verify` is set, asserts that the level does not contain the table
/// beforehand and does contain it afterwards.
pub fn insert_table(
    manifest: *Manifest,
    level: u8,
    table: *const TreeTableInfo,
) void {
    const manifest_level = &manifest.levels[level];
    if (constants.verify) assert(!manifest_level.contains(table));

    manifest_level.insert_table(manifest.node_pool, table);

    // Record the insertion in the manifest log.
    const manifest_log = manifest.manifest_log.?;
    manifest_log.append(&table.encode(.{
        .tree_id = manifest.config.id,
        .level = @intCast(level),
        .event = .insert,
    }));

    if (constants.verify) assert(manifest_level.contains(table));
}
|
|
264
|
+
|
|
265
|
+
/// Sets `snapshot_max` of the referenced table in level `level` to `snapshot`, then appends
/// an `.update` event for the table to the manifest log.
///
/// `snapshot` must lie within the table's current [snapshot_min, snapshot_max] interval
/// (asserted).
pub fn update_table(
    manifest: *Manifest,
    level: u8,
    snapshot: u64,
    table_ref: TableInfoReference,
) void {
    const manifest_log = manifest.manifest_log.?;
    assert(manifest_log.opened);

    const manifest_level = &manifest.levels[level];
    const table = table_ref.table_info;
    if (constants.verify) assert(manifest_level.contains(table));

    assert(table.snapshot_max >= snapshot);
    assert(table.snapshot_min <= snapshot);
    manifest_level.set_snapshot_max(snapshot, table_ref);
    // The update must be observable through the reference that was passed in.
    assert(table.snapshot_max == snapshot);

    // Record the update in the manifest log.
    manifest_log.append(&table.encode(.{
        .tree_id = manifest.config.id,
        .level = @intCast(level),
        .event = .update,
    }));
}
|
|
291
|
+
|
|
292
|
+
/// Moves `table` from `level_a` to the next level down, `level_b`.
///
/// The table must be visible to `snapshot_latest`, and `level_b` must equal `level_a + 1` and
/// be a valid level (all asserted). Only a single `.update` event, for `level_b`, is appended
/// to the manifest log.
pub fn move_table(
    manifest: *Manifest,
    level_a: u8,
    level_b: u8,
    table: *const TreeTableInfo,
) void {
    const manifest_log = manifest.manifest_log.?;
    assert(manifest_log.opened);
    assert(level_b == level_a + 1);
    assert(level_b < constants.lsm_levels);
    assert(table.visible(snapshot_latest));

    const manifest_level_a = &manifest.levels[level_a];
    const manifest_level_b = &manifest.levels[level_b];

    if (constants.verify) {
        assert(manifest_level_a.contains(table));
        assert(!manifest_level_b.contains(table));
    }

    // Step 1: drop the table from level A in memory only; nothing is logged for this.
    manifest_level_a.remove_table(manifest.node_pool, table);

    // Step 2: add the table to level B and log that change.
    // A "remove" event must NOT be logged for level A: when the log is replayed from open(),
    // events are processed in LIFO order and duplicates are ignored, so the table is replayed
    // only at level B and never at its old position in level A.
    manifest_level_b.insert_table(manifest.node_pool, table);
    manifest_log.append(&table.encode(.{
        .tree_id = manifest.config.id,
        .level = @intCast(level_b),
        .event = .update,
    }));

    if (constants.verify) {
        assert(!manifest_level_a.contains(table));
        assert(manifest_level_b.contains(table));
    }
}
|
|
331
|
+
|
|
332
|
+
/// Returns the union of every level's `key_range_latest.key_range`, or null when no level
/// has a range.
pub fn key_range(manifest: *Manifest) ?KeyRange {
    assert(manifest.manifest_log.?.opened);

    var manifest_range: ?KeyRange = null;
    for (&manifest.levels) |*level| {
        // Levels without a range contribute nothing.
        const level_range = level.key_range_latest.key_range orelse continue;

        if (manifest_range) |*range| {
            // Widen the accumulated range to cover this level too.
            range.key_min = @min(range.key_min, level_range.key_min);
            range.key_max = @max(range.key_max, level_range.key_max);
        } else {
            manifest_range = level_range;
        }
    }
    return manifest_range;
}
|
|
353
|
+
|
|
354
|
+
/// Removes from level `level` every table that the `.invisible` iterator yields for
/// `snapshots` within [key_min, key_max].
///
/// For each such table a `.remove` event is appended to the manifest log and the table is
/// then purged from the in-memory level.
pub fn remove_invisible_tables(
    manifest: *Manifest,
    level: u8,
    snapshots: []const u64,
    key_min: Key,
    key_max: Key,
) void {
    const manifest_log = manifest.manifest_log.?;
    assert(manifest_log.opened);
    assert(level < constants.lsm_levels);
    assert(key_min <= key_max);

    const manifest_level = &manifest.levels[level];

    // Iterate in descending order: removing tables while walking downwards avoids
    // desynchronizing the iterator from the ManifestLevel.
    var it = manifest_level.iterator(
        .invisible,
        snapshots,
        .descending,
        KeyRange{ .key_min = key_min, .key_max = key_max },
    );

    while (it.next()) |table_pointer| {
        // Take a stack copy first: `remove_table()` invalidates pointers into SegmentedArray
        // memory, so it must not be handed one.
        const table: TreeTableInfo = table_pointer.*;
        assert(table.snapshot_max < snapshot_latest);
        assert(table.invisible(snapshots));
        assert(key_min <= table.key_max);
        assert(table.key_min <= key_max);

        // Log the removal, then purge the table from memory (ManifestLevel).
        manifest_log.append(&table.encode(.{
            .tree_id = manifest.config.id,
            .level = @intCast(level),
            .event = .remove,
        }));
        manifest_level.remove_table(manifest.node_pool, &table);
    }

    if (constants.verify) {
        manifest.assert_no_invisible_tables_at_level(level, snapshots);
    }
}
|
|
397
|
+
|
|
398
|
+
/// Creates an iterator over the tables visible to `snapshot` that may contain `key`
/// (containment is not guaranteed). The search starts at `level_min` itself and then
/// continues through every subsequent level.
pub fn lookup(manifest: *Manifest, snapshot: u64, key: Key, level_min: u8) LookupIterator {
    const iterator = LookupIterator{
        .manifest = manifest,
        .snapshot = snapshot,
        .key = key,
        // The iterator's `level` is the next level to search, so `level_min` is inclusive.
        .level = level_min,
    };
    return iterator;
}
|
|
408
|
+
|
|
409
|
+
/// Iterates over the manifest levels in increasing level order, yielding at most one table
/// per level: the table visible to `snapshot` whose key range covers `key`.
pub const LookupIterator = struct {
    manifest: *const Manifest,
    snapshot: u64,
    key: Key,
    /// The next level to search. It is advanced past every level that has been examined.
    level: u8,
    /// Not used by `next()`, which keeps its per-level iterator in a local variable.
    inner: ?Level.Iterator = null,

    /// Returns the next candidate table, or null once all levels have been searched.
    pub fn next(it: *LookupIterator) ?*const TreeTableInfo {
        while (it.level < constants.lsm_levels) : (it.level += 1) {
            const manifest_level = &it.manifest.levels[it.level];

            // Skip levels whose key range cannot contain the key at this snapshot.
            if (!manifest_level.key_range_contains(it.snapshot, it.key)) continue;

            // Search the single-key range [key, key] among tables visible to the snapshot.
            const point = KeyRange{ .key_min = it.key, .key_max = it.key };
            const snapshots = @as(*const [1]u64, &it.snapshot);
            var candidates = manifest_level.iterator(.visible, snapshots, .ascending, point);

            const table = candidates.next() orelse continue;
            assert(table.visible(it.snapshot));
            assert(table.key_min <= it.key);
            assert(it.key <= table.key_max);
            // At most one visible table per level may cover the key.
            assert(candidates.next() == null);

            // Returning bypasses the loop's continue expression, so step past this level
            // explicitly; the following call then resumes at the next level.
            it.level += 1;
            return table;
        }

        assert(it.level == constants.lsm_levels);
        return null;
    }
};
|
|
443
|
+
|
|
444
|
+
/// Asserts that no level holds more visible tables than `table_count_max_for_level()`
/// allows for it, then reports the summed visible table count and the summed maximum
/// to the tracer as gauges tagged with this tree's id.
pub fn assert_level_table_counts(manifest: *const Manifest) void {
    var total_visible: u32 = 0;
    var total_visible_max: u32 = 0;

    for (&manifest.levels, 0..) |*manifest_level, index| {
        const level_visible_max =
            table_count_max_for_level(growth_factor, @as(u8, @intCast(index)));
        const level_visible = manifest_level.table_count_visible;
        assert(level_visible <= level_visible_max);

        total_visible += level_visible;
        total_visible_max += level_visible_max;
    }

    manifest.tracer.gauge(
        .{ .table_count_visible = .{ .tree = @enumFromInt(manifest.config.id) } },
        total_visible,
    );
    manifest.tracer.gauge(
        .{ .table_count_visible_max = .{ .tree = @enumFromInt(manifest.config.id) } },
        total_visible_max,
    );
}
|
|
466
|
+
|
|
467
|
+
/// Runs `assert_no_invisible_tables_at_level()` with `snapshots` for every manifest level.
pub fn assert_no_invisible_tables(manifest: *const Manifest, snapshots: []const u64) void {
    for (0..manifest.levels.len) |index| {
        const level: u8 = @intCast(index);
        manifest.assert_no_invisible_tables_at_level(level, snapshots);
    }
}
|
|
472
|
+
|
|
473
|
+
/// Asserts that the `.invisible` iterator of `level` yields no table for `snapshots`,
/// with no key range restriction.
fn assert_no_invisible_tables_at_level(
    manifest: *const Manifest,
    level: u8,
    snapshots: []const u64,
) void {
    const manifest_level = &manifest.levels[level];
    var invisible_tables = manifest_level.iterator(.invisible, snapshots, .ascending, null);
    assert(invisible_tables.next() == null);
}
|
|
481
|
+
|
|
482
|
+
/// Finds the next table of `parameters.level` within [key_min, key_max], continuing after
/// `key_exclusive` when one is given, in the requested `direction`.
///
/// * The returned table is visible to `parameters.snapshot`.
/// * Returns null when the level has no such table.
pub fn next_table(manifest: *const Manifest, parameters: struct {
    level: u8,
    snapshot: u64,
    key_min: Key,
    key_max: Key,
    key_exclusive: ?Key,
    direction: Direction,
}) ?*const TreeTableInfo {
    assert(parameters.level < constants.lsm_levels);
    assert(parameters.key_min <= parameters.key_max);

    // Delegate to the level; only the table info of the returned reference is exposed.
    const manifest_level = &manifest.levels[parameters.level];
    if (manifest_level.next_table(.{
        .snapshot = parameters.snapshot,
        .key_min = parameters.key_min,
        .key_max = parameters.key_max,
        .key_exclusive = parameters.key_exclusive,
        .direction = parameters.direction,
    })) |reference| {
        return reference.table_info;
    }
    return null;
}
|
|
506
|
+
|
|
507
|
+
/// Picks the table of `level_a` that should be compacted into level `level_a + 1`.
/// The choice is delegated to `table_with_least_overlap()`: the preferred table is the one
/// overlapping the fewest tables of the next level.
/// Returns null if the level is not due for compaction (its visible table count is below
/// the level's maximum), or if no least-overlap table is found.
pub fn compaction_table(manifest: *const Manifest, level_a: u8) ?CompactionTableRange {
    // The last level has no next level to compact into.
    assert(level_a < constants.lsm_levels - 1);

    const level_a_count_max = table_count_max_for_level(growth_factor, level_a);
    assert(level_a_count_max > 0);

    const source: *const Level = &manifest.levels[level_a];
    const target: *const Level = &manifest.levels[level_a + 1];

    // Odd levels may burst when even levels are compacted ahead of them, so the level is
    // allowed to exceed its maximum by one table.
    const level_a_count = source.table_count_visible;
    assert(level_a_count <= level_a_count_max + 1);
    if (level_a_count < level_a_count_max) return null;

    const candidate = source.table_with_least_overlap(
        target,
        snapshot_latest,
        growth_factor,
    ) orelse return null;
    assert(candidate.range.tables.count() <= growth_factor);

    return CompactionTableRange{
        .table_a = candidate.table,
        .range_b = CompactionRange{
            .key_min = candidate.range.key_min,
            .key_max = candidate.range.key_max,
            .tables = candidate.range.tables,
        },
    };
}
|
|
542
|
+
|
|
543
|
+
/// Returns the range of visible Level 0 tables that the immutable table, spanning
/// [key_min, key_max] and holding `options.value_count` values, should be compacted with.
///
/// The range always contains the Level 0 tables overlapping [key_min, key_max]. It is
/// additionally extended with adjacent Level 0 tables (first towards smaller keys, then
/// towards larger keys) when doing so coalesces at least one small table.
pub fn immutable_table_compaction_range(
    manifest: *const Manifest,
    key_min: Key,
    key_max: Key,
    options: struct { value_count: u32 },
) CompactionRange {
    assert(key_min <= key_max);
    assert(options.value_count > 0);
    assert(options.value_count <= Table.value_count_max);

    const level_b = 0;
    const manifest_level: *const Level = &manifest.levels[level_b];
    assert(manifest_level.table_count_visible <= growth_factor);

    // The range is guaranteed to be non-null: Level 0 holds at most lsm_growth_factor
    // tables, so no more than lsm_growth_factor tables can intersect the immutable table.
    const range_overlap = manifest_level.tables_overlapping_with_key_range(
        key_min,
        key_max,
        snapshot_latest,
        growth_factor,
    ).?;

    // Try to grow the overlap with neighbouring tables of level 0.
    const range_coalesced = coalesce: {
        const value_count_target = stdx.div_ceil((Table.value_count_max *
            constants.lsm_table_coalescing_threshold_percent), 100);
        assert(value_count_target > 1);
        assert(value_count_target < Table.value_count_max);

        // Number of input values: the immutable table plus all overlapping tables.
        var value_count_output: u32 = options.value_count;
        for (range_overlap.tables.const_slice()) |overlapping| {
            value_count_output += overlapping.table_info.value_count;
        }

        // Becomes true once a table small enough to warrant coalescing is part of the
        // compaction.
        var coalesced_small_table: bool = value_count_output < value_count_target;

        var extended = range_overlap;
        outer: for ([_]Direction{ .descending, .ascending }) |direction| {
            inner: for (0..constants.lsm_growth_factor) |_| {
                if (extended.tables.full()) break :outer;
                if (value_count_output >= value_count_target) break :outer;

                // Fetch the table adjacent to the current edge of the extended range.
                const table_next = manifest_level.next_table(.{
                    .snapshot = snapshot_latest,
                    .key_min = 0,
                    .key_max = std.math.maxInt(Key),
                    .key_exclusive = switch (direction) {
                        .descending => extended.key_min,
                        .ascending => extended.key_max,
                    },
                    .direction = direction,
                }) orelse break :inner;

                const table_next_value_count = table_next.table_info.value_count;
                assert(table_next_value_count > 0);

                // Stop extending in this direction when the neighbour would not fit.
                if (value_count_output + table_next_value_count > Table.value_count_max) {
                    break :inner;
                }

                value_count_output += table_next_value_count;
                if (table_next_value_count < value_count_target) {
                    coalesced_small_table = true;
                }

                // Move the matching edge outwards and keep the tables in key order.
                switch (direction) {
                    .descending => {
                        extended.key_min = table_next.table_info.key_min;
                        extended.tables.insert_at(0, table_next);
                    },
                    .ascending => {
                        extended.key_max = table_next.table_info.key_max;
                        extended.tables.push(table_next);
                    },
                }
            } else unreachable;
        }

        // When no table was added, or none of the tables benefits much from coalescing,
        // fall back to the plain overlap.
        const grew = extended.tables.count() != range_overlap.tables.count();
        break :coalesce if (grew and coalesced_small_table) extended else null;
    };

    if (range_coalesced) |coalesced| {
        log.debug("{}: {s}: manifest: coalesced with {} adjacent tables", .{
            manifest.manifest_log.?.grid.superblock.replica_index.?,
            manifest.config.name,
            coalesced.tables.count() - range_overlap.tables.count(),
        });
    }

    const result = range_coalesced orelse range_overlap;
    assert(result.tables.count() >= range_overlap.tables.count());
    assert(result.key_min <= result.key_max);
    assert(result.key_min <= key_min);
    assert(key_max <= result.key_max);

    // Neighbouring tables of the result must be disjoint and sorted by key.
    if (result.tables.count() > 1) {
        const tables = result.tables.const_slice();
        for (tables[0 .. result.tables.count() - 1], tables[1..]) |a, b| {
            assert(a.table_info.key_max < b.table_info.key_min);
        }
    }

    return CompactionRange{
        .key_min = result.key_min,
        .key_max = result.key_max,
        .tables = result.tables,
    };
}
|
|
664
|
+
|
|
665
|
+
/// Returns true when no level after `level_b` has a table (visible at `snapshot_latest`)
/// overlapping `range`; in that case tombstones must be dropped by the compaction.
/// Returns false as soon as one subsequent level has an overlapping table.
pub fn compaction_must_drop_tombstones(
    manifest: *const Manifest,
    level_b: u8,
    range: *const CompactionRange,
) bool {
    assert(level_b < constants.lsm_levels);
    assert(range.key_min <= range.key_max);

    var level_c: u8 = level_b + 1;
    while (level_c < constants.lsm_levels) : (level_c += 1) {
        const manifest_level: *const Level = &manifest.levels[level_c];
        const overlapping = manifest_level.next_table(.{
            .snapshot = snapshot_latest,
            .direction = .ascending,
            .key_min = range.key_min,
            .key_max = range.key_max,
            .key_exclusive = null,
        });
        if (overlapping == null) continue;

        // Unreachable when compacting into the last level: the last level has no
        // subsequent levels and therefore always drops tombstones.
        assert(level_b != constants.lsm_levels - 1);
        return false;
    }

    assert(level_c == constants.lsm_levels);
    return true;
}
|
|
694
|
+
|
|
695
|
+
/// Checks manifest invariants for the tables visible to `snapshot`:
///
/// * For `.general` tables, a level with visible tables never follows an empty level.
/// * Within each level, visible tables are sorted by key and do not overlap.
/// * Each table whose `snapshot_min` lies outside the snapshot window derived from
///   `sync_op_min`/`sync_op_max` is verified against storage via `Table.verify()`.
pub fn verify(manifest: *const Manifest, snapshot: u64) void {
    assert(snapshot <= snapshot_latest);

    switch (Table.usage) {
        // Interior levels are non-empty: once an empty level has been seen, every
        // following level must be empty as well.
        .general => {
            var seen_empty_level: bool = false;
            for (&manifest.levels) |*level| {
                var visible_tables =
                    level.iterator(.visible, &.{snapshot}, .ascending, null);
                if (visible_tables.next() == null) {
                    seen_empty_level = true;
                } else {
                    assert(!seen_empty_level);
                }
            }
        },
        // For the secondary index TableUsage it is possible (albeit unlikely!) that every
        // table of an interior level has been deleted.
        //
        // Unlike general-usage tables, secondary-index tombstones need not compact down to
        // the last level of the tree before they are deleted; they are deleted as soon as
        // they merge with their corresponding "put".
        // Enough object deletions may therefore lead to compactions where both input
        // tables entirely cancel each other out and no output table is written at all.
        // See `TableUsage` for more detail.
        .secondary_index => {},
    }

    const snapshot_from_commit = vsr.Snapshot.readable_at_commit;
    const vsr_state = &manifest.manifest_log.?.grid.superblock.working.vsr_state;
    for (&manifest.levels) |*level| {
        var key_max_previous: ?Key = null;
        var visible_tables = level.iterator(.visible, &.{snapshot}, .ascending, null);
        while (visible_tables.next()) |table_info| {
            // Tables of a level must be strictly ordered and disjoint.
            if (key_max_previous) |key_previous| {
                assert(key_previous < table_info.key_min);
            }
            // key_min == key_max is possible when the table holds a single value.
            assert(table_info.key_min <= table_info.key_max);
            key_max_previous = table_info.key_max;

            // Tables whose snapshot_min falls inside the sync window are not verified.
            // NOTE(review): presumably their blocks may not be present yet — confirm.
            const table_snapshot = table_info.snapshot_min;
            const within_sync_window =
                table_snapshot >= snapshot_from_commit(vsr_state.sync_op_min) and
                table_snapshot <= snapshot_from_commit(vsr_state.sync_op_max);
            if (!within_sync_window) {
                Table.verify(
                    Storage,
                    manifest.manifest_log.?.grid.superblock.storage,
                    table_info.address,
                    table_info.key_min,
                    table_info.key_max,
                );
            }
        }
    }
}
|
|
753
|
+
};
|
|
754
|
+
}
|