@nxtedition/rocksdb 15.4.1 → 15.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +24 -15
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/iterator.js +2 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
//
|
|
6
|
+
// *****************************************************************
|
|
7
|
+
// EXPERIMENTAL - subject to change while under development
|
|
8
|
+
// *****************************************************************
|
|
9
|
+
//
|
|
10
|
+
// Fast Succinct Trie (FST) implementation based on the LOUDS (Level-Order
|
|
11
|
+
// Unary Degree Sequence) encoding, inspired by the SuRF paper (Zhang et al.,
|
|
12
|
+
// SIGMOD 2018). The trie uses a hybrid encoding:
|
|
13
|
+
//
|
|
14
|
+
// - LOUDS-Dense: For the upper levels of the trie (levels close to the root)
|
|
15
|
+
// where the fanout tends to be high. Uses 256-bit bitmaps per node (one bit
|
|
16
|
+
// per possible byte label), achieving excellent cache locality and O(1)
|
|
17
|
+
// child lookup via popcount.
|
|
18
|
+
//
|
|
19
|
+
// - LOUDS-Sparse: For the lower levels of the trie where fanout is typically
|
|
20
|
+
// low. Uses compact label arrays and bitvectors, achieving better space
|
|
21
|
+
// efficiency than the dense encoding for sparse regions.
|
|
22
|
+
//
|
|
23
|
+
// The boundary between dense and sparse levels (the "cutoff level") is chosen
|
|
24
|
+
// to minimize total space: dense levels use 256 bits per node regardless of
|
|
25
|
+
// fanout, while sparse levels use ~10 bits per edge. When the average fanout
|
|
26
|
+
// drops below ~25 children per node, sparse becomes more efficient.
|
|
27
|
+
//
|
|
28
|
+
// Key design decisions:
|
|
29
|
+
// - Immutable: Built once from sorted keys during SST file construction.
|
|
30
|
+
// - Flat-array layout: The entire trie is stored as a sequence of bitvectors,
|
|
31
|
+
// making serialization trivial and enabling zero-copy reads from disk.
|
|
32
|
+
// - Leaf-indexed: Each trie leaf maps to a data block handle via packed
|
|
33
|
+
// uint32_t offset/size arrays, indexed by the leaf's BFS ordinal.
|
|
34
|
+
// - Key reconstruction: The separator key is reconstructed by tracing
|
|
35
|
+
// the path from root to the current leaf, collecting byte labels at
|
|
36
|
+
// each level. Dense levels encode the label in the bit position
|
|
37
|
+
// (pos % 256), sparse levels store it in the label array.
|
|
38
|
+
//
|
|
39
|
+
// Leaf ordinal computation (SuRF formulas):
|
|
40
|
+
// - Dense leaf: rank1(d_labels, pos+1) - rank1(d_has_child, rank1(d_labels,
|
|
41
|
+
// pos+1)) + rank1(d_is_prefix_key, node_num+1) - 1
|
|
42
|
+
// where pos = node_num * 256 + label_byte
|
|
43
|
+
// - Sparse leaf: ((label_pos + 1) - rank1(s_has_child, label_pos + 1)) +
|
|
44
|
+
// rank1(s_is_prefix_key, node_num+1) + dense_leaf_count - 1
|
|
45
|
+
|
|
46
|
+
#pragma once
|
|
47
|
+
|
|
48
|
+
#include <cstddef>
|
|
49
|
+
#include <cstdint>
|
|
50
|
+
#include <memory>
|
|
51
|
+
#include <string>
|
|
52
|
+
#include <vector>
|
|
53
|
+
|
|
54
|
+
#include "rocksdb/slice.h"
|
|
55
|
+
#include "rocksdb/status.h"
|
|
56
|
+
#include "util/autovector.h"
|
|
57
|
+
#include "utilities/trie_index/bitvector.h"
|
|
58
|
+
|
|
59
|
+
namespace ROCKSDB_NAMESPACE {
|
|
60
|
+
namespace trie_index {
|
|
61
|
+
|
|
62
|
+
// ============================================================================
|
|
63
|
+
// Forward declarations
|
|
64
|
+
// ============================================================================
|
|
65
|
+
class LoudsTrie;
|
|
66
|
+
|
|
67
|
+
// ============================================================================
|
|
68
|
+
// TrieBlockHandle: offset and size of a data block in the SST file.
|
|
69
|
+
// Matches UserDefinedIndexBuilder::BlockHandle but defined locally to avoid
|
|
70
|
+
// header dependencies in the core trie implementation.
|
|
71
|
+
// ============================================================================
|
|
72
|
+
struct TrieBlockHandle {
|
|
73
|
+
uint64_t offset = 0;  // Offset of the data block in the SST file.
|
|
74
|
+
uint64_t size = 0;  // Size of the data block.
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
// ============================================================================
|
|
78
|
+
// LoudsTrieBuilder: Constructs a LOUDS-encoded trie from sorted keys.
|
|
79
|
+
//
|
|
80
|
+
// Usage:
|
|
81
|
+
// LoudsTrieBuilder builder;
|
|
82
|
+
// for each data block in sorted order:
|
|
83
|
+
// builder.AddKey(separator_key, block_handle);
|
|
84
|
+
// builder.Finish();
|
|
85
|
+
// Slice serialized = builder.GetSerializedData();
|
|
86
|
+
//
|
|
87
|
+
// The builder collects all separator keys, then in Finish() it:
|
|
88
|
+
// 1. Determines the optimal cutoff level between dense and sparse encoding.
|
|
89
|
+
// 2. Constructs LOUDS-Dense bitvectors for levels [0, cutoff).
|
|
90
|
+
// 3. Constructs LOUDS-Sparse bitvectors for levels [cutoff, max_depth).
|
|
91
|
+
// 4. Serializes everything into a flat buffer.
|
|
92
|
+
//
|
|
93
|
+
// All keys must be added in sorted order (according to the comparator used
|
|
94
|
+
// by the SST file).
|
|
95
|
+
// ============================================================================
|
|
96
|
+
class LoudsTrieBuilder {
|
|
97
|
+
public:
|
|
98
|
+
LoudsTrieBuilder();
|
|
99
|
+
|
|
100
|
+
// Add a separator key and its associated data block handle.
|
|
101
|
+
// Keys must be added in sorted (ascending) order.
|
|
102
|
+
// This is the basic form used when seqno encoding is not active.
|
|
103
|
+
void AddKey(const Slice& key, const TrieBlockHandle& handle);
|
|
104
|
+
|
|
105
|
+
// Add a separator key with seqno side-table metadata. Used when
|
|
106
|
+
// has_seqno_encoding_ is true. The seqno is stored in a side-table
|
|
107
|
+
// alongside the trie (NOT encoded into the key). block_count is the
|
|
108
|
+
// number of consecutive data blocks that share this separator key
|
|
109
|
+
// (1 = no overflow, >1 = same-user-key run).
|
|
110
|
+
//
|
|
111
|
+
// IMPORTANT: When block_count > 1, the overflow blocks must be added
|
|
112
|
+
// via AddOverflowBlock() immediately after this call, before calling
|
|
113
|
+
// AddKey() for the next separator.
|
|
114
|
+
void AddKeyWithSeqno(const Slice& key, const TrieBlockHandle& handle,
|
|
115
|
+
uint64_t seqno, uint32_t block_count);
|
|
116
|
+
|
|
117
|
+
// Add an overflow block for the most recently added key (same separator).
|
|
118
|
+
// Called block_count-1 times after AddKeyWithSeqno() with block_count > 1.
|
|
119
|
+
// Each overflow block has its own handle and seqno.
|
|
120
|
+
void AddOverflowBlock(const TrieBlockHandle& handle, uint64_t seqno);
|
|
121
|
+
|
|
122
|
+
// Set the seqno encoding flag. Must be called before Finish().
|
|
123
|
+
// When true, the serialized trie will include a seqno side-table after
|
|
124
|
+
// the handle arrays, enabling post-seek correction for same-user-key
|
|
125
|
+
// block boundaries.
|
|
126
|
+
void SetHasSeqnoEncoding(bool has_seqno_encoding) {
|
|
127
|
+
has_seqno_encoding_ = has_seqno_encoding;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Finalize the trie construction. After this call, GetSerializedData()
|
|
131
|
+
// returns the serialized trie.
|
|
132
|
+
void Finish();
|
|
133
|
+
|
|
134
|
+
// Get the serialized trie data. Valid only after Finish().
|
|
135
|
+
Slice GetSerializedData() const { return Slice(serialized_data_); }
|
|
136
|
+
|
|
137
|
+
private:
|
|
138
|
+
// Determine the optimal cutoff level between dense and sparse encoding.
|
|
139
|
+
// Returns the first level where sparse encoding is more space-efficient.
|
|
140
|
+
uint32_t ComputeCutoffLevel() const;
|
|
141
|
+
|
|
142
|
+
// Serialize all trie data structures into serialized_data_.
|
|
143
|
+
void SerializeAll();
|
|
144
|
+
|
|
145
|
+
// ---- Input data ----
|
|
146
|
+
std::vector<std::string> keys_;
|
|
147
|
+
std::vector<TrieBlockHandle> handles_;
|
|
148
|
+
|
|
149
|
+
// ---- Seqno side-table data (populated by AddKeyWithSeqno/AddOverflowBlock)
|
|
150
|
+
// ----
|
|
151
|
+
//
|
|
152
|
+
// Per-key (one entry per call to AddKey/AddKeyWithSeqno):
|
|
153
|
+
// seqnos_[i]: seqno for the i-th separator (0 = sentinel for non-boundary)
|
|
154
|
+
// block_counts_[i]: how many consecutive blocks share this separator
|
|
155
|
+
// (1 = normal, >1 = same-user-key run with overflows)
|
|
156
|
+
//
|
|
157
|
+
// Per-overflow-block (one entry per call to AddOverflowBlock):
|
|
158
|
+
// overflow_handles_[j]: handle for the j-th overflow block
|
|
159
|
+
// overflow_seqnos_[j]: seqno for the j-th overflow block
|
|
160
|
+
std::vector<uint64_t> seqnos_;
|
|
161
|
+
std::vector<uint32_t> block_counts_;
|
|
162
|
+
std::vector<TrieBlockHandle> overflow_handles_;
|
|
163
|
+
std::vector<uint64_t> overflow_seqnos_;
|
|
164
|
+
|
|
165
|
+
// ---- Trie structure (built during Finish()) ----
|
|
166
|
+
|
|
167
|
+
// Cutoff level: levels [0, cutoff_level_) use dense, rest use sparse.
|
|
168
|
+
uint32_t cutoff_level_;
|
|
169
|
+
|
|
170
|
+
// Max key depth (length of longest key).
|
|
171
|
+
uint32_t max_depth_;
|
|
172
|
+
|
|
173
|
+
// LOUDS-Dense bitvectors (all levels concatenated into single bitvectors):
|
|
174
|
+
// d_labels_: 256-bit bitmaps concatenated for all nodes across all dense
|
|
175
|
+
// levels. Bit (node_num * 256 + label) is set if node has child with
|
|
176
|
+
// that label.
|
|
177
|
+
// d_has_child_: One bit per set bit in d_labels_. Set if the child
|
|
178
|
+
// is an internal node (has further children), clear if it's a leaf.
|
|
179
|
+
// d_is_prefix_key_: One bit per node across all dense levels. Set if
|
|
180
|
+
// the path from root to this node forms a valid key (prefix match).
|
|
181
|
+
BitvectorBuilder d_labels_;
|
|
182
|
+
BitvectorBuilder d_has_child_;
|
|
183
|
+
BitvectorBuilder d_is_prefix_key_;
|
|
184
|
+
|
|
185
|
+
// LOUDS-Sparse arrays (all sparse levels concatenated):
|
|
186
|
+
// s_labels_: Byte labels of all edges, in level-order.
|
|
187
|
+
// s_has_child_: One bit per label. Set if the child is internal.
|
|
188
|
+
// s_louds_: One bit per label. Set at the first label of each node
|
|
189
|
+
// (marks node boundaries in the label array).
|
|
190
|
+
// s_is_prefix_key_: One bit per node in the sparse region. Set if the
|
|
191
|
+
// path to this node forms a valid key.
|
|
192
|
+
std::vector<uint8_t> s_labels_;
|
|
193
|
+
BitvectorBuilder s_has_child_;
|
|
194
|
+
BitvectorBuilder s_louds_;
|
|
195
|
+
BitvectorBuilder s_is_prefix_key_;
|
|
196
|
+
|
|
197
|
+
// Total number of leaves in the dense section.
|
|
198
|
+
uint64_t dense_leaf_count_;
|
|
199
|
+
|
|
200
|
+
// Total number of nodes in all dense levels combined.
|
|
201
|
+
uint64_t dense_node_count_;
|
|
202
|
+
|
|
203
|
+
// Number of sparse root nodes: internal children at the last dense level
|
|
204
|
+
// that cross into the sparse region. See LoudsTrie::dense_child_count_.
|
|
205
|
+
uint64_t dense_child_count_;
|
|
206
|
+
|
|
207
|
+
// Whether separator keys include seqno encoding. Written to the serialized
|
|
208
|
+
// header so the reader can detect it.
|
|
209
|
+
bool has_seqno_encoding_;
|
|
210
|
+
|
|
211
|
+
// ---- Serialized output ----
|
|
212
|
+
std::string serialized_data_;
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
// ============================================================================
|
|
216
|
+
// LoudsTrie: Immutable LOUDS-encoded trie for reading.
|
|
217
|
+
//
|
|
218
|
+
// Deserialized from a flat buffer (e.g., read from an SST meta-block).
|
|
219
|
+
// Supports Seek (find the first leaf >= target key) and Next (advance to
|
|
220
|
+
// the next leaf in sorted order).
|
|
221
|
+
//
|
|
222
|
+
// The trie does NOT own the underlying data when initialized from external
|
|
223
|
+
// memory (e.g., a block cache entry). The caller must ensure the data remains
|
|
224
|
+
// valid for the lifetime of this object.
|
|
225
|
+
//
|
|
226
|
+
// The trie is organized as:
|
|
227
|
+
// - Dense levels [0, cutoff_level_): bitvectors d_labels_, d_has_child_,
|
|
228
|
+
// d_is_prefix_key_ with 256-bit bitmaps per node.
|
|
229
|
+
// - Sparse levels [cutoff_level_, max_depth_]: arrays s_labels_,
|
|
230
|
+
// s_has_child_, s_louds_, s_is_prefix_key_.
|
|
231
|
+
// ============================================================================
|
|
232
|
+
class LoudsTrie {
|
|
233
|
+
public:
|
|
234
|
+
LoudsTrie();
|
|
235
|
+
|
|
236
|
+
// LoudsTrie contains Bitvector members (which hold raw pointers into
|
|
237
|
+
// owned or external memory) and a raw pointer to sparse labels data.
|
|
238
|
+
// Copying would create dangling pointers or aliased external references.
|
|
239
|
+
//
|
|
240
|
+
// Move safety: when aligned_copy_ is non-empty, bitvectors and raw
|
|
241
|
+
// pointers reference its buffer. The C++ standard guarantees that
|
|
242
|
+
// std::string's noexcept move constructor transfers the heap buffer
|
|
243
|
+
// without reallocation (noexcept precludes allocation, and COW is
|
|
244
|
+
// forbidden since C++11). Trie data always exceeds the SSO threshold
|
|
245
|
+
// (hundreds to thousands of bytes), so aligned_copy_ is always
|
|
246
|
+
// heap-allocated, and move always preserves the buffer address.
|
|
247
|
+
~LoudsTrie() = default;
|
|
248
|
+
|
|
249
|
+
LoudsTrie(const LoudsTrie&) = delete;
|
|
250
|
+
LoudsTrie& operator=(const LoudsTrie&) = delete;
|
|
251
|
+
LoudsTrie(LoudsTrie&&) = default;
|
|
252
|
+
LoudsTrie& operator=(LoudsTrie&&) = default;
|
|
253
|
+
|
|
254
|
+
// Initialize from serialized data. Returns Status::OK() on success,
|
|
255
|
+
// or Status::Corruption() if the data is malformed.
|
|
256
|
+
Status InitFromData(const Slice& data);
|
|
257
|
+
|
|
258
|
+
// ---- Accessors ----
|
|
259
|
+
uint64_t NumKeys() const { return num_keys_; }
|
|
260
|
+
uint32_t CutoffLevel() const { return cutoff_level_; }
|
|
261
|
+
uint32_t MaxDepth() const { return max_depth_; }
|
|
262
|
+
|
|
263
|
+
// Whether this trie was built with a seqno side-table (enabling post-seek
|
|
264
|
+
// correction for same-user-key block boundaries). When true, the serialized
|
|
265
|
+
// data includes per-leaf seqno and block count arrays, plus overflow block
|
|
266
|
+
// metadata for runs of blocks sharing the same separator key.
|
|
267
|
+
bool HasSeqnoEncoding() const { return has_seqno_encoding_; }
|
|
268
|
+
|
|
269
|
+
// Get the block handle for the i-th leaf (0-indexed).
|
|
270
|
+
TrieBlockHandle GetHandle(uint64_t leaf_index) const;
|
|
271
|
+
|
|
272
|
+
// Whether this trie has path-compression chains. Used by the iterator
|
|
273
|
+
// to select a specialized Seek implementation at construction time,
|
|
274
|
+
// avoiding any per-level overhead when chains are absent.
|
|
275
|
+
bool HasChains() const { return !s_chain_lens_.empty(); }
|
|
276
|
+
|
|
277
|
+
// Approximate heap memory used by auxiliary data structures (child position
|
|
278
|
+
// lookup tables). Does not include the serialized data itself (which is
|
|
279
|
+
// typically owned by the block cache).
|
|
280
|
+
size_t ApproximateAuxMemoryUsage() const {
|
|
281
|
+
return (s_child_start_pos_.capacity() + s_child_end_pos_.capacity()) *
|
|
282
|
+
sizeof(uint32_t);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
// Allow the iterator to access internal bitvectors directly for
|
|
286
|
+
// performance-critical rank/select operations during traversal.
|
|
287
|
+
friend class LoudsTrieIterator;
|
|
288
|
+
|
|
289
|
+
private:
|
|
290
|
+
// Number of keys (leaves).
|
|
291
|
+
uint64_t num_keys_;
|
|
292
|
+
|
|
293
|
+
// Cutoff level between dense and sparse.
|
|
294
|
+
uint32_t cutoff_level_;
|
|
295
|
+
|
|
296
|
+
// Maximum key depth.
|
|
297
|
+
uint32_t max_depth_;
|
|
298
|
+
|
|
299
|
+
// True if the trie includes a seqno side-table for post-seek correction.
|
|
300
|
+
// Set from the flags field in the serialized header.
|
|
301
|
+
bool has_seqno_encoding_;
|
|
302
|
+
|
|
303
|
+
// Dense leaf count (leaves in levels [0, cutoff_level_)).
|
|
304
|
+
uint64_t dense_leaf_count_;
|
|
305
|
+
|
|
306
|
+
// Total number of nodes across all dense levels.
|
|
307
|
+
uint64_t dense_node_count_;
|
|
308
|
+
|
|
309
|
+
// Number of sparse root nodes: internal children at the last dense level
|
|
310
|
+
// (cutoff_level_ - 1) that cross into the sparse region. Used to offset
|
|
311
|
+
// sparse node numbering so that children of sparse internal labels are
|
|
312
|
+
// numbered after these root sparse nodes. When cutoff_level_ == 0, this
|
|
313
|
+
// is set to 1 (the root itself).
|
|
314
|
+
uint64_t dense_child_count_;
|
|
315
|
+
|
|
316
|
+
// LOUDS-Dense bitvectors (all dense levels concatenated).
|
|
317
|
+
Bitvector d_labels_;
|
|
318
|
+
Bitvector d_has_child_;
|
|
319
|
+
Bitvector d_is_prefix_key_;
|
|
320
|
+
|
|
321
|
+
// LOUDS-Sparse (all sparse levels concatenated).
|
|
322
|
+
const uint8_t* s_labels_data_;
|
|
323
|
+
uint64_t s_labels_size_;
|
|
324
|
+
Bitvector s_has_child_;
|
|
325
|
+
Bitvector s_louds_;
|
|
326
|
+
Bitvector s_is_prefix_key_;
|
|
327
|
+
|
|
328
|
+
// SuRF-style child position lookup tables for Select-free traversal.
|
|
329
|
+
// Instead of computing FindNthOneBit(node_num) during traversal, we
|
|
330
|
+
// precompute child start/end positions indexed by internal label rank. This
|
|
331
|
+
// allows traversal using only Rank1 (O(1)) and array lookup (O(1)).
|
|
332
|
+
//
|
|
333
|
+
// For the k-th internal label (has_child[pos]=1, where k = Rank1(pos+1)-1):
|
|
334
|
+
// s_child_start_pos_[k] = start position of child node
|
|
335
|
+
// s_child_end_pos_[k] = end position (exclusive) of child node
|
|
336
|
+
//
|
|
337
|
+
// Memory overhead: 8 bytes per internal node (2 x uint32_t).
|
|
338
|
+
std::vector<uint32_t> s_child_start_pos_;
|
|
339
|
+
std::vector<uint32_t> s_child_end_pos_;
|
|
340
|
+
|
|
341
|
+
// Path compression: chain metadata for fanout-1 chains in the sparse region.
|
|
342
|
+
//
|
|
343
|
+
// A "chain" is a sequence of >= 2 consecutive fanout-1 nodes (nodes with
|
|
344
|
+
// exactly one label that is internal) starting from the child of an internal
|
|
345
|
+
// label. Chains are common in tries with long shared prefixes (e.g.,
|
|
346
|
+
// zero-padded numeric keys, URL paths).
|
|
347
|
+
//
|
|
348
|
+
// For the k-th internal label (same indexing as s_child_start_pos_):
|
|
349
|
+
// Storage uses a bitmap (1 bit per internal label) for O(1) chain detection,
|
|
350
|
+
// plus compact arrays indexed by chain ordinal (Rank1 on the bitmap).
|
|
351
|
+
//
|
|
352
|
+
// Lookup during Seek:
|
|
353
|
+
// 1. s_chain_bitmap_.GetBit(child_idx) — has chain?
|
|
354
|
+
// 2. chain_idx = s_chain_bitmap_.Rank1(child_idx + 1) - 1
|
|
355
|
+
// 3. s_chain_lens_[chain_idx], s_chain_suffix_offsets_[chain_idx], etc.
|
|
356
|
+
//
|
|
357
|
+
// Space overhead: 1 bit per internal label (bitmap) + 10 bytes per chain
|
|
358
|
+
// (offset + len + end_child_idx) + suffix bytes. For key sets with few
|
|
359
|
+
// chains (e.g., random hex), overhead is < 1 byte per internal label.
|
|
360
|
+
Bitvector s_chain_bitmap_;
|
|
361
|
+
std::vector<uint32_t> s_chain_suffix_offsets_;
|
|
362
|
+
std::vector<uint16_t> s_chain_lens_;
|
|
363
|
+
std::vector<uint32_t> s_chain_end_child_idx_;
|
|
364
|
+
const uint8_t* s_chain_suffix_data_;
|
|
365
|
+
uint64_t s_chain_suffix_size_;
|
|
366
|
+
|
|
367
|
+
// Block handles: packed uint32_t arrays for data block offsets and sizes.
|
|
368
|
+
// BFS leaf order does not necessarily match key-sorted order (deeper leaves
|
|
369
|
+
// appear later in BFS even if they precede shallower leaves
|
|
370
|
+
// lexicographically), so offsets are NOT monotonically non-decreasing and
|
|
371
|
+
// cannot use Elias-Fano encoding. Instead, we store offsets and sizes as
|
|
372
|
+
// packed uint32_t arrays for O(1) random access.
|
|
373
|
+
//
|
|
374
|
+
// uint32_t limits individual values to ~4 GB, which is sufficient since
|
|
375
|
+
// RocksDB SST files are typically 64 MB to 1 GB and never exceed 4 GB.
|
|
376
|
+
const uint32_t* handle_offsets_;
|
|
377
|
+
const uint32_t* handle_sizes_;
|
|
378
|
+
|
|
379
|
+
// ---- Seqno side-table (deserialized when has_seqno_encoding_ is true) ----
|
|
380
|
+
//
|
|
381
|
+
// The side-table enables post-seek correction for same-user-key block
|
|
382
|
+
// boundaries. It stores per-leaf seqno and block count data in BFS leaf
|
|
383
|
+
// order, plus overflow block handles/seqnos for runs where the same
|
|
384
|
+
// separator maps to multiple blocks.
|
|
385
|
+
//
|
|
386
|
+
// leaf_seqnos_[i]: seqno for the i-th leaf (BFS order).
|
|
387
|
+
// Value 0 = sentinel meaning "never advance past this leaf" (used for
|
|
388
|
+
// non-boundary leaves and for leaves where seqno=0 covers everything).
|
|
389
|
+
// For boundary leaves, stores the actual last_key_seq.
|
|
390
|
+
//
|
|
391
|
+
// leaf_block_counts_[i]: how many consecutive blocks share this separator.
|
|
392
|
+
// 1 = no overflow (the common case). >1 = same-user-key run.
|
|
393
|
+
//
|
|
394
|
+
// overflow_offsets_/overflow_sizes_/overflow_seqnos_: packed arrays for
|
|
395
|
+
// the overflow blocks (total count = sum of (block_count-1) for all
|
|
396
|
+
// leaves).
|
|
397
|
+
//
|
|
398
|
+
// overflow_base_[i]: prefix sum of (block_count-1) for leaves [0, i),
|
|
399
|
+
// precomputed during InitFromData() for O(1) random access into the
|
|
400
|
+
// overflow arrays. overflow_base_[i] is the starting index into the
|
|
401
|
+
// overflow arrays for leaf i's overflow blocks.
|
|
402
|
+
const uint64_t* leaf_seqnos_;
|
|
403
|
+
const uint32_t* leaf_block_counts_;
|
|
404
|
+
const uint32_t* overflow_offsets_;
|
|
405
|
+
const uint32_t* overflow_sizes_;
|
|
406
|
+
const uint64_t* overflow_seqnos_;
|
|
407
|
+
uint32_t num_overflow_blocks_;
|
|
408
|
+
std::vector<uint32_t> overflow_base_;
|
|
409
|
+
|
|
410
|
+
public:
|
|
411
|
+
// ---- Seqno side-table accessors (used by TrieIndexIterator) ----
|
|
412
|
+
|
|
413
|
+
// Get the seqno for the i-th leaf (BFS order). Returns 0 (sentinel) for
|
|
414
|
+
// non-boundary leaves.
|
|
415
|
+
uint64_t GetLeafSeqno(uint64_t leaf_index) const {
|
|
416
|
+
assert(has_seqno_encoding_ && leaf_seqnos_ != nullptr);
|
|
417
|
+
assert(leaf_index < num_keys_);
|
|
418
|
+
return leaf_seqnos_[leaf_index];
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
// Get the block count for the i-th leaf. Returns 1 for normal leaves.
|
|
422
|
+
uint32_t GetLeafBlockCount(uint64_t leaf_index) const {
|
|
423
|
+
assert(has_seqno_encoding_ && leaf_block_counts_ != nullptr);
|
|
424
|
+
assert(leaf_index < num_keys_);
|
|
425
|
+
return leaf_block_counts_[leaf_index];
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// Get the overflow base (starting index into overflow arrays) for leaf i.
|
|
429
|
+
uint32_t GetOverflowBase(uint64_t leaf_index) const {
|
|
430
|
+
assert(has_seqno_encoding_);
|
|
431
|
+
assert(leaf_index < overflow_base_.size());
|
|
432
|
+
return overflow_base_[leaf_index];
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
// Get the handle for the j-th overflow block.
|
|
436
|
+
TrieBlockHandle GetOverflowHandle(uint32_t overflow_index) const {
|
|
437
|
+
assert(overflow_index < num_overflow_blocks_);
|
|
438
|
+
return TrieBlockHandle{overflow_offsets_[overflow_index],
|
|
439
|
+
overflow_sizes_[overflow_index]};
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// Get the seqno for the j-th overflow block.
|
|
443
|
+
uint64_t GetOverflowSeqno(uint32_t overflow_index) const {
|
|
444
|
+
assert(overflow_index < num_overflow_blocks_);
|
|
445
|
+
return overflow_seqnos_[overflow_index];
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
private:
|
|
449
|
+
// Aligned copy of the serialized trie data, used when the input data from
|
|
450
|
+
// the block reader is not 8-byte aligned (e.g., mmap at an unaligned file
|
|
451
|
+
// offset). All Bitvector and raw pointer members reference this buffer
|
|
452
|
+
// when non-empty. std::string::data() returns memory from new[]/malloc,
|
|
453
|
+
// which is aligned to at least alignof(max_align_t) >= 8.
|
|
454
|
+
std::string aligned_copy_;
|
|
455
|
+
};
|
|
456
|
+
|
|
457
|
+
// ============================================================================
|
|
458
|
+
// LoudsTrieIterator: Iterates over the leaves of a LoudsTrie.
|
|
459
|
+
//
|
|
460
|
+
// Supports forward-only iteration: Seek(key) + Next().
|
|
461
|
+
// Reconstructs the separator key from the trie path at each position.
|
|
462
|
+
//
|
|
463
|
+
// The iterator maintains a stack of positions through the trie (one per
|
|
464
|
+
// level from root to current leaf). This enables:
|
|
465
|
+
// - Key reconstruction: collecting the label byte at each level.
|
|
466
|
+
// - Backtracking for Next(): pop to parent, advance to next sibling.
|
|
467
|
+
// - Leaf ordinal computation: using rank formulas on bitvectors.
|
|
468
|
+
//
|
|
469
|
+
// Design follows the SuRF reference implementation for correctness.
|
|
470
|
+
// ============================================================================
|
|
471
|
+
class LoudsTrieIterator {
|
|
472
|
+
public:
|
|
473
|
+
explicit LoudsTrieIterator(const LoudsTrie* trie);
|
|
474
|
+
|
|
475
|
+
// Position on the very first leaf (smallest key) by descending from the
|
|
476
|
+
// root to the leftmost leaf. More efficient than Seek(Slice()) because it
|
|
477
|
+
// skips SeekImpl's target-consumption loop and its redundant prefix key
|
|
478
|
+
// check at root (DescendToLeftmostLeaf handles prefix keys at every node).
|
|
479
|
+
// Returns true if positioned on a valid leaf.
|
|
480
|
+
bool SeekToFirst();
|
|
481
|
+
|
|
482
|
+
// Seek to the first leaf whose key is >= `target`.
|
|
483
|
+
// Returns true if positioned on a valid leaf.
|
|
484
|
+
//
|
|
485
|
+
// Dispatches to a specialized implementation selected at construction time
|
|
486
|
+
// based on whether the trie has path-compression chains. This eliminates
|
|
487
|
+
// all chain-related code from the instruction cache when chains are absent,
|
|
488
|
+
// following the same pattern as RocksDB's BlockIter::ParseNextKey template.
|
|
489
|
+
bool Seek(const Slice& target) {
|
|
490
|
+
if (has_chains_) {
|
|
491
|
+
return SeekImpl<true>(target);
|
|
492
|
+
}
|
|
493
|
+
return SeekImpl<false>(target);
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
// Advance to the next leaf in sorted order.
|
|
497
|
+
// Returns true if positioned on a valid leaf.
|
|
498
|
+
bool Next();
|
|
499
|
+
|
|
500
|
+
// Check if the iterator is positioned on a valid leaf.
|
|
501
|
+
bool Valid() const { return valid_; }
|
|
502
|
+
|
|
503
|
+
// Get the current separator key. Valid only when Valid() is true.
|
|
504
|
+
// The returned Slice is valid until the next Seek/Next call.
|
|
505
|
+
Slice Key() const { return Slice(key_buf_.get(), key_len_); }
|
|
506
|
+
|
|
507
|
+
// Get the current leaf index (for mapping to block handles).
|
|
508
|
+
uint64_t LeafIndex() const { return leaf_index_; }
|
|
509
|
+
|
|
510
|
+
// Get the block handle for the current leaf.
|
|
511
|
+
TrieBlockHandle Value() const;
|
|
512
|
+
|
|
513
|
+
private:
|
|
514
|
+
// Position within a single trie level. The iterator maintains a stack
|
|
515
|
+
// of these from root to the current position.
|
|
516
|
+
//
|
|
517
|
+
// Packed into 8 bytes by encoding the is_dense flag in bit 63 of the
|
|
518
|
+
// position value. Since bitvector positions and label array indices are
|
|
519
|
+
// well under 2^63, this is safe. This halves the per-level memory from
|
|
520
|
+
// 16 bytes (with alignment padding) to 8 bytes, improving cache
|
|
521
|
+
// utilization for the path_ stack.
|
|
522
|
+
struct LevelPos {
|
|
523
|
+
// Position in the bitvector/label array at this level, with the
|
|
524
|
+
// is_dense flag encoded in the high bit (bit 63).
|
|
525
|
+
// - Dense: bit position in d_labels_ (= node_num * 256 +
|
|
526
|
+
// label_byte). The label byte is pos % 256.
|
|
527
|
+
// - Sparse: index into s_labels_ array.
|
|
528
|
+
uint64_t pos_and_flag;
|
|
529
|
+
|
|
530
|
+
static constexpr uint64_t kDenseFlag = uint64_t(1) << 63;
|
|
531
|
+
|
|
532
|
+
uint64_t pos() const { return pos_and_flag & ~kDenseFlag; }
|
|
533
|
+
bool is_dense() const { return (pos_and_flag & kDenseFlag) != 0; }
|
|
534
|
+
|
|
535
|
+
static LevelPos MakeDense(uint64_t p) { return {p | kDenseFlag}; }
|
|
536
|
+
static LevelPos MakeSparse(uint64_t p) { return {p}; }
|
|
537
|
+
};
|
|
538
|
+
|
|
539
|
+
// --- Dense level helpers ---
|
|
540
|
+
|
|
541
|
+
// Seek within a dense node for a target label byte.
|
|
542
|
+
// Sets result to the position of the label if found, or the position of
|
|
543
|
+
// the next label >= target_byte if not found.
|
|
544
|
+
// Returns true if the exact label was found, false if we landed on a
|
|
545
|
+
// label > target_byte (or no label exists).
|
|
546
|
+
bool DenseSeekLabel(uint64_t node_num, uint8_t target_byte,
|
|
547
|
+
uint64_t* out_pos);
|
|
548
|
+
|
|
549
|
+
// Compute the child node number for a dense internal child with the given
|
|
550
|
+
// label_rank. Takes a pre-computed label_rank to avoid redundant
|
|
551
|
+
// Rank1(d_labels_) calls in hot paths where label_rank was already computed
|
|
552
|
+
// for has_child checking.
|
|
553
|
+
uint64_t DenseChildNodeNumFromRank(uint64_t label_rank) const;
|
|
554
|
+
|
|
555
|
+
// Compute the leaf ordinal for a dense leaf at `pos`.
|
|
556
|
+
// leaf_idx = rank1(d_labels_, pos+1) - rank1(d_has_child_,
|
|
557
|
+
// rank1(d_labels_, pos+1)) + rank1(d_is_prefix_key_, node_num+1) - 1
|
|
558
|
+
// Takes a pre-computed label_rank.
|
|
559
|
+
uint64_t DenseLeafIndexFromRank(uint64_t pos, uint64_t label_rank) const;
|
|
560
|
+
|
|
561
|
+
// Same as DenseLeafIndexFromRank but also takes a pre-computed
|
|
562
|
+
// d_has_child_.Rank1(label_rank + 1) to avoid redundant rank call.
|
|
563
|
+
uint64_t DenseLeafIndexFromRankAndHasChildRank(uint64_t pos,
|
|
564
|
+
uint64_t label_rank,
|
|
565
|
+
uint64_t has_child_rank) const;
|
|
566
|
+
|
|
567
|
+
// Compute the leaf ordinal for a dense prefix key at node `node_num`.
|
|
568
|
+
// A prefix key leaf comes before any child leaves of that node.
|
|
569
|
+
uint64_t DensePrefixKeyLeafIndex(uint64_t node_num) const;
|
|
570
|
+
|
|
571
|
+
// --- Sparse level helpers ---
|
|
572
|
+
|
|
573
|
+
// Seek within a sparse node starting at `node_start_pos` for label byte.
|
|
574
|
+
// Returns true if the exact label was found, false otherwise. Writes the
|
|
575
|
+
// position to `out_pos`. `node_end_pos` is one past the last label
|
|
576
|
+
// position of this node.
|
|
577
|
+
bool SparseSeekLabel(uint64_t node_start_pos, uint64_t node_end_pos,
|
|
578
|
+
uint8_t target_byte, uint64_t* out_pos);
|
|
579
|
+
|
|
580
|
+
// Compute the child node number for a sparse internal child at `pos`.
|
|
581
|
+
// child_node_num = dense_child_count_ + rank1(s_has_child_, pos+1) - 1
|
|
582
|
+
// (offset by dense_child_count_ because children of sparse internal labels
|
|
583
|
+
// are numbered after the root sparse nodes).
|
|
584
|
+
uint64_t SparseChildNodeNum(uint64_t pos) const;
|
|
585
|
+
|
|
586
|
+
// Compute the leaf ordinal for a sparse leaf at `pos`.
|
|
587
|
+
uint64_t SparseLeafIndex(uint64_t pos) const;
|
|
588
|
+
|
|
589
|
+
// Same as SparseLeafIndex but takes a pre-computed
|
|
590
|
+
// s_has_child_.Rank1(pos + 1) to avoid redundant rank call.
|
|
591
|
+
uint64_t SparseLeafIndexFromHasChildRank(uint64_t pos,
|
|
592
|
+
uint64_t has_child_rank) const;
|
|
593
|
+
|
|
594
|
+
// Compute the leaf ordinal for a sparse prefix key at sparse node
|
|
595
|
+
// `sparse_node_num`. The sparse_node_num is 0-indexed among sparse nodes
|
|
596
|
+
// only (not including dense nodes).
|
|
597
|
+
uint64_t SparsePrefixKeyLeafIndex(uint64_t sparse_node_num) const;
|
|
598
|
+
|
|
599
|
+
// Get the sparse node number (0-indexed among sparse nodes) from a
|
|
600
|
+
// position in the s_labels_ array.
|
|
601
|
+
uint64_t SparseNodeNum(uint64_t pos) const;
|
|
602
|
+
|
|
603
|
+
// Get the start position (in s_labels_) for sparse node `sparse_node_num`.
|
|
604
|
+
uint64_t SparseNodeStartPos(uint64_t sparse_node_num) const;
|
|
605
|
+
|
|
606
|
+
// Get the end position (one past last label) for a sparse node starting
|
|
607
|
+
// at `start_pos`.
|
|
608
|
+
uint64_t SparseNodeEndPos(uint64_t start_pos) const;
|
|
609
|
+
|
|
610
|
+
// --- Traversal helpers ---
|
|
611
|
+
|
|
612
|
+
// Descend from the given node to the leftmost leaf in its subtree,
|
|
613
|
+
// pushing entries onto path_ and building key_buf_. Sets
|
|
614
|
+
// leaf_index_ and valid_. Returns true if a leaf was found.
|
|
615
|
+
bool DescendToLeftmostLeaf(bool in_dense, uint64_t node_num);
|
|
616
|
+
|
|
617
|
+
// Advance to the next valid leaf by backtracking up the trie path
|
|
618
|
+
// and finding the next sibling label, then descending to the leftmost
|
|
619
|
+
// leaf in that subtree. Used by Next() and SeekImpl().
|
|
620
|
+
// Returns true if a next leaf was found.
|
|
621
|
+
bool Advance();
|
|
622
|
+
|
|
623
|
+
// Seek implementation, templated on whether path-compression chains exist.
|
|
624
|
+
// When kHasChains=false, the compiler eliminates ALL chain-related code,
|
|
625
|
+
// keeping the i-cache footprint minimal for tries without chains.
|
|
626
|
+
// This follows the same pattern as RocksDB's BlockIter::ParseNextKey.
|
|
627
|
+
template <bool kHasChains>
|
|
628
|
+
bool SeekImpl(const Slice& target);
|
|
629
|
+
|
|
630
|
+
// True if the trie has path-compression chains. Set once in the constructor
|
|
631
|
+
// and used by Seek() to dispatch to the correct specialization.
|
|
632
|
+
bool has_chains_;
|
|
633
|
+
|
|
634
|
+
const LoudsTrie* trie_;
|
|
635
|
+
bool valid_;
|
|
636
|
+
uint64_t leaf_index_;
|
|
637
|
+
|
|
638
|
+
// The reconstructed key at the current position. Each byte corresponds
|
|
639
|
+
// to a level in path_. For dense levels, the byte is pos % 256; for
|
|
640
|
+
// sparse levels, the byte is s_labels_[pos].
|
|
641
|
+
//
|
|
642
|
+
// Key reconstruction appends one byte per trie level in the Seek/Next
|
|
643
|
+
// hot loop, so the append operation must be as cheap as possible — a
|
|
644
|
+
// single inlined store + increment with no function call overhead. The
|
|
645
|
+
// buffer is heap-allocated once in the constructor to MaxDepth()+1 bytes.
|
|
646
|
+
//
|
|
647
|
+
// All append sites go through AppendKeySlot() which validates bounds in
|
|
648
|
+
// debug builds. In release builds it compiles to the same single store +
|
|
649
|
+
// increment with no overhead (the assert is elided).
|
|
650
|
+
std::unique_ptr<char[]> key_buf_;
|
|
651
|
+
uint32_t key_len_;
|
|
652
|
+
uint32_t key_cap_;
|
|
653
|
+
|
|
654
|
+
// Returns a reference to the next key buffer slot, advancing key_len_.
|
|
655
|
+
// Validates in debug builds that the buffer has space. A corrupted trie
|
|
656
|
+
// with depth > max_depth_ would overflow key_buf_ without this check.
|
|
657
|
+
char& AppendKeySlot() {
|
|
658
|
+
assert(key_len_ < key_cap_ &&
|
|
659
|
+
"key_buf_ overflow: trie depth exceeds max_depth_");
|
|
660
|
+
return key_buf_[key_len_++];
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
// Stack of positions from root to current leaf. path_[i] holds the
|
|
664
|
+
// position at depth i in the trie. path_.size() equals key_len_ for
|
|
665
|
+
// both child leaves and prefix keys (in which case the last path_
|
|
666
|
+
// entry's label is not appended to
|
|
667
|
+
// key_buf_ since the node itself is the key).
|
|
668
|
+
//
|
|
669
|
+
// For a prefix key, we mark it by setting is_at_prefix_key_ = true and
|
|
670
|
+
// the path_ only goes up to the prefix key node level (no label entry
|
|
671
|
+
// for the prefix key itself since the key terminates at the node, not
|
|
672
|
+
// at a child edge).
|
|
673
|
+
//
|
|
674
|
+
// Uses autovector with 24 inline slots to avoid heap allocation for
|
|
675
|
+
// tries up to 24 levels deep. Most real-world key sets have depth < 24.
|
|
676
|
+
autovector<LevelPos, 24> path_;
|
|
677
|
+
|
|
678
|
+
// True if the current leaf is a prefix key (the key terminates at a
|
|
679
|
+
// node that also has children). In this case, path_.size() == depth
|
|
680
|
+
// of the node and key_buf_[0..key_len_) holds the prefix key value.
|
|
681
|
+
bool is_at_prefix_key_;
|
|
682
|
+
};
|
|
683
|
+
|
|
684
|
+
} // namespace trie_index
|
|
685
|
+
} // namespace ROCKSDB_NAMESPACE
|