@nxtedition/rocksdb 15.4.1 → 15.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +24 -15
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/iterator.js +2 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#include "utilities/trie_index/trie_index_factory.h"
|
|
7
|
+
|
|
8
|
+
#include <algorithm>
|
|
9
|
+
#include <cassert>
|
|
10
|
+
#include <cstring>
|
|
11
|
+
|
|
12
|
+
#include "db/dbformat.h"
|
|
13
|
+
#include "rocksdb/comparator.h"
|
|
14
|
+
#include "util/coding.h"
|
|
15
|
+
|
|
16
|
+
namespace ROCKSDB_NAMESPACE {
|
|
17
|
+
namespace trie_index {
|
|
18
|
+
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// TrieIndexBuilder
|
|
21
|
+
// ============================================================================
|
|
22
|
+
|
|
23
|
+
// Constructs a builder that stores `comparator` for the user-key comparisons
// and separator shortening performed in AddIndexEntry(). Both state flags
// start out false: `finished_`, and `must_use_separator_with_seq_`, which
// AddIndexEntry() flips to true once it sees a same-user-key (or
// duplicate-separator) block boundary.
TrieIndexBuilder::TrieIndexBuilder(const Comparator* comparator)
|
|
24
|
+
: comparator_(comparator),
|
|
25
|
+
finished_(false),
|
|
26
|
+
must_use_separator_with_seq_(false) {}
|
|
27
|
+
|
|
28
|
+
Slice TrieIndexBuilder::AddIndexEntry(const Slice& last_key_in_current_block,
|
|
29
|
+
const Slice* first_key_in_next_block,
|
|
30
|
+
const BlockHandle& block_handle,
|
|
31
|
+
std::string* separator_scratch,
|
|
32
|
+
const IndexEntryContext& context) {
|
|
33
|
+
SequenceNumber last_key_seq = context.last_key_seq;
|
|
34
|
+
|
|
35
|
+
// Compute a short separator between the two user keys using the
|
|
36
|
+
// comparator. FindShortestSeparator takes `*start` as both input and output:
|
|
37
|
+
// input: *start == last_key_in_current_block
|
|
38
|
+
// output: *start modified to shortest string in [start, limit)
|
|
39
|
+
// If first_key_in_next_block is nullptr, this is the last block — use a
|
|
40
|
+
// separator equal to the last key itself (not a shortened successor).
|
|
41
|
+
Slice separator;
|
|
42
|
+
// True when last_key and first_key_in_next_block are the same user key
|
|
43
|
+
// (same-user-key block boundary). Computed once and reused below for
|
|
44
|
+
// both the sticky flag and the per-entry seqno decision.
|
|
45
|
+
bool same_user_key = false;
|
|
46
|
+
if (first_key_in_next_block != nullptr) {
|
|
47
|
+
same_user_key = comparator_->Compare(last_key_in_current_block,
|
|
48
|
+
*first_key_in_next_block) == 0;
|
|
49
|
+
|
|
50
|
+
*separator_scratch = last_key_in_current_block.ToString();
|
|
51
|
+
comparator_->FindShortestSeparator(separator_scratch,
|
|
52
|
+
*first_key_in_next_block);
|
|
53
|
+
separator = Slice(*separator_scratch);
|
|
54
|
+
|
|
55
|
+
// Detect same-user-key block boundary: if the two user keys are identical,
|
|
56
|
+
// FindShortestSeparator returns the same key for both sides, making it
|
|
57
|
+
// impossible to distinguish the two blocks. Set the sticky flag so that
|
|
58
|
+
// at Finish() time, ALL separators will include encoded seqnos.
|
|
59
|
+
// This mirrors ShortenedIndexBuilder::must_use_separator_with_seq_.
|
|
60
|
+
if (!must_use_separator_with_seq_ && same_user_key) {
|
|
61
|
+
must_use_separator_with_seq_ = true;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// Edge case: FindShortestSeparator may fail to shorten the key even when
|
|
65
|
+
// the user keys are different. Example: FindShortestSeparator("abc","abd")
|
|
66
|
+
// returns "abc" unchanged because incrementing 'c' would yield "abd" which
|
|
67
|
+
// is not < limit. When the resulting separator matches the previous entry's
|
|
68
|
+
// separator, the blocks will be grouped into the same run in Finish().
|
|
69
|
+
// We must mark this as a same-user-key boundary so it gets a real seqno
|
|
70
|
+
// rather than kMaxSequenceNumber (which would trigger the overflow block
|
|
71
|
+
// assertion in Finish()).
|
|
72
|
+
if (!same_user_key && !buffered_entries_.empty() &&
|
|
73
|
+
buffered_entries_.back().separator_key == *separator_scratch) {
|
|
74
|
+
same_user_key = true;
|
|
75
|
+
if (!must_use_separator_with_seq_) {
|
|
76
|
+
must_use_separator_with_seq_ = true;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
} else {
|
|
80
|
+
// Last block: use the last key itself as the separator, NOT a shortened
|
|
81
|
+
// successor. This matches the standard ShortenedIndexBuilder behavior
|
|
82
|
+
// (see index_builder.h GetSeparatorWithSeq lines 278-286): it only calls
|
|
83
|
+
// FindShortInternalKeySuccessor when shortening_mode is
|
|
84
|
+
// kShortenSeparatorsAndSuccessor, which is not the default. With the
|
|
85
|
+
// default kShortenSeparators, the last block's separator is simply
|
|
86
|
+
// last_key_in_current_block.
|
|
87
|
+
//
|
|
88
|
+
// Why this matters: FindShortSuccessor can widen the key range. For
|
|
89
|
+
// example, if the actual last key is "9\xff\xff", FindShortSuccessor
|
|
90
|
+
// produces ":" (0x3A). The trie would then claim to cover keys up to
|
|
91
|
+
// ":", but the data block only contains keys up to "9\xff\xff". A seek
|
|
92
|
+
// targeting a key in that gap (e.g., "9\xff\xff\x01") would find a
|
|
93
|
+
// block via the trie that contains no matching data, causing iterator
|
|
94
|
+
// desynchronization — the trie index returns a valid block while the
|
|
95
|
+
// standard index correctly reports no match.
|
|
96
|
+
separator = last_key_in_current_block;
|
|
97
|
+
|
|
98
|
+
// Edge case: if this last block's separator matches the previous entry's
|
|
99
|
+
// separator, they share the same user key (same-user-key run boundary).
|
|
100
|
+
if (!buffered_entries_.empty() &&
|
|
101
|
+
comparator_->Compare(buffered_entries_.back().separator_key,
|
|
102
|
+
separator) == 0) {
|
|
103
|
+
same_user_key = true;
|
|
104
|
+
if (!must_use_separator_with_seq_) {
|
|
105
|
+
must_use_separator_with_seq_ = true;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Buffer the entry for deferred trie construction in Finish().
|
|
111
|
+
// We buffer rather than adding to the trie immediately because the
|
|
112
|
+
// all-or-nothing seqno encoding decision is made at Finish() time.
|
|
113
|
+
TrieBlockHandle handle;
|
|
114
|
+
handle.offset = block_handle.offset;
|
|
115
|
+
handle.size = block_handle.size;
|
|
116
|
+
|
|
117
|
+
BufferedEntry entry;
|
|
118
|
+
entry.separator_key = separator.ToString();
|
|
119
|
+
// For same-user-key boundaries, use the actual seqno of the last key.
|
|
120
|
+
// For different-user-key boundaries, use kMaxSequenceNumber (sentinel
|
|
121
|
+
// meaning "this is not a same-key boundary, never advance past it").
|
|
122
|
+
if (same_user_key) {
|
|
123
|
+
entry.seqno = last_key_seq;
|
|
124
|
+
} else {
|
|
125
|
+
entry.seqno = kMaxSequenceNumber;
|
|
126
|
+
}
|
|
127
|
+
entry.handle = handle;
|
|
128
|
+
buffered_entries_.push_back(std::move(entry));
|
|
129
|
+
|
|
130
|
+
return separator;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Intentionally a no-op; all three arguments are ignored. This builder
// constructs its trie from the separator keys collected in AddIndexEntry(),
// not from individual key-value pairs.
void TrieIndexBuilder::OnKeyAdded(const Slice& /*key*/, ValueType /*type*/,
                                  const Slice& /*value*/) {}
|
|
138
|
+
|
|
139
|
+
// Feeds every buffered index entry into the trie builder, finalizes the trie
// and publishes the serialized bytes through `index_contents`.
//
// Returns InvalidArgument when called a second time on the same builder,
// otherwise OK.
Status TrieIndexBuilder::Finish(Slice* index_contents) {
  if (finished_) {
    return Status::InvalidArgument("TrieIndexBuilder::Finish called twice");
  }
  finished_ = true;

  // The seqno side-table is all-or-nothing: it is enabled iff AddIndexEntry()
  // raised the sticky must_use_separator_with_seq_ flag.
  const bool with_seqno = must_use_separator_with_seq_;
  trie_builder_.SetHasSeqnoEncoding(with_seqno);

  if (!with_seqno) {
    // Common case: every buffered separator goes straight into the trie and
    // no seqno data is stored.
    for (const auto& buffered : buffered_entries_) {
      trie_builder_.AddKey(Slice(buffered.separator_key), buffered.handle);
    }
  } else {
    // Consecutive entries with an identical separator form a "run". Only the
    // first entry of a run becomes a trie key (the primary block); the rest
    // are recorded as overflow blocks in the side-table.
    const size_t total = buffered_entries_.size();
    size_t run_start = 0;
    while (run_start < total) {
      const auto& primary = buffered_entries_[run_start];

      // Find the end (exclusive) of the run that starts at run_start.
      size_t run_end = run_start + 1;
      while (run_end < total &&
             buffered_entries_[run_end].separator_key ==
                 primary.separator_key) {
        ++run_end;
      }
      const uint32_t block_count = static_cast<uint32_t>(run_end - run_start);

      // AddIndexEntry() tags boundaries between different user keys with
      // kMaxSequenceNumber; the trie stores those as 0, the sentinel for
      // "never advance past this leaf".
      uint64_t primary_seqno = primary.seqno;
      if (primary_seqno == kMaxSequenceNumber) {
        primary_seqno = 0;
      }
      trie_builder_.AddKeyWithSeqno(Slice(primary.separator_key),
                                    primary.handle, primary_seqno,
                                    block_count);

      // 2nd, 3rd, ... blocks of the run. These only occur inside same-key
      // runs, so their seqno is a real one taken from the block's last key
      // and never kMaxSequenceNumber. It can legitimately be 0 (e.g. after a
      // bottommost compaction zeroed the sequence numbers).
      for (size_t j = run_start + 1; j < run_end; ++j) {
        const auto& overflow = buffered_entries_[j];
        assert(overflow.seqno != kMaxSequenceNumber);
        trie_builder_.AddOverflowBlock(overflow.handle, overflow.seqno);
      }

      run_start = run_end;
    }
  }

  // The buffered entries are no longer needed once the trie has them.
  buffered_entries_.clear();
  buffered_entries_.shrink_to_fit();

  // Finish the trie builder unconditionally, even when no key was added, so
  // the output is a serialized trie rather than an empty Slice (which the
  // reader would reject with "data too short for header").
  trie_builder_.Finish();
  *index_contents = trie_builder_.GetSerializedData();
  return Status::OK();
}
|
|
216
|
+
|
|
217
|
+
// ============================================================================
|
|
218
|
+
// TrieIndexIterator
|
|
219
|
+
// ============================================================================
|
|
220
|
+
|
|
221
|
+
// Builds an iterator over `trie`.
//
// `trie` and `comparator` are stored as raw pointers (presumably they must
// outlive the iterator -- confirm against the owner). `has_seqno_encoding`
// selects whether the seqno/overflow side-table is consulted while iterating.
//
// The iterator starts with no prepared scan ranges and with the overflow
// state describing a single-block run (index 0 of 1).
TrieIndexIterator::TrieIndexIterator(const LoudsTrie* trie,
                                     const Comparator* comparator,
                                     bool has_seqno_encoding)
    : comparator_(comparator),
      iter_(trie),
      trie_(trie),
      current_scan_idx_(0),
      prepared_(false),
      has_seqno_encoding_(has_seqno_encoding),
      overflow_run_index_(0),  // 0 = positioned on the primary block
      overflow_run_size_(1),   // 1 = primary block only, no overflow blocks
      overflow_base_idx_(0) {}
|
|
233
|
+
|
|
234
|
+
void TrieIndexIterator::Prepare(const ScanOptions scan_opts[],
|
|
235
|
+
size_t num_opts) {
|
|
236
|
+
scan_opts_.clear();
|
|
237
|
+
scan_opts_.reserve(num_opts);
|
|
238
|
+
for (size_t i = 0; i < num_opts; i++) {
|
|
239
|
+
scan_opts_.push_back(scan_opts[i]);
|
|
240
|
+
}
|
|
241
|
+
current_scan_idx_ = 0;
|
|
242
|
+
prepared_ = true;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// Positions the iterator on the first trie leaf and reports it in `result`.
//
// On an empty trie, `result->key` is set to an empty Slice and the bound
// check to kUnknown. Otherwise `result->key` refers to an iterator-owned copy
// of the separator and the bound check is kInbound. Always returns OK.
Status TrieIndexIterator::SeekToFirstAndGetResult(IterateResult* result) {
  // Start from "primary block of a single-block run"; refined below once the
  // first leaf is known.
  overflow_run_index_ = 0;
  overflow_run_size_ = 1;
  overflow_base_idx_ = 0;

  const bool positioned = iter_.SeekToFirst();
  if (!positioned) {
    result->bound_check_result = IterBoundCheck::kUnknown;
    result->key = Slice();
    return Status::OK();
  }

  // Copy the separator into our own scratch buffer and hand out a Slice over
  // that copy.
  const Slice leaf_key = iter_.Key();
  current_key_scratch_ = leaf_key.ToString();
  result->key = Slice(current_key_scratch_);

  if (has_seqno_encoding_) {
    // Load the run description of the first leaf for later Next() calls.
    const uint64_t first_leaf = iter_.LeafIndex();
    overflow_run_size_ = trie_->GetLeafBlockCount(first_leaf);
    overflow_base_idx_ = trie_->GetOverflowBase(first_leaf);
  }

  // The very first entry is always reported in bounds: there is no seek
  // target to compare against the limit, and the first block cannot precede
  // any scan range.
  result->bound_check_result = IterBoundCheck::kInbound;
  return Status::OK();
}
|
|
275
|
+
|
|
276
|
+
// Positions the iterator on the block that may contain (`target`,
// `context.target_seq`) and reports it in `result`.
//
// `target` is a user key; the trie stores user-key separators. When the trie
// carries a seqno side-table, the sequence number is used after the trie seek
// to pick the right block inside a run of blocks sharing one separator.
//
// On success `result->key` refers to an iterator-owned copy of the separator
// and `result->bound_check_result` is CheckBounds(target). When no block can
// contain the target, `result->key` is empty and the bound check is kUnknown
// (not kOutOfBound): exhausting this SST's trie says nothing about the upper
// bound, and kOutOfBound would make LevelIterator stop scanning the level
// prematurely. Always returns OK.
Status TrieIndexIterator::SeekAndGetResult(const Slice& target,
                                           IterateResult* result,
                                           const SeekContext& context) {
  const SequenceNumber target_seq = context.target_seq;

  // Copies the current leaf's separator into our scratch buffer and points
  // result->key at that copy.
  auto publish_current_key = [this, result]() {
    const Slice leaf_key = iter_.Key();
    current_key_scratch_ = leaf_key.ToString();
    result->key = Slice(current_key_scratch_);
  };

  // Reports "target is past every block of this SST".
  auto report_exhausted = [result]() {
    result->bound_check_result = IterBoundCheck::kUnknown;
    result->key = Slice();
    return Status::OK();
  };

  // Skip prepared scans whose limit <= target. This covers the multi-scan
  // case where the caller seeks into a later range after the previous one
  // returned kOutOfBound.
  if (prepared_) {
    while (current_scan_idx_ < scan_opts_.size()) {
      const auto& opts = scan_opts_[current_scan_idx_];
      if (!opts.range.limit.has_value() ||
          comparator_->Compare(target, opts.range.limit.value()) < 0) {
        break;
      }
      current_scan_idx_++;
    }
  }

  // Reset overflow state to "primary block of a single-block run".
  overflow_run_index_ = 0;
  overflow_run_size_ = 1;
  overflow_base_idx_ = 0;

  if (!iter_.Seek(target)) {
    // No leaf has a key >= target.
    return report_exhausted();
  }
  publish_current_key();

  // ---- Post-seek correction using the seqno side-table ----
  //
  // Separators are upper bounds on block contents, and the stored seqno is
  // that of the last key written to the block. For a same-user-key boundary
  // the separator IS the user key, so the seqno decides which block of the
  // run is correct:
  //   - target_seq >= leaf_seqno: this block (a higher seqno sorts earlier
  //     in internal-key order for the same user key).
  //   - target_seq <  leaf_seqno: the target sorts after the separator, so
  //     move on to a later block of the run.
  // leaf_seqno == 0 marks a non-boundary leaf that is never advanced past.
  if (has_seqno_encoding_ && iter_.Valid()) {
    const uint64_t leaf_idx = iter_.LeafIndex();
    const uint64_t leaf_seqno = trie_->GetLeafSeqno(leaf_idx);
    const uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
    const uint32_t base = trie_->GetOverflowBase(leaf_idx);

    if (leaf_seqno != 0 && target_seq < leaf_seqno) {
      // Walk the overflow blocks (2nd, 3rd, ... of the run) in order.
      //
      // The bound is written as `oi + 1 < block_count` instead of
      // `oi < block_count - 1`: the arithmetic is unsigned, so a block_count
      // of 0 (not expected from a well-formed index) would otherwise wrap to
      // UINT32_MAX and scan far past the overflow table.
      bool found = false;
      for (uint32_t oi = 0; oi + 1 < block_count; oi++) {
        const uint64_t ov_seqno = trie_->GetOverflowSeqno(base + oi);
        if (ov_seqno == 0 || target_seq >= ov_seqno) {
          // This overflow block is the right one.
          overflow_run_index_ = oi + 1;  // 1-based (0 = primary)
          overflow_run_size_ = block_count;
          overflow_base_idx_ = base;
          found = true;
          break;
        }
      }

      if (!found) {
        // target_seq is below every seqno of this run: continue with the
        // trie leaf that follows the run.
        if (!iter_.Next()) {
          return report_exhausted();
        }
        publish_current_key();

        // The new leaf may itself head a run (adjacent same-key runs for
        // different user keys); start on its primary block.
        const uint64_t new_leaf = iter_.LeafIndex();
        overflow_run_index_ = 0;
        overflow_run_size_ = trie_->GetLeafBlockCount(new_leaf);
        overflow_base_idx_ = trie_->GetOverflowBase(new_leaf);
      }
    } else {
      // Right block (common path). Remember the run so that subsequent
      // Next() calls can step through its overflow blocks.
      overflow_run_index_ = 0;
      overflow_run_size_ = block_count;
      overflow_base_idx_ = base;
    }
  }

  result->bound_check_result = CheckBounds(target);
  return Status::OK();
}
|
|
403
|
+
|
|
404
|
+
// Advances to the next block and reports it in `result`.
//
// Inside a run of blocks that share one separator only the overflow index
// moves; otherwise the underlying trie iterator steps to the next leaf. The
// bound check uses the separator the iterator was on before this call as the
// reference key. When the trie is exhausted, `result->key` is empty and the
// bound check is kUnknown. Always returns OK.
Status TrieIndexIterator::NextAndGetResult(IterateResult* result) {
  // Remember the separator we are leaving; CheckBounds() needs it.
  prev_key_scratch_ = current_key_scratch_;

  const bool more_blocks_in_run = overflow_run_index_ + 1 < overflow_run_size_;
  if (more_blocks_in_run) {
    // Same separator, next overflow block: no trie traversal required.
    ++overflow_run_index_;
    result->key = Slice(current_key_scratch_);
    result->bound_check_result = CheckBounds(Slice(prev_key_scratch_));
    return Status::OK();
  }

  // The run is exhausted; move to the next trie leaf, starting again from
  // "primary block of a single-block run".
  overflow_run_index_ = 0;
  overflow_run_size_ = 1;
  overflow_base_idx_ = 0;

  const bool advanced = iter_.Next();
  if (!advanced) {
    // Past the last block of this SST. kUnknown rather than kOutOfBound: the
    // end of this trie says nothing about the scan's upper bound.
    result->bound_check_result = IterBoundCheck::kUnknown;
    result->key = Slice();
    return Status::OK();
  }

  // Publish an iterator-owned copy of the new separator.
  const Slice leaf_key = iter_.Key();
  current_key_scratch_ = leaf_key.ToString();
  result->key = Slice(current_key_scratch_);

  if (has_seqno_encoding_ && iter_.Valid()) {
    // Load the run description of the new leaf.
    const uint64_t new_leaf = iter_.LeafIndex();
    overflow_run_size_ = trie_->GetLeafBlockCount(new_leaf);
    overflow_base_idx_ = trie_->GetOverflowBase(new_leaf);
  }

  result->bound_check_result = CheckBounds(Slice(prev_key_scratch_));
  return Status::OK();
}
|
|
445
|
+
|
|
446
|
+
// Returns the handle (offset, size) of the block the iterator is currently
// positioned on: the trie leaf's own handle for the primary block, or the
// matching side-table entry for an overflow block.
UserDefinedIndexBuilder::BlockHandle TrieIndexIterator::value() {
  if (overflow_run_index_ != 0) {
    // overflow_run_index_ is 1-based (0 means "primary"), while the overflow
    // array is 0-based, hence the -1.
    const uint32_t side_table_idx =
        overflow_base_idx_ + overflow_run_index_ - 1;
    auto overflow_handle = trie_->GetOverflowHandle(side_table_idx);
    return UserDefinedIndexBuilder::BlockHandle{overflow_handle.offset,
                                                overflow_handle.size};
  }

  // Primary block: the handle stored with the trie leaf itself.
  auto primary_handle = iter_.Value();
  return UserDefinedIndexBuilder::BlockHandle{primary_handle.offset,
                                              primary_handle.size};
}
|
|
458
|
+
|
|
459
|
+
// Classifies the current position against the active prepared scan range.
//
// `reference_key` is deliberately NOT the current separator. Separators are
// upper bounds on block contents, so comparing the separator with the limit
// would reject blocks that still hold keys below the limit. Callers pass:
//   - Seek: the seek target.
//   - Next: the previous separator.
// If the reference key is below the limit, the current block may contain
// in-bound keys. The answer is conservative: a block that is entirely out of
// bounds can still be reported kInbound, and the data-level iterator does the
// per-key filtering.
IterBoundCheck TrieIndexIterator::CheckBounds(
    const Slice& reference_key) const {
  // Without prepared scan ranges there is nothing to check against.
  const bool has_bounds = prepared_ && !scan_opts_.empty();
  if (!has_bounds) {
    return IterBoundCheck::kInbound;
  }

  // Every prepared range has already been skipped.
  if (current_scan_idx_ >= scan_opts_.size()) {
    return IterBoundCheck::kOutOfBound;
  }

  const auto& upper = scan_opts_[current_scan_idx_].range.limit;
  const bool at_or_past_limit =
      upper.has_value() &&
      comparator_->Compare(reference_key, upper.value()) >= 0;
  return at_or_past_limit ? IterBoundCheck::kOutOfBound
                          : IterBoundCheck::kInbound;
}
|
|
493
|
+
|
|
494
|
+
// ============================================================================
|
|
495
|
+
// TrieIndexReader
|
|
496
|
+
// ============================================================================
|
|
497
|
+
|
|
498
|
+
// Creates a reader with no index data loaded yet (data_size_ is 0 until
// InitFromSlice() is called). `comparator` is stored as-is and later handed
// to the iterators this reader creates.
TrieIndexReader::TrieIndexReader(const Comparator* comparator)
    : comparator_(comparator),
      data_size_(0) {}
|
|
500
|
+
|
|
501
|
+
// Loads the serialized trie from `data` and returns the trie's own
// initialization status.
//
// The size of `data` is recorded first, for memory accounting, and is kept
// even when initialization fails.
// NOTE(review): the trie appears to keep pointers into `data` (zero-copy), so
// the caller presumably has to keep that buffer alive -- confirm.
Status TrieIndexReader::InitFromSlice(const Slice& data) {
  data_size_ = data.size();
  Status init_status = trie_.InitFromData(data);
  return init_status;
}
|
|
505
|
+
|
|
506
|
+
std::unique_ptr<UserDefinedIndexIterator> TrieIndexReader::NewIterator(
|
|
507
|
+
const ReadOptions& /*read_options*/) {
|
|
508
|
+
return std::make_unique<TrieIndexIterator>(&trie_, comparator_,
|
|
509
|
+
trie_.HasSeqnoEncoding());
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
size_t TrieIndexReader::ApproximateMemoryUsage() const {
|
|
513
|
+
// The trie uses zero-copy pointers into the serialized data for bitvectors
|
|
514
|
+
// and handle arrays, so the base cost is the serialized data size. On top
|
|
515
|
+
// of that, InitFromData() heap-allocates child position lookup tables
|
|
516
|
+
// (s_child_start_pos_ and s_child_end_pos_) for Select-free sparse
|
|
517
|
+
// traversal — 8 bytes per sparse internal node.
|
|
518
|
+
return data_size_ + trie_.ApproximateAuxMemoryUsage();
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
// ============================================================================
|
|
522
|
+
// TrieIndexFactory
|
|
523
|
+
// ============================================================================
|
|
524
|
+
|
|
525
|
+
// Creates a TrieIndexBuilder in `builder`.
//
// A null `option.comparator` is treated as BytewiseComparator(), so the
// builder never receives a null comparator (which would crash when
// AddIndexEntry compares keys). Any other comparator is rejected with
// NotSupported: the trie walks keys byte by byte in lexicographic order, and
// a comparator with a different ordering (e.g. ReverseBytewiseComparator or a
// custom one) would yield separators that the trie cannot seek correctly.
Status TrieIndexFactory::NewBuilder(
    const UserDefinedIndexOption& option,
    std::unique_ptr<UserDefinedIndexBuilder>& builder) const {
  const Comparator* const bytewise = BytewiseComparator();
  const Comparator* cmp =
      (option.comparator != nullptr) ? option.comparator : bytewise;

  if (cmp != bytewise) {
    return Status::NotSupported(
        "TrieIndexFactory requires BytewiseComparator; got: ", cmp->Name());
  }

  builder = std::make_unique<TrieIndexBuilder>(cmp);
  return Status::OK();
}
|
|
547
|
+
|
|
548
|
+
// Creates a TrieIndexReader over `index_block` and stores it in `reader`.
//
// A null `option.comparator` is treated as BytewiseComparator(); any other
// comparator is rejected with NotSupported. If the trie cannot be initialized
// from `index_block`, that status is returned and `reader` is left untouched.
Status TrieIndexFactory::NewReader(
    const UserDefinedIndexOption& option, Slice& index_block,
    std::unique_ptr<UserDefinedIndexReader>& reader) const {
  const Comparator* cmp = option.comparator;
  if (cmp == nullptr) {
    cmp = BytewiseComparator();
  }
  if (cmp != BytewiseComparator()) {
    return Status::NotSupported(
        "TrieIndexFactory requires BytewiseComparator; got: ", cmp->Name());
  }

  auto trie_reader = std::make_unique<TrieIndexReader>(cmp);
  if (Status s = trie_reader->InitFromSlice(index_block); !s.ok()) {
    return s;
  }

  reader = std::move(trie_reader);
  return Status::OK();
}
|
|
565
|
+
|
|
566
|
+
} // namespace trie_index
|
|
567
|
+
} // namespace ROCKSDB_NAMESPACE
|