@nxtedition/rocksdb 15.4.0 → 15.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +24 -19
- package/cache.js +1 -1
- package/chained-batch.js +12 -3
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/index.js +11 -2
- package/iterator.js +15 -7
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -0,0 +1,2843 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
// DB-level tests for the trie-based User Defined Index (UDI). Validates that
|
|
7
|
+
// a DB opened with the trie UDI factory correctly handles all operation types
|
|
8
|
+
// (Put, Delete, Merge, SingleDelete, PutEntity, TimedPut, DeleteRange) through
|
|
9
|
+
// flush and compaction, and that the resulting SST files are readable with
|
|
10
|
+
// correct data.
|
|
11
|
+
//
|
|
12
|
+
// These tests complement the SST-level tests in trie_index_test.cc (which use
|
|
13
|
+
// SstFileWriter and are limited to Put/Delete/Merge) by exercising the full
|
|
14
|
+
// DB path including CompactionIterator, memtable flush, and the UDI builder
|
|
15
|
+
// wrapper's ValueType mapping and kTypeValuePreferredSeqno handling.
|
|
16
|
+
|
|
17
|
+
#include <memory>
|
|
18
|
+
#include <string>
|
|
19
|
+
#include <vector>
|
|
20
|
+
|
|
21
|
+
#include "port/port.h"
|
|
22
|
+
#include "rocksdb/db.h"
|
|
23
|
+
#include "rocksdb/options.h"
|
|
24
|
+
#include "rocksdb/slice.h"
|
|
25
|
+
#include "rocksdb/sst_file_writer.h"
|
|
26
|
+
#include "rocksdb/status.h"
|
|
27
|
+
#include "rocksdb/table.h"
|
|
28
|
+
#include "rocksdb/utilities/transaction.h"
|
|
29
|
+
#include "rocksdb/utilities/transaction_db.h"
|
|
30
|
+
#include "rocksdb/wide_columns.h"
|
|
31
|
+
#include "rocksdb/write_batch.h"
|
|
32
|
+
#include "test_util/testharness.h"
|
|
33
|
+
#include "test_util/testutil.h"
|
|
34
|
+
#include "util/compression.h"
|
|
35
|
+
#include "util/random.h"
|
|
36
|
+
#include "utilities/merge_operators.h"
|
|
37
|
+
#include "utilities/trie_index/trie_index_factory.h"
|
|
38
|
+
|
|
39
|
+
namespace ROCKSDB_NAMESPACE {
|
|
40
|
+
namespace trie_index {
|
|
41
|
+
|
|
42
|
+
// Builds an 8-byte key body holding `k` (converted to uint64_t) in big-endian
// byte order, i.e. most significant byte first. This mirrors the key layout
// used by db_stress's test_batches_snapshots mode.
static std::string MakeKeyBody(int k) {
  constexpr int kNumBytes = 8;
  const uint64_t bits = static_cast<uint64_t>(k);
  std::string encoded;
  encoded.reserve(kNumBytes);
  // Emit bytes from the highest-order one down to the lowest-order one.
  for (int shift = (kNumBytes - 1) * 8; shift >= 0; shift -= 8) {
    encoded.push_back(static_cast<char>((bits >> shift) & 0xff));
  }
  return encoded;
}
|
|
53
|
+
|
|
54
|
+
class TrieIndexDBTest : public testing::Test {
|
|
55
|
+
protected:
|
|
56
|
+
void SetUp() override {
|
|
57
|
+
trie_factory_ = std::make_shared<TrieIndexFactory>();
|
|
58
|
+
dbname_ = test::PerThreadDBPath("trie_index_db_test");
|
|
59
|
+
ASSERT_OK(DestroyDB(dbname_, Options()));
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
void TearDown() override {
|
|
63
|
+
if (db_) {
|
|
64
|
+
EXPECT_OK(db_->Close());
|
|
65
|
+
db_.reset();
|
|
66
|
+
}
|
|
67
|
+
EXPECT_OK(DestroyDB(dbname_, last_options_));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Opens a DB with the trie UDI factory configured. Caller should set
|
|
71
|
+
// options_ fields before calling this. An optional block_size overrides
|
|
72
|
+
// the default to force more data blocks in the SST.
|
|
73
|
+
Status OpenDB(int block_size = 0) {
|
|
74
|
+
options_.create_if_missing = true;
|
|
75
|
+
BlockBasedTableOptions table_options;
|
|
76
|
+
table_options.user_defined_index_factory = trie_factory_;
|
|
77
|
+
if (block_size > 0) {
|
|
78
|
+
table_options.block_size = block_size;
|
|
79
|
+
}
|
|
80
|
+
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
81
|
+
last_options_ = options_;
|
|
82
|
+
return DB::Open(options_, dbname_, &db_);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Returns a ReadOptions that routes reads through the standard binary
|
|
86
|
+
// search index (the default when table_index_factory is null).
|
|
87
|
+
ReadOptions StandardIndexReadOptions() const { return ReadOptions(); }
|
|
88
|
+
|
|
89
|
+
// Returns a ReadOptions that routes reads through the trie UDI index.
|
|
90
|
+
ReadOptions TrieIndexReadOptions() const {
|
|
91
|
+
ReadOptions ro;
|
|
92
|
+
ro.table_index_factory = trie_factory_.get();
|
|
93
|
+
return ro;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Collects all visible keys via forward scan using the given ReadOptions.
|
|
97
|
+
std::vector<std::string> ScanAllKeys(const ReadOptions& ro) {
|
|
98
|
+
std::vector<std::string> keys;
|
|
99
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
100
|
+
iter->SeekToFirst();
|
|
101
|
+
for (; iter->Valid(); iter->Next()) {
|
|
102
|
+
keys.push_back(iter->key().ToString());
|
|
103
|
+
}
|
|
104
|
+
EXPECT_OK(iter->status());
|
|
105
|
+
return keys;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Collects all visible keys via forward scan using the standard index.
|
|
109
|
+
std::vector<std::string> ScanAllKeys() {
|
|
110
|
+
return ScanAllKeys(StandardIndexReadOptions());
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Collects all visible (key, value) pairs via forward scan.
|
|
114
|
+
std::vector<std::pair<std::string, std::string>> ScanAllKeyValues(
|
|
115
|
+
const ReadOptions& ro) {
|
|
116
|
+
std::vector<std::pair<std::string, std::string>> kvs;
|
|
117
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
118
|
+
iter->SeekToFirst();
|
|
119
|
+
for (; iter->Valid(); iter->Next()) {
|
|
120
|
+
kvs.emplace_back(iter->key().ToString(), iter->value().ToString());
|
|
121
|
+
}
|
|
122
|
+
EXPECT_OK(iter->status());
|
|
123
|
+
return kvs;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Verifies that forward scan via SeekToFirst+Next produces the same key
|
|
127
|
+
// set through both the standard index and the trie index.
|
|
128
|
+
void VerifyForwardScanBothIndexes(
|
|
129
|
+
const std::vector<std::string>& expected_keys) {
|
|
130
|
+
{
|
|
131
|
+
SCOPED_TRACE("standard index");
|
|
132
|
+
ASSERT_EQ(ScanAllKeys(StandardIndexReadOptions()), expected_keys);
|
|
133
|
+
}
|
|
134
|
+
{
|
|
135
|
+
SCOPED_TRACE("trie index");
|
|
136
|
+
ASSERT_EQ(ScanAllKeys(TrieIndexReadOptions()), expected_keys);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Verifies that forward scan via SeekToFirst+Next produces the same
|
|
141
|
+
// (key, value) pairs through both indexes.
|
|
142
|
+
void VerifyForwardScanBothIndexes(
|
|
143
|
+
const std::vector<std::pair<std::string, std::string>>& expected_kvs) {
|
|
144
|
+
{
|
|
145
|
+
SCOPED_TRACE("standard index");
|
|
146
|
+
ASSERT_EQ(ScanAllKeyValues(StandardIndexReadOptions()), expected_kvs);
|
|
147
|
+
}
|
|
148
|
+
{
|
|
149
|
+
SCOPED_TRACE("trie index");
|
|
150
|
+
ASSERT_EQ(ScanAllKeyValues(TrieIndexReadOptions()), expected_kvs);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Verifies that a point Get returns the expected value through both indexes.
|
|
155
|
+
void VerifyGetBothIndexes(const std::string& key,
|
|
156
|
+
const std::string& expected_value) {
|
|
157
|
+
for (const auto& ro :
|
|
158
|
+
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
159
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
160
|
+
std::string value;
|
|
161
|
+
ASSERT_OK(db_->Get(ro, key, &value));
|
|
162
|
+
ASSERT_EQ(value, expected_value);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// Verifies that a point Get returns NotFound through both indexes.
|
|
167
|
+
void VerifyGetNotFoundBothIndexes(const std::string& key) {
|
|
168
|
+
for (const auto& ro :
|
|
169
|
+
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
170
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
171
|
+
std::string value;
|
|
172
|
+
ASSERT_TRUE(db_->Get(ro, key, &value).IsNotFound());
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// Verifies Get with a snapshot through both indexes.
|
|
177
|
+
void VerifyGetBothIndexes(const Snapshot* snap, const std::string& key,
|
|
178
|
+
const std::string& expected_value) {
|
|
179
|
+
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
180
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
181
|
+
: "standard index");
|
|
182
|
+
base_ro.snapshot = snap;
|
|
183
|
+
std::string value;
|
|
184
|
+
ASSERT_OK(db_->Get(base_ro, key, &value));
|
|
185
|
+
ASSERT_EQ(value, expected_value);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Verifies Get returns NotFound with a snapshot through both indexes.
|
|
190
|
+
void VerifyGetNotFoundBothIndexes(const Snapshot* snap,
|
|
191
|
+
const std::string& key) {
|
|
192
|
+
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
193
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
194
|
+
: "standard index");
|
|
195
|
+
base_ro.snapshot = snap;
|
|
196
|
+
std::string value;
|
|
197
|
+
ASSERT_TRUE(db_->Get(base_ro, key, &value).IsNotFound());
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// Verifies that a forward scan with a snapshot produces the expected
|
|
202
|
+
// (key, value) pairs through both indexes.
|
|
203
|
+
void VerifyForwardScanBothIndexes(
|
|
204
|
+
const Snapshot* snap,
|
|
205
|
+
const std::vector<std::pair<std::string, std::string>>& expected_kvs) {
|
|
206
|
+
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
207
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
208
|
+
: "standard index");
|
|
209
|
+
base_ro.snapshot = snap;
|
|
210
|
+
ASSERT_EQ(ScanAllKeyValues(base_ro), expected_kvs);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Verifies that Seek to a specific key through both indexes returns the
|
|
215
|
+
// same result.
|
|
216
|
+
void VerifySeekBothIndexes(const std::string& seek_key,
|
|
217
|
+
const std::string& expected_key,
|
|
218
|
+
const std::string& expected_value) {
|
|
219
|
+
for (const auto& ro :
|
|
220
|
+
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
221
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
222
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
223
|
+
iter->Seek(seek_key);
|
|
224
|
+
ASSERT_TRUE(iter->Valid());
|
|
225
|
+
ASSERT_EQ(iter->key().ToString(), expected_key);
|
|
226
|
+
ASSERT_EQ(iter->value().ToString(), expected_value);
|
|
227
|
+
ASSERT_OK(iter->status());
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// Verifies that Seek with a snapshot through both indexes returns the
|
|
232
|
+
// same result.
|
|
233
|
+
void VerifySeekBothIndexes(const Snapshot* snap, const std::string& seek_key,
|
|
234
|
+
const std::string& expected_key,
|
|
235
|
+
const std::string& expected_value) {
|
|
236
|
+
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
237
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
238
|
+
: "standard index");
|
|
239
|
+
base_ro.snapshot = snap;
|
|
240
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(base_ro));
|
|
241
|
+
iter->Seek(seek_key);
|
|
242
|
+
ASSERT_TRUE(iter->Valid());
|
|
243
|
+
ASSERT_EQ(iter->key().ToString(), expected_key);
|
|
244
|
+
ASSERT_EQ(iter->value().ToString(), expected_value);
|
|
245
|
+
ASSERT_OK(iter->status());
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
// Opens without UDI factory (standard index only). Used to test graceful
|
|
250
|
+
// degradation when reopening a DB that has UDI SSTs.
|
|
251
|
+
Status OpenDBWithoutUDI(int block_size = 0) {
|
|
252
|
+
options_.create_if_missing = true;
|
|
253
|
+
BlockBasedTableOptions table_options;
|
|
254
|
+
// Deliberately no user_defined_index_factory.
|
|
255
|
+
if (block_size > 0) {
|
|
256
|
+
table_options.block_size = block_size;
|
|
257
|
+
}
|
|
258
|
+
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
259
|
+
last_options_ = options_;
|
|
260
|
+
return DB::Open(options_, dbname_, &db_);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// Verify prefix-scan lockstep across `num_prefixes` iterators.
|
|
264
|
+
//
|
|
265
|
+
// Creates one iterator per prefix digit (0..num_prefixes-1), seeks each to
|
|
266
|
+
// its prefix, then walks all in lockstep asserting key bodies match. When
|
|
267
|
+
// `use_upper_bounds` is true, even-numbered iterators get an upper bound
|
|
268
|
+
// set to the next prefix. When `verify_values` is true, value bodies are
|
|
269
|
+
// also cross-checked.
|
|
270
|
+
//
|
|
271
|
+
// Returns the number of keys walked (per-prefix).
|
|
272
|
+
uint64_t VerifyPrefixScanLockstep(const ReadOptions& base_ro,
|
|
273
|
+
int num_prefixes, bool use_upper_bounds,
|
|
274
|
+
bool verify_values,
|
|
275
|
+
const std::string& trace_context = "") {
|
|
276
|
+
std::vector<std::unique_ptr<Iterator>> iters(num_prefixes);
|
|
277
|
+
std::vector<std::string> prefixes(num_prefixes);
|
|
278
|
+
std::vector<Slice> prefix_slices(num_prefixes);
|
|
279
|
+
std::vector<ReadOptions> ro_copies(num_prefixes);
|
|
280
|
+
std::vector<std::string> upper_bounds(num_prefixes);
|
|
281
|
+
std::vector<Slice> ub_slices(num_prefixes);
|
|
282
|
+
|
|
283
|
+
for (int d = 0; d < num_prefixes; ++d) {
|
|
284
|
+
prefixes[d] = std::to_string(d);
|
|
285
|
+
prefix_slices[d] = Slice(prefixes[d]);
|
|
286
|
+
ro_copies[d] = base_ro;
|
|
287
|
+
if (use_upper_bounds && d % 2 == 0) {
|
|
288
|
+
upper_bounds[d] = prefixes[d];
|
|
289
|
+
upper_bounds[d].back()++;
|
|
290
|
+
ub_slices[d] = upper_bounds[d];
|
|
291
|
+
ro_copies[d].iterate_upper_bound = &ub_slices[d];
|
|
292
|
+
}
|
|
293
|
+
iters[d].reset(db_->NewIterator(ro_copies[d]));
|
|
294
|
+
iters[d]->Seek(prefix_slices[d]);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
uint64_t count = 0;
|
|
298
|
+
while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) {
|
|
299
|
+
count++;
|
|
300
|
+
std::vector<std::string> keys(num_prefixes);
|
|
301
|
+
std::vector<std::string> values(num_prefixes);
|
|
302
|
+
for (int d = 0; d < num_prefixes; ++d) {
|
|
303
|
+
EXPECT_TRUE(iters[d]->Valid())
|
|
304
|
+
<< trace_context << " iter " << d << " invalid at step " << count;
|
|
305
|
+
EXPECT_TRUE(iters[d]->key().starts_with(prefix_slices[d]))
|
|
306
|
+
<< trace_context << " iter " << d << " out of prefix at step "
|
|
307
|
+
<< count;
|
|
308
|
+
if (!iters[d]->Valid()) {
|
|
309
|
+
return count;
|
|
310
|
+
}
|
|
311
|
+
keys[d] = iters[d]->key().ToString();
|
|
312
|
+
values[d] = iters[d]->value().ToString();
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
std::string key0_body = keys[0].substr(1);
|
|
316
|
+
for (int d = 1; d < num_prefixes; ++d) {
|
|
317
|
+
EXPECT_EQ(key0_body, keys[d].substr(1))
|
|
318
|
+
<< trace_context << " key body mismatch at step " << count
|
|
319
|
+
<< " iter " << d;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
if (verify_values) {
|
|
323
|
+
std::string val0 = values[0];
|
|
324
|
+
if (!val0.empty()) {
|
|
325
|
+
val0.pop_back();
|
|
326
|
+
}
|
|
327
|
+
for (int d = 1; d < num_prefixes; ++d) {
|
|
328
|
+
std::string vald = values[d];
|
|
329
|
+
if (!vald.empty()) {
|
|
330
|
+
vald.pop_back();
|
|
331
|
+
}
|
|
332
|
+
EXPECT_EQ(val0, vald) << trace_context << " value mismatch at step "
|
|
333
|
+
<< count << " iter " << d;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
for (int d = 0; d < num_prefixes; ++d) {
|
|
338
|
+
iters[d]->Next();
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
EXPECT_OK(iters[0]->status());
|
|
343
|
+
for (int d = 1; d < num_prefixes; ++d) {
|
|
344
|
+
EXPECT_TRUE(!iters[d]->Valid() ||
|
|
345
|
+
!iters[d]->key().starts_with(prefix_slices[d]))
|
|
346
|
+
<< trace_context << " iter " << d
|
|
347
|
+
<< " still has keys after iter 0 finished";
|
|
348
|
+
EXPECT_OK(iters[d]->status());
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
return count;
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
// Fixture state shared by the helpers above.
//   trie_factory_  trie index factory; installed as the table's
//                  user_defined_index_factory by OpenDB and as
//                  ReadOptions::table_index_factory by TrieIndexReadOptions.
//   dbname_        database path handed to DB::Open and DestroyDB.
//   options_       options each test configures before opening the DB.
//   last_options_  copy of options_ taken at the most recent open; TearDown
//                  passes it to DestroyDB.
//   db_            the open DB handle; closed and reset in TearDown.
std::shared_ptr<TrieIndexFactory> trie_factory_;
|
|
355
|
+
std::string dbname_;
|
|
356
|
+
Options options_;
|
|
357
|
+
Options last_options_;
|
|
358
|
+
std::unique_ptr<DB> db_;
|
|
359
|
+
};
|
|
360
|
+
|
|
361
|
+
// ============================================================================
|
|
362
|
+
// Flush tests
|
|
363
|
+
// ============================================================================
|
|
364
|
+
|
|
365
|
+
TEST_F(TrieIndexDBTest, FlushWithAllOperationTypes) {
  // Writes one entry per supported operation type through the DB API,
  // flushes them into a single SST, and checks that scans and point lookups
  // agree between the standard index and the trie UDI.
  options_.disable_auto_compactions = true;
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  ASSERT_OK(OpenDB());

  WriteOptions wo;
  auto* const default_cf = db_->DefaultColumnFamily();

  // Plain value.
  ASSERT_OK(db_->Put(wo, "key_01_put", "val_put"));
  // Merge operand.
  ASSERT_OK(db_->Merge(wo, "key_02_merge", "val_merge"));
  // Bare tombstone: nothing was ever written under this key.
  ASSERT_OK(db_->Delete(wo, "key_03_del"));
  // Put followed by SingleDelete of the same key.
  ASSERT_OK(db_->Put(wo, "key_04_sdel", "val_sdel"));
  ASSERT_OK(db_->SingleDelete(wo, "key_04_sdel"));
  // Wide-column entity; the "" column is what Get() is expected to return.
  ASSERT_OK(db_->PutEntity(wo, default_cf, "key_05_entity",
                           WideColumns{{"", "default_val"}, {"col1", "val1"}}));
  // Trailing plain value to anchor the end of the key range.
  ASSERT_OK(db_->Put(wo, "key_06_put", "val_put2"));

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Forward scan: key_03_del and key_04_sdel are expected to be hidden, the
  // other four keys visible in order.
  {
    const std::vector<std::string> visible_keys = {
        "key_01_put", "key_02_merge", "key_05_entity", "key_06_put"};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(visible_keys));
  }

  // Point lookups through both indexes.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "val_put"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_merge", "val_merge"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03_del"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04_sdel"));
  // For the entity, Get() yields the value of the default ("") column.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05_entity", "default_val"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06_put", "val_put2"));
}
|
|
415
|
+
|
|
416
|
+
TEST_F(TrieIndexDBTest, TimedPutFlush) {
  // Flushes a TimedPut next to a regular Put and a Merge, then checks that
  // the TimedPut's value reads back unchanged through both the standard
  // index and the trie UDI. Per the upstream notes, universal compaction
  // plus a non-zero preclude_last_level_data_seconds is what lets the
  // TimedPut entry keep its preferred-seqno form through flush, and
  // write_unix_time=0 keeps it from being converted to a plain value.
  options_.disable_auto_compactions = true;
  options_.compaction_style = kCompactionStyleUniversal;
  options_.preclude_last_level_data_seconds = 10000;
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  ASSERT_OK(OpenDB());

  WriteOptions wo;

  // Regular Put, to check it coexists with the TimedPut.
  ASSERT_OK(db_->Put(wo, "key_01_put", "val_put"));

  // TimedPut is issued through a WriteBatch.
  {
    WriteBatch batch;
    ASSERT_OK(batch.TimedPut(db_->DefaultColumnFamily(), "key_02_timed",
                             "val_timed", /*write_unix_time=*/0));
    ASSERT_OK(db_->Write(wo, &batch));
  }

  // Merge, so the flush carries mixed entry types.
  ASSERT_OK(db_->Merge(wo, "key_03_merge", "val_merge"));

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Point lookups: the stored TimedPut value must come back as written.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "val_put"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_timed", "val_timed"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03_merge", "val_merge"));

  // Forward scan: all three keys, in order, with their values.
  {
    using KeyValues = std::vector<std::pair<std::string, std::string>>;
    const KeyValues all_entries = {{"key_01_put", "val_put"},
                                   {"key_02_timed", "val_timed"},
                                   {"key_03_merge", "val_merge"}};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(all_entries));
  }
}
|
|
462
|
+
|
|
463
|
+
// ============================================================================
|
|
464
|
+
// Compaction tests
|
|
465
|
+
// ============================================================================
|
|
466
|
+
|
|
467
|
+
TEST_F(TrieIndexDBTest, CompactionWithMixedOpsAndSnapshots) {
  // Two flushes followed by a full compaction while a snapshot is held. The
  // snapshot is meant to make compaction keep more than one version of each
  // user key, so the output SST carries duplicate user keys with different
  // sequence numbers and value types. Both the current view and the snapshot
  // view are verified through both indexes.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  // Flush 1: initial values.
  ASSERT_OK(db_->Put(WriteOptions(), "key_aa", "v1"));
  ASSERT_OK(db_->Put(WriteOptions(), "key_bb", "v1"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key_cc", "m1"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Snapshot pinning the flush-1 versions. It is owned by a scope guard so
  // it is released on every exit path: a failed ASSERT_* below returns from
  // the test body early, and the snapshot must not still be held when
  // TearDown closes the DB.
  auto release_snapshot = [this](const Snapshot* s) {
    db_->ReleaseSnapshot(s);
  };
  std::unique_ptr<const Snapshot, decltype(release_snapshot)> snap_guard(
      db_->GetSnapshot(), release_snapshot);
  const Snapshot* snap = snap_guard.get();

  // Flush 2: updates that create new versions.
  ASSERT_OK(db_->Put(WriteOptions(), "key_aa", "v2"));
  ASSERT_OK(db_->Delete(WriteOptions(), "key_bb"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key_cc", "m2"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Compact the whole key range with the snapshot still held.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Current view (no snapshot): key_aa=v2, key_bb deleted, key_cc="m1,m2".
  {
    std::vector<std::pair<std::string, std::string>> expected = {
        {"key_aa", "v2"}, {"key_cc", "m1,m2"}};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
  }
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_aa", "v2"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_bb"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_cc", "m1,m2"));

  // Snapshot view: key_aa=v1, key_bb=v1, key_cc="m1".
  {
    std::vector<std::pair<std::string, std::string>> expected = {
        {"key_aa", "v1"}, {"key_bb", "v1"}, {"key_cc", "m1"}};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, expected));
  }
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_aa", "v1"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_bb", "v1"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_cc", "m1"));

  // snap_guard releases the snapshot when it goes out of scope here.
}
|
|
517
|
+
|
|
518
|
+
TEST_F(TrieIndexDBTest, CompactionWithAllOperationTypes) {
  // Exercises Put, Delete, Merge, SingleDelete and PutEntity across two
  // flushes with a snapshot held in between, then compacts the whole range.
  // Both the current view and the snapshot view are verified through both
  // indexes, so the compaction output has to be readable for every value
  // type written here.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  // Flush 1: initial values with diverse types.
  ASSERT_OK(db_->Put(WriteOptions(), "key_01_put", "v1"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key_02_merge", "m1"));
  ASSERT_OK(db_->Put(WriteOptions(), "key_03_sd_target", "sd_val"));
  ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
                           "key_04_entity", WideColumns{{"", "e1"}}));
  ASSERT_OK(db_->Put(WriteOptions(), "key_05_del_target", "del_val"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Snapshot pinning the flush-1 versions. It is owned by a scope guard so
  // it is released on every exit path: a failed ASSERT_* below returns from
  // the test body early, and the snapshot must not still be held when
  // TearDown closes the DB.
  auto release_snapshot = [this](const Snapshot* s) {
    db_->ReleaseSnapshot(s);
  };
  std::unique_ptr<const Snapshot, decltype(release_snapshot)> snap_guard(
      db_->GetSnapshot(), release_snapshot);
  const Snapshot* snap = snap_guard.get();

  // Flush 2: updates each key with a different operation type.
  ASSERT_OK(db_->Put(WriteOptions(), "key_01_put", "v2"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key_02_merge", "m2"));
  ASSERT_OK(db_->SingleDelete(WriteOptions(), "key_03_sd_target"));
  ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
                           "key_04_entity", WideColumns{{"", "e2"}}));
  ASSERT_OK(db_->Delete(WriteOptions(), "key_05_del_target"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Current view via both indexes: key_01=v2, key_02="m1,m2", key_03
  // single-deleted, key_04=e2, key_05 deleted.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "v2"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_merge", "m1,m2"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03_sd_target"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_04_entity", "e2"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_05_del_target"));

  // Current view scan via both indexes: only key_01, key_02, key_04 visible.
  {
    std::vector<std::string> expected = {"key_01_put", "key_02_merge",
                                         "key_04_entity"};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
  }

  // Snapshot view via both indexes: all original flush-1 values visible.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_01_put", "v1"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_02_merge", "m1"));
  ASSERT_NO_FATAL_FAILURE(
      VerifyGetBothIndexes(snap, "key_03_sd_target", "sd_val"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_04_entity", "e1"));
  ASSERT_NO_FATAL_FAILURE(
      VerifyGetBothIndexes(snap, "key_05_del_target", "del_val"));

  {
    std::vector<std::pair<std::string, std::string>> expected = {
        {"key_01_put", "v1"},
        {"key_02_merge", "m1"},
        {"key_03_sd_target", "sd_val"},
        {"key_04_entity", "e1"},
        {"key_05_del_target", "del_val"}};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, expected));
  }

  // snap_guard releases the snapshot when it goes out of scope here.
}
|
|
587
|
+
|
|
588
|
+
TEST_F(TrieIndexDBTest, TimedPutCompaction) {
  // Writes a TimedPut and a regular Put, pins them with a snapshot,
  // overwrites both keys, and compacts. The TimedPut's original value must
  // still read back unchanged under the snapshot, and the newer values must
  // be visible without it, through both indexes.
  options_.compaction_style = kCompactionStyleUniversal;
  options_.preclude_last_level_data_seconds = 10000;
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  // Flush 1: TimedPut + regular Put.
  {
    WriteBatch wb;
    ASSERT_OK(wb.TimedPut(db_->DefaultColumnFamily(), "key_01_timed",
                          "timed_v1", /*write_unix_time=*/0));
    ASSERT_OK(db_->Write(WriteOptions(), &wb));
  }
  ASSERT_OK(db_->Put(WriteOptions(), "key_02_put", "put_v1"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Snapshot pinning the flush-1 versions. It is owned by a scope guard so
  // it is released on every exit path: a failed ASSERT_* below returns from
  // the test body early, and the snapshot must not still be held when
  // TearDown closes the DB.
  auto release_snapshot = [this](const Snapshot* s) {
    db_->ReleaseSnapshot(s);
  };
  std::unique_ptr<const Snapshot, decltype(release_snapshot)> snap_guard(
      db_->GetSnapshot(), release_snapshot);
  const Snapshot* snap = snap_guard.get();

  // Flush 2: overwrite both keys with regular Puts.
  ASSERT_OK(db_->Put(WriteOptions(), "key_01_timed", "put_v2"));
  ASSERT_OK(db_->Put(WriteOptions(), "key_02_put", "put_v2"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Compact with the snapshot held, so the older TimedPut version of
  // key_01_timed is expected to survive next to the newer Put.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Current view via both indexes: both keys have the newer value.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_timed", "put_v2"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_put", "put_v2"));
  {
    std::vector<std::pair<std::string, std::string>> expected = {
        {"key_01_timed", "put_v2"}, {"key_02_put", "put_v2"}};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
  }

  // Snapshot view via both indexes: key_01 has the original TimedPut value,
  // key_02 has its original value.
  ASSERT_NO_FATAL_FAILURE(
      VerifyGetBothIndexes(snap, "key_01_timed", "timed_v1"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_02_put", "put_v1"));
  {
    std::vector<std::pair<std::string, std::string>> expected = {
        {"key_01_timed", "timed_v1"}, {"key_02_put", "put_v1"}};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, expected));
  }

  // snap_guard releases the snapshot when it goes out of scope here.
}
|
|
641
|
+
|
|
642
|
+
TEST_F(TrieIndexDBTest, CrossFlushSingleDelete) {
  // A Put lands in one SST and a SingleDelete of the same key in a later
  // one. The key must be invisible both before and after a full compaction,
  // and its neighbours must be untouched, through both indexes.
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  WriteOptions wo;

  // First SST: three plain Puts.
  ASSERT_OK(db_->Put(wo, "key_aa", "val_aa"));
  ASSERT_OK(db_->Put(wo, "key_bb", "val_bb"));
  ASSERT_OK(db_->Put(wo, "key_cc", "val_cc"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Second SST: SingleDelete aimed at the Put of key_bb above.
  ASSERT_OK(db_->SingleDelete(wo, "key_bb"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Pre-compaction: key_bb must already read as missing.
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_bb"));

  // Post-compaction: key_bb must still read as missing.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_bb"));

  // The other two keys keep their values.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_aa", "val_aa"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_cc", "val_cc"));

  {
    const std::vector<std::string> surviving_keys = {"key_aa", "key_cc"};
    ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(surviving_keys));
  }
}
|
|
675
|
+
|
|
676
|
+
// ============================================================================
|
|
677
|
+
// Iteration tests
|
|
678
|
+
// ============================================================================
|
|
679
|
+
|
|
680
|
+
TEST_F(TrieIndexDBTest, ReverseIteration) {
|
|
681
|
+
// Verifies that reverse iteration (SeekToLast, Prev, SeekForPrev) works
|
|
682
|
+
// correctly with mixed operation types. Forward scan and point lookups are
|
|
683
|
+
// verified through both indexes. Reverse operations use the standard index
|
|
684
|
+
// (the trie UDI iterator does not yet support SeekToLast/Prev/SeekForPrev).
|
|
685
|
+
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
686
|
+
options_.disable_auto_compactions = true;
|
|
687
|
+
ASSERT_OK(OpenDB());
|
|
688
|
+
|
|
689
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "v1"));
|
|
690
|
+
ASSERT_OK(db_->Merge(WriteOptions(), "key_02", "m1"));
|
|
691
|
+
ASSERT_OK(db_->Delete(WriteOptions(), "key_03"));
|
|
692
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_04", "v4"));
|
|
693
|
+
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), "key_05",
|
|
694
|
+
WideColumns{{"", "e5"}}));
|
|
695
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_06", "v6"));
|
|
696
|
+
|
|
697
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
698
|
+
|
|
699
|
+
// Visible keys: key_01, key_02, key_04, key_05, key_06 (key_03 deleted).
|
|
700
|
+
|
|
701
|
+
// Forward scan via both indexes.
|
|
702
|
+
{
|
|
703
|
+
std::vector<std::pair<std::string, std::string>> expected = {
|
|
704
|
+
{"key_01", "v1"},
|
|
705
|
+
{"key_02", "m1"},
|
|
706
|
+
{"key_04", "v4"},
|
|
707
|
+
{"key_05", "e5"},
|
|
708
|
+
{"key_06", "v6"}};
|
|
709
|
+
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
// Point lookups via both indexes.
|
|
713
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "v1"));
|
|
714
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "m1"));
|
|
715
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03"));
|
|
716
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_04", "v4"));
|
|
717
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05", "e5"));
|
|
718
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06", "v6"));
|
|
719
|
+
|
|
720
|
+
// Seek via both indexes.
|
|
721
|
+
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("key_04", "key_04", "v4"));
|
|
722
|
+
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("key_05", "key_05", "e5"));
|
|
723
|
+
|
|
724
|
+
// Reverse operations below use the standard index only.
|
|
725
|
+
|
|
726
|
+
// SeekToLast + full reverse scan.
|
|
727
|
+
{
|
|
728
|
+
ReadOptions ro;
|
|
729
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
730
|
+
iter->SeekToLast();
|
|
731
|
+
std::vector<std::string> reverse_keys;
|
|
732
|
+
for (; iter->Valid(); iter->Prev()) {
|
|
733
|
+
reverse_keys.push_back(iter->key().ToString());
|
|
734
|
+
}
|
|
735
|
+
ASSERT_OK(iter->status());
|
|
736
|
+
std::vector<std::string> expected = {"key_06", "key_05", "key_04", "key_02",
|
|
737
|
+
"key_01"};
|
|
738
|
+
ASSERT_EQ(reverse_keys, expected);
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
// SeekForPrev to an exact visible key.
|
|
742
|
+
{
|
|
743
|
+
ReadOptions ro;
|
|
744
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
745
|
+
iter->SeekForPrev("key_04");
|
|
746
|
+
ASSERT_TRUE(iter->Valid());
|
|
747
|
+
ASSERT_EQ(iter->key().ToString(), "key_04");
|
|
748
|
+
ASSERT_EQ(iter->value().ToString(), "v4");
|
|
749
|
+
ASSERT_OK(iter->status());
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
// SeekForPrev to a deleted key — should land on the largest visible key
|
|
753
|
+
// that is <= "key_03", which is key_02.
|
|
754
|
+
{
|
|
755
|
+
ReadOptions ro;
|
|
756
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
757
|
+
iter->SeekForPrev("key_03");
|
|
758
|
+
ASSERT_TRUE(iter->Valid());
|
|
759
|
+
ASSERT_EQ(iter->key().ToString(), "key_02");
|
|
760
|
+
ASSERT_OK(iter->status());
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
// SeekForPrev to a key between existing keys.
|
|
764
|
+
{
|
|
765
|
+
ReadOptions ro;
|
|
766
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
767
|
+
iter->SeekForPrev("key_04_5");
|
|
768
|
+
ASSERT_TRUE(iter->Valid());
|
|
769
|
+
ASSERT_EQ(iter->key().ToString(), "key_04");
|
|
770
|
+
ASSERT_OK(iter->status());
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
// SeekForPrev before all keys — should be invalid.
|
|
774
|
+
{
|
|
775
|
+
ReadOptions ro;
|
|
776
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
777
|
+
iter->SeekForPrev("key_00");
|
|
778
|
+
ASSERT_FALSE(iter->Valid());
|
|
779
|
+
ASSERT_OK(iter->status());
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
// Prev from a Seek position in the middle of the range.
|
|
783
|
+
{
|
|
784
|
+
ReadOptions ro;
|
|
785
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
786
|
+
iter->Seek("key_05");
|
|
787
|
+
ASSERT_TRUE(iter->Valid());
|
|
788
|
+
ASSERT_EQ(iter->key().ToString(), "key_05");
|
|
789
|
+
|
|
790
|
+
iter->Prev();
|
|
791
|
+
ASSERT_TRUE(iter->Valid());
|
|
792
|
+
ASSERT_EQ(iter->key().ToString(), "key_04");
|
|
793
|
+
|
|
794
|
+
iter->Prev();
|
|
795
|
+
ASSERT_TRUE(iter->Valid());
|
|
796
|
+
ASSERT_EQ(iter->key().ToString(), "key_02");
|
|
797
|
+
|
|
798
|
+
iter->Prev();
|
|
799
|
+
ASSERT_TRUE(iter->Valid());
|
|
800
|
+
ASSERT_EQ(iter->key().ToString(), "key_01");
|
|
801
|
+
|
|
802
|
+
iter->Prev();
|
|
803
|
+
ASSERT_FALSE(iter->Valid());
|
|
804
|
+
ASSERT_OK(iter->status());
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
// ============================================================================
|
|
809
|
+
// DeleteRange test
|
|
810
|
+
// ============================================================================
|
|
811
|
+
|
|
812
|
+
TEST_F(TrieIndexDBTest, DeleteRangeWithTrieUDI) {
  // DeleteRange (kTypeRangeDeletion) coexisting with the trie UDI. Range
  // tombstones are written to a separate range_del_block instead of going
  // through OnKeyAdded, so this test checks that reads still hide the covered
  // keys while the trie UDI is active. Forward scans and point lookups are
  // checked through both indexes; the reverse scan uses the standard index.
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  // Seed key_01..key_10 -> val_01..val_10.
  for (int n = 1; n <= 10; ++n) {
    char key[16];
    char value[16];
    snprintf(key, sizeof(key), "key_%02d", n);
    snprintf(value, sizeof(value), "val_%02d", n);
    ASSERT_OK(db_->Put(WriteOptions(), key, value));
  }

  // The tombstone covers [key_04, key_08): key_04..key_07 disappear while
  // key_08 (the exclusive end) survives.
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "key_04", "key_08"));

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Keys that must remain visible, in ascending order.
  std::vector<std::string> surviving_keys = {"key_01", "key_02", "key_03",
                                             "key_08", "key_09", "key_10"};

  // Forward scan via both indexes.
  ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(surviving_keys));

  // Point lookups via both indexes: first and last key inside the deleted
  // range must be gone.
  for (const char* deleted_key : {"key_04", "key_07"}) {
    ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes(deleted_key));
  }

  // Point lookups via both indexes: the keys adjacent to the range on either
  // side must still be readable.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03", "val_03"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_08", "val_08"));

  // Reverse scan (standard index only) must honor the range deletion too:
  // it should yield exactly the surviving keys in descending order.
  {
    std::vector<std::string> seen;
    ReadOptions read_opts;
    std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
    it->SeekToLast();
    while (it->Valid()) {
      seen.push_back(it->key().ToString());
      it->Prev();
    }
    ASSERT_OK(it->status());

    std::vector<std::string> descending(surviving_keys.rbegin(),
                                        surviving_keys.rend());
    ASSERT_EQ(seen, descending);
  }
}
|
|
865
|
+
|
|
866
|
+
// ============================================================================
|
|
867
|
+
// DB reopen test
|
|
868
|
+
// ============================================================================
|
|
869
|
+
|
|
870
|
+
TEST_F(TrieIndexDBTest, ReopenWithMixedOperationTypes) {
  // Writes one entry of every point-operation type, flushes, closes the DB,
  // reopens it and then reads everything back through both indexes. After the
  // reopen there is no memtable data, so every read is served from the cold
  // SST file.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  const WriteOptions write_opts;
  ASSERT_OK(db_->Put(write_opts, "key_01", "val_put"));
  ASSERT_OK(db_->Merge(write_opts, "key_02", "val_merge"));
  ASSERT_OK(db_->Delete(write_opts, "key_03"));
  // key_04: a Put immediately removed by a SingleDelete.
  ASSERT_OK(db_->Put(write_opts, "key_04", "sd_target"));
  ASSERT_OK(db_->SingleDelete(write_opts, "key_04"));
  ASSERT_OK(db_->PutEntity(write_opts, db_->DefaultColumnFamily(), "key_05",
                           WideColumns{{"", "entity_val"}}));
  ASSERT_OK(db_->Put(write_opts, "key_06", "val_put2"));

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Close the DB so the data lives only in SST files, then reopen with the
  // same trie UDI configuration.
  ASSERT_OK(db_->Close());
  db_.reset();
  ASSERT_OK(OpenDB());

  // Point lookups on cold data via both indexes.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "val_put"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "val_merge"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05", "entity_val"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06", "val_put2"));

  // Visible keys in ascending order (key_03 and key_04 are deleted).
  std::vector<std::string> visible_keys = {"key_01", "key_02", "key_05",
                                           "key_06"};

  // Forward scan via both indexes.
  ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(visible_keys));

  // Reverse scan on cold data (standard index only): same keys, descending.
  {
    std::vector<std::string> seen;
    ReadOptions read_opts;
    std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
    it->SeekToLast();
    while (it->Valid()) {
      seen.push_back(it->key().ToString());
      it->Prev();
    }
    ASSERT_OK(it->status());

    std::vector<std::string> descending(visible_keys.rbegin(),
                                        visible_keys.rend());
    ASSERT_EQ(seen, descending);
  }
}
|
|
927
|
+
|
|
928
|
+
// ============================================================================
|
|
929
|
+
// Ingest external file test
|
|
930
|
+
// ============================================================================
|
|
931
|
+
|
|
932
|
+
TEST_F(TrieIndexDBTest, IngestExternalFileWithTrieUDI) {
  // Builds an external SST with SstFileWriter (trie UDI enabled) and ingests
  // it into a live DB that is also configured with the trie UDI. Afterwards
  // both the data written directly to the DB and the ingested data must be
  // readable through both indexes.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  // Data written straight to the DB, flushed into its own SST.
  ASSERT_OK(db_->Put(WriteOptions(), "key_01", "db_val1"));
  ASSERT_OK(db_->Put(WriteOptions(), "key_05", "db_val5"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // External SST containing a mix of Put / Merge / Delete entries. The writer
  // lives in its own scope so it is destroyed before the file is ingested.
  const std::string sst_path = dbname_ + "/ingest.sst";
  {
    BlockBasedTableOptions udi_table_options;
    udi_table_options.user_defined_index_factory = trie_factory_;

    Options writer_options;
    writer_options.merge_operator =
        MergeOperators::CreateStringAppendOperator();
    writer_options.table_factory.reset(
        NewBlockBasedTableFactory(udi_table_options));

    SstFileWriter sst_writer(EnvOptions(), writer_options);
    ASSERT_OK(sst_writer.Open(sst_path));
    ASSERT_OK(sst_writer.Put("key_02", "ingest_val2"));
    ASSERT_OK(sst_writer.Merge("key_03", "ingest_merge3"));
    ASSERT_OK(sst_writer.Delete("key_04"));
    ASSERT_OK(sst_writer.Put("key_06", "ingest_val6"));
    ASSERT_OK(sst_writer.Finish());
  }

  // Ingest into the live DB.
  IngestExternalFileOptions ingest_options;
  ASSERT_OK(db_->IngestExternalFile({sst_path}, ingest_options));

  // Point lookups via both indexes over the combined DB + ingested data.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "db_val1"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "ingest_val2"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03", "ingest_merge3"));
  // key_04 is an ingested Delete tombstone with no earlier value: NotFound.
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05", "db_val5"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06", "ingest_val6"));

  // Forward scan via both indexes: everything except the tombstoned key_04.
  std::vector<std::string> visible_keys = {"key_01", "key_02", "key_03",
                                           "key_05", "key_06"};
  ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(visible_keys));
}
|
|
984
|
+
|
|
985
|
+
// ============================================================================
|
|
986
|
+
// WriteBatch test
|
|
987
|
+
// ============================================================================
|
|
988
|
+
|
|
989
|
+
TEST_F(TrieIndexDBTest, WriteBatchWithMixedOperations) {
  // A single WriteBatch carrying several operation types (Put, Delete, Merge,
  // SingleDelete, PutEntity) must produce correct results with the trie UDI.
  // Results are checked through both indexes. Real-world workloads typically
  // batch multiple operations.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB());

  // Pre-populate (and flush) the key that the batch's Delete will target.
  ASSERT_OK(db_->Put(WriteOptions(), "key_02_del", "pre_val"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Assemble the batch against the default column family.
  auto* default_cf = db_->DefaultColumnFamily();
  WriteBatch batch;
  ASSERT_OK(batch.Put(default_cf, "key_01_put", "batch_put"));
  ASSERT_OK(batch.Delete(default_cf, "key_02_del"));
  ASSERT_OK(batch.Merge(default_cf, "key_03_merge", "batch_m"));
  // Put followed by SingleDelete inside the same batch: they cancel out.
  ASSERT_OK(batch.Put(default_cf, "key_04_sd", "sd_target"));
  ASSERT_OK(batch.SingleDelete(default_cf, "key_04_sd"));
  ASSERT_OK(batch.PutEntity(default_cf, "key_05_entity",
                            WideColumns{{"", "batch_entity"}}));

  ASSERT_OK(db_->Write(WriteOptions(), &batch));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Point lookups via both indexes. Visible: key_01 (Put), key_03 (Merge),
  // key_05 (PutEntity). Hidden: key_02 (deleted) and key_04 (Put and
  // SingleDelete cancel).
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "batch_put"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_02_del"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03_merge", "batch_m"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04_sd"));
  ASSERT_NO_FATAL_FAILURE(
      VerifyGetBothIndexes("key_05_entity", "batch_entity"));

  // Forward scan via both indexes.
  std::vector<std::string> visible_keys = {"key_01_put", "key_03_merge",
                                           "key_05_entity"};
  ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(visible_keys));
}
|
|
1033
|
+
|
|
1034
|
+
// ============================================================================
|
|
1035
|
+
// Large-scale test
|
|
1036
|
+
// ============================================================================
|
|
1037
|
+
|
|
1038
|
+
TEST_F(TrieIndexDBTest, LargeMixedOperationsAcrossBlocks) {
  // Large-scale run: many keys, a rotating mix of operation types, and a
  // small block size. The aim is to stress block-boundary handling in the
  // trie UDI across Put, Delete, Merge, SingleDelete and PutEntity entries.
  // Everything is verified through both indexes.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  // A small block size forces many data blocks, exercising the trie index's
  // AddIndexEntry at frequent block boundaries.
  ASSERT_OK(OpenDB(/*block_size=*/128));

  const int kNumKeys = 500;
  // Keys that should be visible after the flush (neither deleted nor
  // single-deleted), collected in ascending key order.
  std::vector<std::string> expected_visible;

  for (int i = 0; i < kNumKeys; ++i) {
    char key_buf[32];
    snprintf(key_buf, sizeof(key_buf), "key_%06d", i);
    std::string key(key_buf);

    // The operation type is chosen by the last decimal digit of i:
    //   0-3 -> Put           (40%)
    //   4-5 -> Delete        (20%)
    //   6-7 -> Merge         (20%)
    //   8   -> SingleDelete  (10%, preceded by Put -- both cancel)
    //   9   -> PutEntity     (10%)
    switch (i % 10) {
      case 0:
      case 1:
      case 2:
      case 3: {
        char val_buf[32];
        snprintf(val_buf, sizeof(val_buf), "val_%06d", i);
        ASSERT_OK(db_->Put(WriteOptions(), key, val_buf));
        expected_visible.push_back(key);
        break;
      }
      case 4:
      case 5:
        // Bare tombstone: not visible.
        ASSERT_OK(db_->Delete(WriteOptions(), key));
        break;
      case 6:
      case 7: {
        char val_buf[32];
        snprintf(val_buf, sizeof(val_buf), "merge_%06d", i);
        ASSERT_OK(db_->Merge(WriteOptions(), key, val_buf));
        expected_visible.push_back(key);
        break;
      }
      case 8:
        // Put and SingleDelete cancel: not visible.
        ASSERT_OK(db_->Put(WriteOptions(), key, "to_be_deleted"));
        ASSERT_OK(db_->SingleDelete(WriteOptions(), key));
        break;
      default:
        ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
                                 key, WideColumns{{"", "entity_val"}}));
        expected_visible.push_back(key);
        break;
    }
  }

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Forward scan via both indexes must yield exactly the expected keys.
  ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected_visible));

  // Spot-check: Seek to every 10th visible key via both indexes.
  for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
    SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
    for (size_t pos = 0; pos < expected_visible.size(); pos += 10) {
      const std::string& target = expected_visible[pos];
      iter->Seek(target);
      ASSERT_TRUE(iter->Valid()) << "Seek failed for " << target;
      ASSERT_EQ(iter->key().ToString(), target);
    }
    ASSERT_OK(iter->status());
  }
}
|
|
1106
|
+
|
|
1107
|
+
// ============================================================================
|
|
1108
|
+
// Seqno side-table tests (same user key spanning data block boundaries)
|
|
1109
|
+
// ============================================================================
|
|
1110
|
+
|
|
1111
|
+
TEST_F(TrieIndexDBTest, SameUserKeyAcrossBlockBoundaries) {
  // Forces the same user key to appear in multiple data blocks by writing many
  // versions with snapshots held to prevent garbage collection, using a tiny
  // block_size. This exercises the trie's seqno side-table: the trie stores
  // only one separator per user key, and the side-table records the seqno +
  // overflow block count so that Seek() can find the correct data block for
  // each version.
  //
  // Without the seqno side-table fix (PR #14412), reads through the trie index
  // would return incorrect data when multiple versions of the same key span
  // different data blocks.
  options_.disable_auto_compactions = true;
  // Tiny block_size (64 bytes) forces each version of the key into its own
  // data block, creating same-user-key block boundaries that the trie must
  // handle via the seqno side-table.
  ASSERT_OK(OpenDB(/*block_size=*/64));

  // Snapshots are owned by scope guards. Every fatal ASSERT_* below returns
  // from this function early; the guards make sure the snapshots are still
  // released on that path instead of being leaked into fixture teardown.
  auto release_snapshot = [this](const Snapshot* s) {
    db_->ReleaseSnapshot(s);
  };
  using SnapshotGuard =
      std::unique_ptr<const Snapshot, decltype(release_snapshot)>;

  // Write multiple versions of the same key, holding snapshots so all versions
  // survive the flush to a single SST file. snaps[i] is taken right after
  // version i is written.
  const std::string key = "same_key";
  constexpr int kNumVersions = 10;
  std::vector<SnapshotGuard> snaps;
  snaps.reserve(static_cast<size_t>(kNumVersions));
  for (int i = 0; i < kNumVersions; i++) {
    std::string val = "ver_" + std::to_string(i);
    ASSERT_OK(db_->Put(WriteOptions(), key, val));
    snaps.emplace_back(db_->GetSnapshot(), release_snapshot);
  }

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Current view: latest version visible.
  ASSERT_NO_FATAL_FAILURE(
      VerifyGetBothIndexes(key, "ver_" + std::to_string(kNumVersions - 1)));

  // Each snapshot should see the version written at or before its creation.
  for (int i = 0; i < kNumVersions; i++) {
    std::string expected_val = "ver_" + std::to_string(i);
    ASSERT_NO_FATAL_FAILURE(
        VerifyGetBothIndexes(snaps[i].get(), key, expected_val));
  }

  // Forward scan with each snapshot should return exactly one key with the
  // correct version.
  for (int i = 0; i < kNumVersions; i++) {
    std::string expected_val = "ver_" + std::to_string(i);
    std::vector<std::pair<std::string, std::string>> expected = {
        {key, expected_val}};
    ASSERT_NO_FATAL_FAILURE(
        VerifyForwardScanBothIndexes(snaps[i].get(), expected));
  }

  // Seek to the key through the trie index with each snapshot — the trie's
  // post-seek correction must advance through overflow blocks to find the
  // correct version for each seqno.
  for (int i = 0; i < kNumVersions; i++) {
    std::string expected_val = "ver_" + std::to_string(i);
    SCOPED_TRACE("snap=" + std::to_string(i));
    ASSERT_NO_FATAL_FAILURE(
        VerifySeekBothIndexes(snaps[i].get(), key, key, expected_val));
  }

  // All snapshots are released when `snaps` goes out of scope here.
}
|
|
1174
|
+
|
|
1175
|
+
TEST_F(TrieIndexDBTest, SameUserKeyPutThenDeleteAcrossBlocks) {
  // Same user key with a Put followed by a Delete, where both entries land in
  // different data blocks. A snapshot pins the Put version. After compaction,
  // the current view shows NotFound while the snapshot view shows the Put.
  // This tests the seqno side-table with mixed value types for the same key.
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB(/*block_size=*/64));

  // Write a Put, take snapshot, then Delete.
  ASSERT_OK(db_->Put(WriteOptions(), "del_key", "put_value"));

  // The snapshot is owned by a scope guard. Every fatal ASSERT_* below returns
  // from this function early; the guard makes sure the snapshot is still
  // released on that path instead of being leaked into fixture teardown.
  auto release_snapshot = [this](const Snapshot* s) {
    db_->ReleaseSnapshot(s);
  };
  const std::unique_ptr<const Snapshot, decltype(release_snapshot)> snap_guard(
      db_->GetSnapshot(), release_snapshot);
  const Snapshot* snap = snap_guard.get();

  ASSERT_OK(db_->Delete(WriteOptions(), "del_key"));

  // Add surrounding keys to create more data blocks and exercise trie
  // separators around the duplicated key.
  ASSERT_OK(db_->Put(WriteOptions(), "aaa_before", "before_val"));
  ASSERT_OK(db_->Put(WriteOptions(), "zzz_after", "after_val"));

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Current view: del_key is deleted.
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_key"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("aaa_before", "before_val"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("zzz_after", "after_val"));

  // Snapshot view: del_key is visible with the Put value.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "del_key", "put_value"));

  // Seek to del_key with snapshot through both indexes.
  ASSERT_NO_FATAL_FAILURE(
      VerifySeekBothIndexes(snap, "del_key", "del_key", "put_value"));

  // Compact to merge the Put + Delete. Snapshot prevents GC of the Put.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // After compaction, same behavior.
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_key"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "del_key", "put_value"));

  // The snapshot is released when `snap_guard` goes out of scope here.
}
|
|
1216
|
+
|
|
1217
|
+
TEST_F(TrieIndexDBTest, SameUserKeyManyVersionsSeekCorrectness) {
  // Writes many versions of three different keys (with snapshots), using a
  // tiny block_size to force same-user-key block boundaries. Verifies that
  // Seek + Get through the trie index returns the correct version for each
  // snapshot, testing the seqno side-table's overflow handling with multiple
  // keys interleaved in the SST.
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB(/*block_size=*/64));

  // Snapshots are owned by scope guards. Every fatal ASSERT_* below returns
  // from this function early; the guards make sure the snapshots are still
  // released on that path instead of being leaked into fixture teardown.
  auto release_snapshot = [this](const Snapshot* s) {
    db_->ReleaseSnapshot(s);
  };
  using SnapshotGuard =
      std::unique_ptr<const Snapshot, decltype(release_snapshot)>;

  const std::vector<std::string> keys = {"key_aaa", "key_mmm", "key_zzz"};
  constexpr int kVersionsPerKey = 8;
  // snaps[v] is taken after writing version v of all keys.
  std::vector<SnapshotGuard> snaps;
  snaps.reserve(static_cast<size_t>(kVersionsPerKey));

  for (int v = 0; v < kVersionsPerKey; v++) {
    for (const auto& k : keys) {
      std::string val = k + "_v" + std::to_string(v);
      ASSERT_OK(db_->Put(WriteOptions(), k, val));
    }
    snaps.emplace_back(db_->GetSnapshot(), release_snapshot);
  }

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Verify each snapshot sees the correct version of each key via Get and Seek.
  for (int v = 0; v < kVersionsPerKey; v++) {
    for (const auto& k : keys) {
      std::string expected_val = k + "_v" + std::to_string(v);
      SCOPED_TRACE("key=" + k + " v=" + std::to_string(v));
      ASSERT_NO_FATAL_FAILURE(
          VerifyGetBothIndexes(snaps[v].get(), k, expected_val));
      ASSERT_NO_FATAL_FAILURE(
          VerifySeekBothIndexes(snaps[v].get(), k, k, expected_val));
    }
  }

  // Compact and re-verify. Compaction must preserve all versions because
  // snapshots are held.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  for (int v = 0; v < kVersionsPerKey; v++) {
    for (const auto& k : keys) {
      std::string expected_val = k + "_v" + std::to_string(v);
      ASSERT_NO_FATAL_FAILURE(
          VerifyGetBothIndexes(snaps[v].get(), k, expected_val));
    }
  }

  // All snapshots are released when `snaps` goes out of scope here.
}
|
|
1267
|
+
|
|
1268
|
+
// ============================================================================
|
|
1269
|
+
// MultiGet test
|
|
1270
|
+
// ============================================================================
|
|
1271
|
+
|
|
1272
|
+
TEST_F(TrieIndexDBTest, MultiGetWithTrieUDI) {
  // Checks the batched MultiGet API against the trie UDI. MultiGet is a
  // separate code path from single Get and uses batched block lookups, so it
  // gets its own test.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  ASSERT_OK(OpenDB(/*block_size=*/128));

  // Write a mix of operation types, then flush them into one SST.
  const WriteOptions write_opts;
  ASSERT_OK(db_->Put(write_opts, "key_01", "val_01"));
  ASSERT_OK(db_->Merge(write_opts, "key_02", "merge_02"));
  ASSERT_OK(db_->Delete(write_opts, "key_03"));
  ASSERT_OK(db_->Put(write_opts, "key_04", "val_04"));
  ASSERT_OK(db_->PutEntity(write_opts, db_->DefaultColumnFamily(), "key_05",
                           WideColumns{{"", "entity_05"}}));
  ASSERT_OK(db_->Put(write_opts, "key_06", "val_06"));

  ASSERT_OK(db_->Flush(FlushOptions()));

  // Expected result per looked-up key, in lookup order. nullptr means the
  // key must come back as NotFound (key_03 is deleted, the last key was
  // never written).
  const char* const kExpectedValues[] = {"val_01", "merge_02", nullptr,
                                         "val_04", "entity_05", "val_06",
                                         nullptr};

  // MultiGet through both indexes.
  for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
    SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");

    std::vector<Slice> lookup_keys = {"key_01", "key_02", "key_03",
                                      "key_04", "key_05", "key_06",
                                      "key_nonexistent"};
    std::vector<std::string> values(lookup_keys.size());
    std::vector<Status> statuses = db_->MultiGet(ro, lookup_keys, &values);

    ASSERT_EQ(statuses.size(), lookup_keys.size());
    for (size_t idx = 0; idx < lookup_keys.size(); ++idx) {
      if (kExpectedValues[idx] == nullptr) {
        ASSERT_TRUE(statuses[idx].IsNotFound());
      } else {
        ASSERT_OK(statuses[idx]);
        ASSERT_EQ(values[idx], kExpectedValues[idx]);
      }
    }
  }
}
|
|
1316
|
+
|
|
1317
|
+
// ============================================================================
|
|
1318
|
+
// WAL replay / crash recovery test
|
|
1319
|
+
// ============================================================================
|
|
1320
|
+
|
|
1321
|
+
TEST_F(TrieIndexDBTest, WALReplayRecovery) {
  // Writes data without flushing, then closes and reopens the DB so the data
  // has to be recovered from the WAL before it reaches an SST. The goal is to
  // check that the trie UDI builder handles entries replayed from the WAL.
  options_.merge_operator = MergeOperators::CreateStringAppendOperator();
  options_.disable_auto_compactions = true;
  // The WAL is enabled by default (WriteOptions::disableWAL = false).
  ASSERT_OK(OpenDB());

  // Deliberately no Flush here: the data lives only in the WAL + memtable.
  const WriteOptions write_opts;
  ASSERT_OK(db_->Put(write_opts, "wal_key_01", "wal_val_01"));
  ASSERT_OK(db_->Merge(write_opts, "wal_key_02", "wal_merge"));
  // wal_key_03 is written and then deleted, so it must end up invisible.
  ASSERT_OK(db_->Put(write_opts, "wal_key_03", "wal_val_03"));
  ASSERT_OK(db_->Delete(write_opts, "wal_key_03"));
  ASSERT_OK(db_->Put(write_opts, "wal_key_04", "wal_val_04"));

  // Close and reopen: this triggers WAL replay.
  ASSERT_OK(db_->Close());
  db_.reset();
  ASSERT_OK(OpenDB());

  // Flush so that the recovered data is in an SST built with the trie UDI.
  // NOTE(review): with the default DBOptions::avoid_flush_during_recovery
  // (false), recovery itself may already have flushed the replayed entries to
  // an SST, in which case this Flush has nothing left to write — confirm if
  // the memtable-after-replay path specifically needs coverage.
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Verify data through both indexes.
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("wal_key_01", "wal_val_01"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("wal_key_02", "wal_merge"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("wal_key_03"));
  ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("wal_key_04", "wal_val_04"));

  // Forward scan: the three surviving entries with their values.
  std::vector<std::pair<std::string, std::string>> surviving_entries = {
      {"wal_key_01", "wal_val_01"},
      {"wal_key_02", "wal_merge"},
      {"wal_key_04", "wal_val_04"}};
  ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(surviving_entries));
}
|
|
1361
|
+
|
|
1362
|
+
// ============================================================================
|
|
1363
|
+
// Multiple column families test
|
|
1364
|
+
// ============================================================================
|
|
1365
|
+
|
|
1366
|
+
TEST_F(TrieIndexDBTest, MultipleColumnFamilies) {
|
|
1367
|
+
// Opens a DB with multiple column families, each using the trie UDI. Writes
|
|
1368
|
+
// different data to each CF, flushes, and verifies reads through both indexes
|
|
1369
|
+
// for each CF. This tests that the UDI builder/reader are correctly isolated
|
|
1370
|
+
// per-CF.
|
|
1371
|
+
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
1372
|
+
options_.disable_auto_compactions = true;
|
|
1373
|
+
options_.create_if_missing = true;
|
|
1374
|
+
|
|
1375
|
+
BlockBasedTableOptions table_options;
|
|
1376
|
+
table_options.user_defined_index_factory = trie_factory_;
|
|
1377
|
+
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
1378
|
+
last_options_ = options_;
|
|
1379
|
+
|
|
1380
|
+
// Open with default CF first.
|
|
1381
|
+
ASSERT_OK(DB::Open(options_, dbname_, &db_));
|
|
1382
|
+
|
|
1383
|
+
// Create two additional CFs with the same trie UDI options.
|
|
1384
|
+
ColumnFamilyHandle* cf1 = nullptr;
|
|
1385
|
+
ColumnFamilyHandle* cf2 = nullptr;
|
|
1386
|
+
ASSERT_OK(db_->CreateColumnFamily(options_, "cf_one", &cf1));
|
|
1387
|
+
ASSERT_OK(db_->CreateColumnFamily(options_, "cf_two", &cf2));
|
|
1388
|
+
|
|
1389
|
+
// Write different data to each CF.
|
|
1390
|
+
ASSERT_OK(db_->Put(WriteOptions(), "default_key", "default_val"));
|
|
1391
|
+
ASSERT_OK(db_->Put(WriteOptions(), cf1, "cf1_key_a", "cf1_val_a"));
|
|
1392
|
+
ASSERT_OK(db_->Merge(WriteOptions(), cf1, "cf1_key_b", "cf1_merge"));
|
|
1393
|
+
ASSERT_OK(db_->Put(WriteOptions(), cf2, "cf2_key_x", "cf2_val_x"));
|
|
1394
|
+
ASSERT_OK(db_->Delete(WriteOptions(), cf2, "cf2_key_y"));
|
|
1395
|
+
ASSERT_OK(db_->Put(WriteOptions(), cf2, "cf2_key_z", "cf2_val_z"));
|
|
1396
|
+
|
|
1397
|
+
// Flush all CFs.
|
|
1398
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1399
|
+
ASSERT_OK(db_->Flush(FlushOptions(), cf1));
|
|
1400
|
+
ASSERT_OK(db_->Flush(FlushOptions(), cf2));
|
|
1401
|
+
|
|
1402
|
+
// Verify default CF.
|
|
1403
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1404
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1405
|
+
std::string value;
|
|
1406
|
+
ASSERT_OK(db_->Get(ro, "default_key", &value));
|
|
1407
|
+
ASSERT_EQ(value, "default_val");
|
|
1408
|
+
}
|
|
1409
|
+
|
|
1410
|
+
// Verify cf_one through both indexes.
|
|
1411
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1412
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1413
|
+
std::string value;
|
|
1414
|
+
ASSERT_OK(db_->Get(ro, cf1, "cf1_key_a", &value));
|
|
1415
|
+
ASSERT_EQ(value, "cf1_val_a");
|
|
1416
|
+
ASSERT_OK(db_->Get(ro, cf1, "cf1_key_b", &value));
|
|
1417
|
+
ASSERT_EQ(value, "cf1_merge");
|
|
1418
|
+
}
|
|
1419
|
+
|
|
1420
|
+
// Verify cf_two through both indexes.
|
|
1421
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1422
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1423
|
+
std::string value;
|
|
1424
|
+
ASSERT_OK(db_->Get(ro, cf2, "cf2_key_x", &value));
|
|
1425
|
+
ASSERT_EQ(value, "cf2_val_x");
|
|
1426
|
+
ASSERT_TRUE(db_->Get(ro, cf2, "cf2_key_y", &value).IsNotFound());
|
|
1427
|
+
ASSERT_OK(db_->Get(ro, cf2, "cf2_key_z", &value));
|
|
1428
|
+
ASSERT_EQ(value, "cf2_val_z");
|
|
1429
|
+
}
|
|
1430
|
+
|
|
1431
|
+
// Forward scan on each CF via both indexes.
|
|
1432
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1433
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1434
|
+
|
|
1435
|
+
// cf_one scan.
|
|
1436
|
+
std::unique_ptr<Iterator> it1(db_->NewIterator(ro, cf1));
|
|
1437
|
+
it1->SeekToFirst();
|
|
1438
|
+
ASSERT_TRUE(it1->Valid());
|
|
1439
|
+
ASSERT_EQ(it1->key().ToString(), "cf1_key_a");
|
|
1440
|
+
it1->Next();
|
|
1441
|
+
ASSERT_TRUE(it1->Valid());
|
|
1442
|
+
ASSERT_EQ(it1->key().ToString(), "cf1_key_b");
|
|
1443
|
+
it1->Next();
|
|
1444
|
+
ASSERT_FALSE(it1->Valid());
|
|
1445
|
+
ASSERT_OK(it1->status());
|
|
1446
|
+
|
|
1447
|
+
// cf_two scan.
|
|
1448
|
+
std::unique_ptr<Iterator> it2(db_->NewIterator(ro, cf2));
|
|
1449
|
+
it2->SeekToFirst();
|
|
1450
|
+
ASSERT_TRUE(it2->Valid());
|
|
1451
|
+
ASSERT_EQ(it2->key().ToString(), "cf2_key_x");
|
|
1452
|
+
it2->Next();
|
|
1453
|
+
ASSERT_TRUE(it2->Valid());
|
|
1454
|
+
ASSERT_EQ(it2->key().ToString(), "cf2_key_z");
|
|
1455
|
+
it2->Next();
|
|
1456
|
+
ASSERT_FALSE(it2->Valid());
|
|
1457
|
+
ASSERT_OK(it2->status());
|
|
1458
|
+
}
|
|
1459
|
+
|
|
1460
|
+
// Clean up CF handles before closing.
|
|
1461
|
+
ASSERT_OK(db_->DestroyColumnFamilyHandle(cf1));
|
|
1462
|
+
ASSERT_OK(db_->DestroyColumnFamilyHandle(cf2));
|
|
1463
|
+
|
|
1464
|
+
// Close the DB. Need to clear db_ first since TearDown will also close.
|
|
1465
|
+
ASSERT_OK(db_->Close());
|
|
1466
|
+
db_.reset();
|
|
1467
|
+
|
|
1468
|
+
// Reopen with all CFs to verify persistence.
|
|
1469
|
+
{
|
|
1470
|
+
std::vector<ColumnFamilyDescriptor> cf_descs = {
|
|
1471
|
+
ColumnFamilyDescriptor(kDefaultColumnFamilyName, options_),
|
|
1472
|
+
ColumnFamilyDescriptor("cf_one", options_),
|
|
1473
|
+
ColumnFamilyDescriptor("cf_two", options_)};
|
|
1474
|
+
std::vector<ColumnFamilyHandle*> cf_handles;
|
|
1475
|
+
std::unique_ptr<DB> reopen_db;
|
|
1476
|
+
ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cf_handles, &reopen_db));
|
|
1477
|
+
db_ = std::move(reopen_db);
|
|
1478
|
+
|
|
1479
|
+
// Verify data survives reopen.
|
|
1480
|
+
auto ro = TrieIndexReadOptions();
|
|
1481
|
+
std::string value;
|
|
1482
|
+
ASSERT_OK(db_->Get(ro, cf_handles[0], "default_key", &value));
|
|
1483
|
+
ASSERT_EQ(value, "default_val");
|
|
1484
|
+
ASSERT_OK(db_->Get(ro, cf_handles[1], "cf1_key_a", &value));
|
|
1485
|
+
ASSERT_EQ(value, "cf1_val_a");
|
|
1486
|
+
ASSERT_OK(db_->Get(ro, cf_handles[2], "cf2_key_z", &value));
|
|
1487
|
+
ASSERT_EQ(value, "cf2_val_z");
|
|
1488
|
+
|
|
1489
|
+
for (auto* h : cf_handles) {
|
|
1490
|
+
ASSERT_OK(db_->DestroyColumnFamilyHandle(h));
|
|
1491
|
+
}
|
|
1492
|
+
}
|
|
1493
|
+
}
|
|
1494
|
+
|
|
1495
|
+
// ---------------------------------------------------------------------------
|
|
1496
|
+
// BatchedPrefixScan: reproduces the test_batches_snapshots pattern.
|
|
1497
|
+
//
|
|
1498
|
+
// Writes batches of 10 keys {digit+key_body : value_body+digit} for digit in
|
|
1499
|
+
// 0..9, exactly as the crash-test stress tool does. Then scans each prefix
|
|
1500
|
+
// concurrently (same snapshot) and checks that:
|
|
1501
|
+
// (a) all 10 iterators yield the same key bodies in lockstep, and
|
|
1502
|
+
// (b) the values stripped of the trailing digit are identical across
|
|
1503
|
+
// prefixes.
|
|
1504
|
+
//
|
|
1505
|
+
// We run with both the standard index and the trie index and compare.
|
|
1506
|
+
// ---------------------------------------------------------------------------
|
|
1507
|
+
TEST_F(TrieIndexDBTest, BatchedPrefixScan) {
|
|
1508
|
+
// Small block size to force many data blocks (and thus many trie entries).
|
|
1509
|
+
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
1510
|
+
|
|
1511
|
+
const int kNumBatches = 200;
|
|
1512
|
+
const int kNumPrefixes = 10;
|
|
1513
|
+
Random rnd(42);
|
|
1514
|
+
|
|
1515
|
+
// Phase 1: Write batches.
|
|
1516
|
+
for (int b = 0; b < kNumBatches; ++b) {
|
|
1517
|
+
WriteBatch batch;
|
|
1518
|
+
std::string key_body = MakeKeyBody(b);
|
|
1519
|
+
std::string value_body = rnd.RandomString(20);
|
|
1520
|
+
|
|
1521
|
+
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
1522
|
+
std::string k = std::to_string(d) + key_body;
|
|
1523
|
+
std::string v = value_body + std::to_string(d);
|
|
1524
|
+
ASSERT_OK(batch.Put(k, v));
|
|
1525
|
+
}
|
|
1526
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1527
|
+
}
|
|
1528
|
+
|
|
1529
|
+
// Flush so data is in SSTs with trie index.
|
|
1530
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1531
|
+
|
|
1532
|
+
// Phase 2: Prefix scan with both indexes.
|
|
1533
|
+
for (int idx_type = 0; idx_type < 2; ++idx_type) {
|
|
1534
|
+
ReadOptions base_ro =
|
|
1535
|
+
idx_type == 0 ? StandardIndexReadOptions() : TrieIndexReadOptions();
|
|
1536
|
+
SCOPED_TRACE(idx_type == 0 ? "standard index" : "trie index");
|
|
1537
|
+
|
|
1538
|
+
const Snapshot* snap = db_->GetSnapshot();
|
|
1539
|
+
base_ro.snapshot = snap;
|
|
1540
|
+
|
|
1541
|
+
uint64_t count = VerifyPrefixScanLockstep(base_ro, kNumPrefixes,
|
|
1542
|
+
/*use_upper_bounds=*/true,
|
|
1543
|
+
/*verify_values=*/true);
|
|
1544
|
+
ASSERT_EQ(count, static_cast<uint64_t>(kNumBatches))
|
|
1545
|
+
<< "expected " << kNumBatches << " entries per prefix";
|
|
1546
|
+
|
|
1547
|
+
db_->ReleaseSnapshot(snap);
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
// Same as above but with multiple flushes, compaction, and a DB reopen
|
|
1552
|
+
// in between to simulate the crash-recovery path.
|
|
1553
|
+
TEST_F(TrieIndexDBTest, BatchedPrefixScanAfterReopen) {
|
|
1554
|
+
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
1555
|
+
|
|
1556
|
+
const int kNumBatches = 100;
|
|
1557
|
+
const int kNumPrefixes = 10;
|
|
1558
|
+
Random rnd(123);
|
|
1559
|
+
|
|
1560
|
+
for (int b = 0; b < kNumBatches; ++b) {
|
|
1561
|
+
WriteBatch batch;
|
|
1562
|
+
std::string key_body = MakeKeyBody(b);
|
|
1563
|
+
std::string value_body = rnd.RandomString(20);
|
|
1564
|
+
|
|
1565
|
+
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
1566
|
+
std::string k = std::to_string(d) + key_body;
|
|
1567
|
+
std::string v = value_body + std::to_string(d);
|
|
1568
|
+
ASSERT_OK(batch.Put(k, v));
|
|
1569
|
+
}
|
|
1570
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1571
|
+
|
|
1572
|
+
// Flush every 20 batches to create multiple SSTs.
|
|
1573
|
+
if ((b + 1) % 20 == 0) {
|
|
1574
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1578
|
+
|
|
1579
|
+
// Compact to merge SSTs.
|
|
1580
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
1581
|
+
|
|
1582
|
+
// Close and reopen (simulating recovery).
|
|
1583
|
+
ASSERT_OK(db_->Close());
|
|
1584
|
+
db_.reset();
|
|
1585
|
+
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
1586
|
+
|
|
1587
|
+
// Prefix scan with trie index after reopen.
|
|
1588
|
+
ReadOptions base_ro = TrieIndexReadOptions();
|
|
1589
|
+
const Snapshot* snap = db_->GetSnapshot();
|
|
1590
|
+
base_ro.snapshot = snap;
|
|
1591
|
+
|
|
1592
|
+
uint64_t count =
|
|
1593
|
+
VerifyPrefixScanLockstep(base_ro, kNumPrefixes, /*use_upper_bounds=*/true,
|
|
1594
|
+
/*verify_values=*/false);
|
|
1595
|
+
ASSERT_EQ(count, static_cast<uint64_t>(kNumBatches));
|
|
1596
|
+
db_->ReleaseSnapshot(snap);
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
// Test with overwrites: multiple writes to the same key body, ensuring
|
|
1600
|
+
// the latest value is consistent across all prefixes.
|
|
1601
|
+
TEST_F(TrieIndexDBTest, BatchedPrefixScanWithOverwrites) {
|
|
1602
|
+
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
1603
|
+
|
|
1604
|
+
const int kNumKeys = 50;
|
|
1605
|
+
const int kNumOverwrites = 5;
|
|
1606
|
+
const int kNumPrefixes = 10;
|
|
1607
|
+
Random rnd(999);
|
|
1608
|
+
|
|
1609
|
+
// Write each key body multiple times.
|
|
1610
|
+
for (int round = 0; round < kNumOverwrites; ++round) {
|
|
1611
|
+
for (int k = 0; k < kNumKeys; ++k) {
|
|
1612
|
+
WriteBatch batch;
|
|
1613
|
+
std::string key_body = MakeKeyBody(k);
|
|
1614
|
+
std::string value_body = rnd.RandomString(20);
|
|
1615
|
+
|
|
1616
|
+
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
1617
|
+
std::string key = std::to_string(d) + key_body;
|
|
1618
|
+
std::string v = value_body + std::to_string(d);
|
|
1619
|
+
ASSERT_OK(batch.Put(key, v));
|
|
1620
|
+
}
|
|
1621
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1622
|
+
}
|
|
1623
|
+
|
|
1624
|
+
// Flush after each round.
|
|
1625
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
// Now verify with both indexes.
|
|
1629
|
+
for (int idx_type = 0; idx_type < 2; ++idx_type) {
|
|
1630
|
+
ReadOptions base_ro =
|
|
1631
|
+
idx_type == 0 ? StandardIndexReadOptions() : TrieIndexReadOptions();
|
|
1632
|
+
SCOPED_TRACE(idx_type == 0 ? "standard index" : "trie index");
|
|
1633
|
+
|
|
1634
|
+
const Snapshot* snap = db_->GetSnapshot();
|
|
1635
|
+
base_ro.snapshot = snap;
|
|
1636
|
+
|
|
1637
|
+
uint64_t count = VerifyPrefixScanLockstep(base_ro, kNumPrefixes,
|
|
1638
|
+
/*use_upper_bounds=*/false,
|
|
1639
|
+
/*verify_values=*/true);
|
|
1640
|
+
ASSERT_EQ(count, static_cast<uint64_t>(kNumKeys));
|
|
1641
|
+
db_->ReleaseSnapshot(snap);
|
|
1642
|
+
}
|
|
1643
|
+
}
|
|
1644
|
+
|
|
1645
|
+
// Stress-like test: write + delete + rewrite many keys, flush between rounds,
|
|
1646
|
+
// then verify prefix scan consistency. Simulates the crash test pattern that
|
|
1647
|
+
// triggered failures.
|
|
1648
|
+
TEST_F(TrieIndexDBTest, BatchedPrefixScanStressLike) {
|
|
1649
|
+
ASSERT_OK(OpenDB(/*block_size=*/4096));
|
|
1650
|
+
|
|
1651
|
+
const int kMaxKey = 10000;
|
|
1652
|
+
const int kNumPrefixes = 10;
|
|
1653
|
+
const int kNumRounds = 20;
|
|
1654
|
+
Random rnd(7777);
|
|
1655
|
+
|
|
1656
|
+
for (int round = 0; round < kNumRounds; ++round) {
|
|
1657
|
+
// Write a batch of random keys
|
|
1658
|
+
int num_writes = 100 + rnd.Uniform(200);
|
|
1659
|
+
for (int w = 0; w < num_writes; ++w) {
|
|
1660
|
+
int k = rnd.Uniform(kMaxKey);
|
|
1661
|
+
WriteBatch batch;
|
|
1662
|
+
std::string key_body = MakeKeyBody(k);
|
|
1663
|
+
std::string value_body = rnd.RandomString(rnd.Uniform(60) + 4);
|
|
1664
|
+
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
1665
|
+
std::string key = std::to_string(d) + key_body;
|
|
1666
|
+
std::string v = value_body + std::to_string(d);
|
|
1667
|
+
ASSERT_OK(batch.Put(key, v));
|
|
1668
|
+
}
|
|
1669
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1670
|
+
}
|
|
1671
|
+
|
|
1672
|
+
// Delete some random keys
|
|
1673
|
+
int num_deletes = 50 + rnd.Uniform(100);
|
|
1674
|
+
for (int w = 0; w < num_deletes; ++w) {
|
|
1675
|
+
int k = rnd.Uniform(kMaxKey);
|
|
1676
|
+
WriteBatch batch;
|
|
1677
|
+
std::string key_body = MakeKeyBody(k);
|
|
1678
|
+
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
1679
|
+
std::string key = std::to_string(d) + key_body;
|
|
1680
|
+
ASSERT_OK(batch.Delete(key));
|
|
1681
|
+
}
|
|
1682
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1683
|
+
}
|
|
1684
|
+
|
|
1685
|
+
// Flush every few rounds
|
|
1686
|
+
if (round % 3 == 0) {
|
|
1687
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1688
|
+
}
|
|
1689
|
+
|
|
1690
|
+
// Verify prefix scan consistency with trie index.
|
|
1691
|
+
{
|
|
1692
|
+
ReadOptions base_ro = TrieIndexReadOptions();
|
|
1693
|
+
const Snapshot* snap = db_->GetSnapshot();
|
|
1694
|
+
base_ro.snapshot = snap;
|
|
1695
|
+
|
|
1696
|
+
VerifyPrefixScanLockstep(base_ro, kNumPrefixes,
|
|
1697
|
+
/*use_upper_bounds=*/false,
|
|
1698
|
+
/*verify_values=*/true,
|
|
1699
|
+
"round=" + std::to_string(round));
|
|
1700
|
+
|
|
1701
|
+
db_->ReleaseSnapshot(snap);
|
|
1702
|
+
}
|
|
1703
|
+
}
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
// ---------------------------------------------------------------------------
|
|
1707
|
+
// Regression test for the FindShortSuccessor last-block bug.
|
|
1708
|
+
//
|
|
1709
|
+
// Before the fix, TrieIndexBuilder::AddIndexEntry called
|
|
1710
|
+
// FindShortSuccessor() on the last block's separator key, producing a
|
|
1711
|
+
// shorter key that covered a wider range than the actual data. For example,
|
|
1712
|
+
// if the last key's user key was "9\xff\xff", FindShortSuccessor would
|
|
1713
|
+
// produce ":" (0x3A), making the trie claim it covers keys up to ":". A
|
|
1714
|
+
// seek for "9\xff\xff\x01" (between the real last key and ":") would find a
|
|
1715
|
+
// block via the trie but not via the standard index, causing prefix scan
|
|
1716
|
+
// iterators to desynchronize.
|
|
1717
|
+
//
|
|
1718
|
+
// The standard ShortenedIndexBuilder (with default kShortenSeparators mode)
|
|
1719
|
+
// does NOT call FindShortSuccessor on the last block — it uses the last key
|
|
1720
|
+
// as-is. The fix makes the trie builder match this behavior.
|
|
1721
|
+
// ---------------------------------------------------------------------------
|
|
1722
|
+
TEST_F(TrieIndexDBTest, LastBlockSeparatorNotShortened) {
|
|
1723
|
+
// Use a small block size so each key lands in its own block.
|
|
1724
|
+
ASSERT_OK(OpenDB(/*block_size=*/32));
|
|
1725
|
+
|
|
1726
|
+
// Write keys where the last key has trailing 0xFF bytes, which
|
|
1727
|
+
// FindShortSuccessor would shorten by incrementing the byte before the
|
|
1728
|
+
// 0xFF suffix ("9\xff\xff" -> ":").
|
|
1729
|
+
ASSERT_OK(db_->Put(WriteOptions(), "1aaa", "v1"));
|
|
1730
|
+
ASSERT_OK(db_->Put(WriteOptions(), "5bbb", "v2"));
|
|
1731
|
+
ASSERT_OK(db_->Put(WriteOptions(), std::string("9\xff\xff", 3), "v3"));
|
|
1732
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1733
|
+
|
|
1734
|
+
// The key "9\xff\xff\x01" is lexicographically after "9\xff\xff" but
|
|
1735
|
+
// before ":" (0x3A). With the old bug, the trie would return a valid
|
|
1736
|
+
// block for this key. With the fix, both indexes correctly say "not
|
|
1737
|
+
// found".
|
|
1738
|
+
std::string seek_target = std::string("9\xff\xff\x01", 4);
|
|
1739
|
+
|
|
1740
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1741
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1742
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1743
|
+
|
|
1744
|
+
iter->Seek(seek_target);
|
|
1745
|
+
ASSERT_TRUE(!iter->Valid()) << "Expected no key at or after seek_target, "
|
|
1746
|
+
<< "but got: " << iter->key().ToString(true);
|
|
1747
|
+
ASSERT_OK(iter->status());
|
|
1748
|
+
}
|
|
1749
|
+
|
|
1750
|
+
// Also verify the actual last key is still findable.
|
|
1751
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1752
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1753
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1754
|
+
|
|
1755
|
+
iter->Seek(std::string("9\xff\xff", 3));
|
|
1756
|
+
ASSERT_TRUE(iter->Valid());
|
|
1757
|
+
ASSERT_EQ(iter->key().ToString(), std::string("9\xff\xff", 3));
|
|
1758
|
+
ASSERT_EQ(iter->value().ToString(), "v3");
|
|
1759
|
+
|
|
1760
|
+
// After this key, there should be nothing more.
|
|
1761
|
+
iter->Next();
|
|
1762
|
+
ASSERT_TRUE(!iter->Valid());
|
|
1763
|
+
ASSERT_OK(iter->status());
|
|
1764
|
+
}
|
|
1765
|
+
}
|
|
1766
|
+
|
|
1767
|
+
// Variant: tests that when deletes remove the last key, seeking past the last
|
|
1768
|
+
// remaining key correctly returns "not found" with both indexes.
|
|
1769
|
+
TEST_F(TrieIndexDBTest, LastBlockSeparatorWithDeletes) {
|
|
1770
|
+
ASSERT_OK(OpenDB(/*block_size=*/32));
|
|
1771
|
+
|
|
1772
|
+
// Write and flush initial data.
|
|
1773
|
+
ASSERT_OK(db_->Put(WriteOptions(), "1aaa", "v1"));
|
|
1774
|
+
ASSERT_OK(db_->Put(WriteOptions(), "5bbb", "v2"));
|
|
1775
|
+
ASSERT_OK(db_->Put(WriteOptions(), std::string("9\xff\xff", 3), "v3"));
|
|
1776
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1777
|
+
|
|
1778
|
+
// Delete the last key and flush (creates a tombstone in a new SST).
|
|
1779
|
+
ASSERT_OK(db_->Delete(WriteOptions(), std::string("9\xff\xff", 3)));
|
|
1780
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1781
|
+
|
|
1782
|
+
// Now seeking for the deleted key should yield "5bbb" or nothing,
|
|
1783
|
+
// depending on the seek target. Both indexes must agree.
|
|
1784
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1785
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1786
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1787
|
+
|
|
1788
|
+
// Seek to the deleted key — should skip it and land on nothing (it was
|
|
1789
|
+
// the last key).
|
|
1790
|
+
iter->Seek(std::string("9\xff\xff", 3));
|
|
1791
|
+
ASSERT_TRUE(!iter->Valid())
|
|
1792
|
+
<< "Deleted key should not be visible, but got: "
|
|
1793
|
+
<< iter->key().ToString(true);
|
|
1794
|
+
ASSERT_OK(iter->status());
|
|
1795
|
+
|
|
1796
|
+
// Seek to a key between "5bbb" and the deleted key — should find "5bbb"
|
|
1797
|
+
// or nothing depending on order. Actually, "6" > "5bbb" and "6" <
|
|
1798
|
+
// "9\xff\xff", so seeking "6" should find nothing since there's no key
|
|
1799
|
+
// >= "6" that's still alive.
|
|
1800
|
+
iter->Seek("6");
|
|
1801
|
+
ASSERT_TRUE(!iter->Valid()) << "No live key >= '6' should exist, but got: "
|
|
1802
|
+
<< iter->key().ToString(true);
|
|
1803
|
+
ASSERT_OK(iter->status());
|
|
1804
|
+
}
|
|
1805
|
+
|
|
1806
|
+
// Compact to merge the tombstone, then verify again.
|
|
1807
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
1808
|
+
|
|
1809
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1810
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1811
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1812
|
+
|
|
1813
|
+
iter->SeekToFirst();
|
|
1814
|
+
ASSERT_TRUE(iter->Valid());
|
|
1815
|
+
ASSERT_EQ(iter->key().ToString(), "1aaa");
|
|
1816
|
+
iter->Next();
|
|
1817
|
+
ASSERT_TRUE(iter->Valid());
|
|
1818
|
+
ASSERT_EQ(iter->key().ToString(), "5bbb");
|
|
1819
|
+
iter->Next();
|
|
1820
|
+
ASSERT_TRUE(!iter->Valid());
|
|
1821
|
+
ASSERT_OK(iter->status());
|
|
1822
|
+
}
|
|
1823
|
+
}
|
|
1824
|
+
|
|
1825
|
+
// Single-entry SST: the trie has exactly one leaf. Validates that Seek,
|
|
1826
|
+
// SeekToFirst, Next, and Get all work with a one-block, one-key SST.
|
|
1827
|
+
TEST_F(TrieIndexDBTest, SingleEntrySST) {
|
|
1828
|
+
ASSERT_OK(OpenDB());
|
|
1829
|
+
ASSERT_OK(db_->Put(WriteOptions(), "only_key", "only_val"));
|
|
1830
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1831
|
+
|
|
1832
|
+
// Point lookup.
|
|
1833
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("only_key", "only_val"));
|
|
1834
|
+
|
|
1835
|
+
// Forward scan: exactly one result.
|
|
1836
|
+
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
1837
|
+
std::vector<std::pair<std::string, std::string>>{
|
|
1838
|
+
{"only_key", "only_val"}}));
|
|
1839
|
+
|
|
1840
|
+
// Seek to the exact key.
|
|
1841
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
1842
|
+
VerifySeekBothIndexes("only_key", "only_key", "only_val"));
|
|
1843
|
+
|
|
1844
|
+
// Seek before the key — should land on it.
|
|
1845
|
+
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("a", "only_key", "only_val"));
|
|
1846
|
+
|
|
1847
|
+
// Seek past the key — should be invalid.
|
|
1848
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1849
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
1850
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1851
|
+
iter->Seek("z");
|
|
1852
|
+
ASSERT_FALSE(iter->Valid());
|
|
1853
|
+
ASSERT_OK(iter->status());
|
|
1854
|
+
}
|
|
1855
|
+
}
|
|
1856
|
+
|
|
1857
|
+
// Deletion-only SST: flush a Put, then flush a Delete for that key so the
|
|
1858
|
+
// second SST contains only a tombstone. After compaction, the key is gone.
|
|
1859
|
+
TEST_F(TrieIndexDBTest, DeletionOnlySST) {
|
|
1860
|
+
ASSERT_OK(OpenDB());
|
|
1861
|
+
|
|
1862
|
+
// Flush 1: a real Put.
|
|
1863
|
+
ASSERT_OK(db_->Put(WriteOptions(), "del_target", "val"));
|
|
1864
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1865
|
+
|
|
1866
|
+
// Flush 2: only a Delete — this creates an SST whose only entry is a
|
|
1867
|
+
// tombstone (the trie still builds an index for the block containing it).
|
|
1868
|
+
ASSERT_OK(db_->Delete(WriteOptions(), "del_target"));
|
|
1869
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1870
|
+
|
|
1871
|
+
// The tombstone hides the Put.
|
|
1872
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_target"));
|
|
1873
|
+
|
|
1874
|
+
// Forward scan: nothing visible.
|
|
1875
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
1876
|
+
VerifyForwardScanBothIndexes(std::vector<std::string>{}));
|
|
1877
|
+
|
|
1878
|
+
// Compact to merge: key is fully removed.
|
|
1879
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
1880
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_target"));
|
|
1881
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
1882
|
+
VerifyForwardScanBothIndexes(std::vector<std::string>{}));
|
|
1883
|
+
}
|
|
1884
|
+
|
|
1885
|
+
// All-same-key SST: multiple versions of the same user key (via snapshots)
|
|
1886
|
+
// land in the same SST, possibly spanning multiple blocks. Validates that
|
|
1887
|
+
// the trie's same-key-run handling (seqno-based separators) works at the
|
|
1888
|
+
// DB level through both indexes.
|
|
1889
|
+
TEST_F(TrieIndexDBTest, AllSameKeySST) {
|
|
1890
|
+
options_.disable_auto_compactions = true;
|
|
1891
|
+
// Small block size to force multiple blocks for the same user key.
|
|
1892
|
+
ASSERT_OK(OpenDB(/*block_size=*/32));
|
|
1893
|
+
|
|
1894
|
+
// Write several versions of the same key with snapshots to prevent GC.
|
|
1895
|
+
std::vector<const Snapshot*> snaps;
|
|
1896
|
+
for (int i = 0; i < 10; i++) {
|
|
1897
|
+
ASSERT_OK(db_->Put(WriteOptions(), "same_key", "val_" + std::to_string(i)));
|
|
1898
|
+
snaps.push_back(db_->GetSnapshot());
|
|
1899
|
+
}
|
|
1900
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1901
|
+
|
|
1902
|
+
// Latest value is visible.
|
|
1903
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("same_key", "val_9"));
|
|
1904
|
+
|
|
1905
|
+
// Forward scan: only the latest version is visible (without snapshot).
|
|
1906
|
+
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
1907
|
+
std::vector<std::pair<std::string, std::string>>{{"same_key", "val_9"}}));
|
|
1908
|
+
|
|
1909
|
+
// Each snapshot should see the correct version.
|
|
1910
|
+
for (int i = 0; i < 10; i++) {
|
|
1911
|
+
SCOPED_TRACE("snapshot " + std::to_string(i));
|
|
1912
|
+
std::string expected = "val_" + std::to_string(i);
|
|
1913
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
1914
|
+
VerifyGetBothIndexes(snaps[i], "same_key", expected));
|
|
1915
|
+
|
|
1916
|
+
// Forward scan with snapshot.
|
|
1917
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
1918
|
+
VerifyForwardScanBothIndexes(snaps[i], {{"same_key", expected}}));
|
|
1919
|
+
}
|
|
1920
|
+
|
|
1921
|
+
// Seek with earliest snapshot — should find the earliest version.
|
|
1922
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
1923
|
+
VerifySeekBothIndexes(snaps[0], "same_key", "same_key", "val_0"));
|
|
1924
|
+
|
|
1925
|
+
for (auto* snap : snaps) {
|
|
1926
|
+
db_->ReleaseSnapshot(snap);
|
|
1927
|
+
}
|
|
1928
|
+
}
|
|
1929
|
+
|
|
1930
|
+
// Operations on a completely empty DB: nothing should crash, and after
|
|
1931
|
+
// creating + deleting all data, the DB should correctly return nothing.
|
|
1932
|
+
TEST_F(TrieIndexDBTest, EmptyDBOperations) {
|
|
1933
|
+
ASSERT_OK(OpenDB());
|
|
1934
|
+
|
|
1935
|
+
// Get / Seek / SeekToFirst on empty memtable (no SSTs yet).
|
|
1936
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("anything"));
|
|
1937
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1938
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
1939
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1940
|
+
iter->SeekToFirst();
|
|
1941
|
+
ASSERT_FALSE(iter->Valid());
|
|
1942
|
+
ASSERT_OK(iter->status());
|
|
1943
|
+
iter->Seek("anything");
|
|
1944
|
+
ASSERT_FALSE(iter->Valid());
|
|
1945
|
+
ASSERT_OK(iter->status());
|
|
1946
|
+
}
|
|
1947
|
+
|
|
1948
|
+
// Create an SST, delete its only key, compact → DB has no live data but
|
|
1949
|
+
// the trie code path was exercised during flush.
|
|
1950
|
+
ASSERT_OK(db_->Put(WriteOptions(), "temp", "val"));
|
|
1951
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1952
|
+
ASSERT_OK(db_->Delete(WriteOptions(), "temp"));
|
|
1953
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1954
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
1955
|
+
|
|
1956
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("temp"));
|
|
1957
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1958
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
1959
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1960
|
+
iter->SeekToFirst();
|
|
1961
|
+
ASSERT_FALSE(iter->Valid());
|
|
1962
|
+
ASSERT_OK(iter->status());
|
|
1963
|
+
}
|
|
1964
|
+
}
|
|
1965
|
+
|
|
1966
|
+
// Focused seek-pattern tests: before all data, between blocks, exact match,
|
|
1967
|
+
// after all data, and empty-key seek.
|
|
1968
|
+
TEST_F(TrieIndexDBTest, SeekEdgeCases) {
|
|
1969
|
+
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
1970
|
+
|
|
1971
|
+
// Write keys with deliberate gaps.
|
|
1972
|
+
for (const auto& k : {"bbb", "ddd", "fff", "hhh"}) {
|
|
1973
|
+
ASSERT_OK(db_->Put(WriteOptions(), k, std::string("v_") + k));
|
|
1974
|
+
}
|
|
1975
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
1976
|
+
|
|
1977
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
1978
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
1979
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
1980
|
+
|
|
1981
|
+
// Before first key.
|
|
1982
|
+
iter->Seek("aaa");
|
|
1983
|
+
ASSERT_TRUE(iter->Valid());
|
|
1984
|
+
ASSERT_EQ(iter->key().ToString(), "bbb");
|
|
1985
|
+
|
|
1986
|
+
// Exact first key.
|
|
1987
|
+
iter->Seek("bbb");
|
|
1988
|
+
ASSERT_TRUE(iter->Valid());
|
|
1989
|
+
ASSERT_EQ(iter->key().ToString(), "bbb");
|
|
1990
|
+
|
|
1991
|
+
// Between keys.
|
|
1992
|
+
iter->Seek("ccc");
|
|
1993
|
+
ASSERT_TRUE(iter->Valid());
|
|
1994
|
+
ASSERT_EQ(iter->key().ToString(), "ddd");
|
|
1995
|
+
|
|
1996
|
+
// Between keys (eee → fff).
|
|
1997
|
+
iter->Seek("eee");
|
|
1998
|
+
ASSERT_TRUE(iter->Valid());
|
|
1999
|
+
ASSERT_EQ(iter->key().ToString(), "fff");
|
|
2000
|
+
|
|
2001
|
+
// Exact last key.
|
|
2002
|
+
iter->Seek("hhh");
|
|
2003
|
+
ASSERT_TRUE(iter->Valid());
|
|
2004
|
+
ASSERT_EQ(iter->key().ToString(), "hhh");
|
|
2005
|
+
|
|
2006
|
+
// After last key.
|
|
2007
|
+
iter->Seek("zzz");
|
|
2008
|
+
ASSERT_FALSE(iter->Valid());
|
|
2009
|
+
ASSERT_OK(iter->status());
|
|
2010
|
+
|
|
2011
|
+
// Empty key (smallest possible key for BytewiseComparator).
|
|
2012
|
+
iter->Seek("");
|
|
2013
|
+
ASSERT_TRUE(iter->Valid());
|
|
2014
|
+
ASSERT_EQ(iter->key().ToString(), "bbb");
|
|
2015
|
+
}
|
|
2016
|
+
}
|
|
2017
|
+
|
|
2018
|
+
// PutEntity + GetEntity through the trie index read path.
|
|
2019
|
+
TEST_F(TrieIndexDBTest, GetEntityWithTrieUDI) {
|
|
2020
|
+
ASSERT_OK(OpenDB());
|
|
2021
|
+
|
|
2022
|
+
// PutEntity with wide columns.
|
|
2023
|
+
WideColumns columns{
|
|
2024
|
+
{kDefaultWideColumnName, "default_val"},
|
|
2025
|
+
{"col_a", "val_a"},
|
|
2026
|
+
{"col_b", "val_b"},
|
|
2027
|
+
};
|
|
2028
|
+
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
2029
|
+
"entity_key", columns));
|
|
2030
|
+
// Also a regular Put to verify GetEntity reads it as a single default column.
|
|
2031
|
+
ASSERT_OK(db_->Put(WriteOptions(), "regular_key", "regular_val"));
|
|
2032
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2033
|
+
|
|
2034
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2035
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2036
|
+
|
|
2037
|
+
// GetEntity on a PutEntity key.
|
|
2038
|
+
PinnableWideColumns result;
|
|
2039
|
+
ASSERT_OK(
|
|
2040
|
+
db_->GetEntity(ro, db_->DefaultColumnFamily(), "entity_key", &result));
|
|
2041
|
+
ASSERT_EQ(result.columns().size(), 3u);
|
|
2042
|
+
ASSERT_EQ(result.columns()[0].name(), kDefaultWideColumnName);
|
|
2043
|
+
ASSERT_EQ(result.columns()[0].value(), "default_val");
|
|
2044
|
+
ASSERT_EQ(result.columns()[1].name(), "col_a");
|
|
2045
|
+
ASSERT_EQ(result.columns()[1].value(), "val_a");
|
|
2046
|
+
ASSERT_EQ(result.columns()[2].name(), "col_b");
|
|
2047
|
+
ASSERT_EQ(result.columns()[2].value(), "val_b");
|
|
2048
|
+
|
|
2049
|
+
// GetEntity on a regular Put key returns single default column.
|
|
2050
|
+
PinnableWideColumns result2;
|
|
2051
|
+
ASSERT_OK(db_->GetEntity(ro, db_->DefaultColumnFamily(), "regular_key",
|
|
2052
|
+
&result2));
|
|
2053
|
+
ASSERT_EQ(result2.columns().size(), 1u);
|
|
2054
|
+
ASSERT_EQ(result2.columns()[0].name(), kDefaultWideColumnName);
|
|
2055
|
+
ASSERT_EQ(result2.columns()[0].value(), "regular_val");
|
|
2056
|
+
|
|
2057
|
+
// GetEntity on nonexistent key.
|
|
2058
|
+
PinnableWideColumns result3;
|
|
2059
|
+
ASSERT_TRUE(
|
|
2060
|
+
db_->GetEntity(ro, db_->DefaultColumnFamily(), "no_such_key", &result3)
|
|
2061
|
+
.IsNotFound());
|
|
2062
|
+
}
|
|
2063
|
+
}
|
|
2064
|
+
|
|
2065
|
+
// Multiple overlapping L0 SSTs: the level iterator must coordinate trie
|
|
2066
|
+
// iterators across multiple SST files with overlapping key ranges.
|
|
2067
|
+
TEST_F(TrieIndexDBTest, OverlappingL0SSTs) {
|
|
2068
|
+
options_.disable_auto_compactions = true;
|
|
2069
|
+
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
2070
|
+
|
|
2071
|
+
// SST1: keys 00..49.
|
|
2072
|
+
for (int i = 0; i < 50; i++) {
|
|
2073
|
+
char key[16];
|
|
2074
|
+
snprintf(key, sizeof(key), "key_%03d", i);
|
|
2075
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, "sst1_" + std::to_string(i)));
|
|
2076
|
+
}
|
|
2077
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2078
|
+
|
|
2079
|
+
// SST2: keys 25..74 (overlapping with SST1).
|
|
2080
|
+
for (int i = 25; i < 75; i++) {
|
|
2081
|
+
char key[16];
|
|
2082
|
+
snprintf(key, sizeof(key), "key_%03d", i);
|
|
2083
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, "sst2_" + std::to_string(i)));
|
|
2084
|
+
}
|
|
2085
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2086
|
+
|
|
2087
|
+
// SST3: keys 50..99 (overlapping with SST2).
|
|
2088
|
+
for (int i = 50; i < 100; i++) {
|
|
2089
|
+
char key[16];
|
|
2090
|
+
snprintf(key, sizeof(key), "key_%03d", i);
|
|
2091
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, "sst3_" + std::to_string(i)));
|
|
2092
|
+
}
|
|
2093
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2094
|
+
|
|
2095
|
+
// Verify: latest writer wins for overlapping keys.
|
|
2096
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2097
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2098
|
+
auto kvs = ScanAllKeyValues(ro);
|
|
2099
|
+
ASSERT_EQ(kvs.size(), 100u);
|
|
2100
|
+
for (int i = 0; i < 100; i++) {
|
|
2101
|
+
char key[16];
|
|
2102
|
+
snprintf(key, sizeof(key), "key_%03d", i);
|
|
2103
|
+
ASSERT_EQ(kvs[i].first, key);
|
|
2104
|
+
if (i < 25) {
|
|
2105
|
+
ASSERT_EQ(kvs[i].second, "sst1_" + std::to_string(i));
|
|
2106
|
+
} else if (i < 50) {
|
|
2107
|
+
ASSERT_EQ(kvs[i].second, "sst2_" + std::to_string(i));
|
|
2108
|
+
} else {
|
|
2109
|
+
ASSERT_EQ(kvs[i].second, "sst3_" + std::to_string(i));
|
|
2110
|
+
}
|
|
2111
|
+
}
|
|
2112
|
+
}
|
|
2113
|
+
|
|
2114
|
+
// Compact all L0 → L1, re-verify.
|
|
2115
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2116
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2117
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2118
|
+
ASSERT_EQ(ScanAllKeyValues(ro).size(), 100u);
|
|
2119
|
+
}
|
|
2120
|
+
}
|
|
2121
|
+
|
|
2122
|
+
// CompactRange with a sub-range: only part of the key space is compacted.
|
|
2123
|
+
TEST_F(TrieIndexDBTest, CompactRangeSubset) {
|
|
2124
|
+
options_.disable_auto_compactions = true;
|
|
2125
|
+
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
2126
|
+
|
|
2127
|
+
for (int i = 0; i < 26; i++) {
|
|
2128
|
+
char key[16];
|
|
2129
|
+
snprintf(key, sizeof(key), "key_%c", 'a' + i);
|
|
2130
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, "val_" + std::to_string(i)));
|
|
2131
|
+
}
|
|
2132
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2133
|
+
|
|
2134
|
+
// Compact only the middle range [key_f, key_p).
|
|
2135
|
+
std::string begin = "key_f";
|
|
2136
|
+
std::string end = "key_p";
|
|
2137
|
+
Slice begin_s(begin);
|
|
2138
|
+
Slice end_s(end);
|
|
2139
|
+
CompactRangeOptions cro;
|
|
2140
|
+
ASSERT_OK(db_->CompactRange(cro, &begin_s, &end_s));
|
|
2141
|
+
|
|
2142
|
+
// All 26 keys should still be readable.
|
|
2143
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2144
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2145
|
+
ASSERT_EQ(ScanAllKeys(ro).size(), 26u);
|
|
2146
|
+
}
|
|
2147
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_a", "val_0"));
|
|
2148
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_z", "val_25"));
|
|
2149
|
+
}
|
|
2150
|
+
|
|
2151
|
+
// Write keys, delete all of them, compact. The DB should be empty.
|
|
2152
|
+
TEST_F(TrieIndexDBTest, AllKeysDeletedCompaction) {
|
|
2153
|
+
ASSERT_OK(OpenDB());
|
|
2154
|
+
|
|
2155
|
+
for (int i = 0; i < 20; i++) {
|
|
2156
|
+
char key[16];
|
|
2157
|
+
snprintf(key, sizeof(key), "key_%02d", i);
|
|
2158
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, "val"));
|
|
2159
|
+
}
|
|
2160
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2161
|
+
|
|
2162
|
+
// Delete all keys.
|
|
2163
|
+
for (int i = 0; i < 20; i++) {
|
|
2164
|
+
char key[16];
|
|
2165
|
+
snprintf(key, sizeof(key), "key_%02d", i);
|
|
2166
|
+
ASSERT_OK(db_->Delete(WriteOptions(), key));
|
|
2167
|
+
}
|
|
2168
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2169
|
+
|
|
2170
|
+
// Before compaction: tombstones hide all keys.
|
|
2171
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2172
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2173
|
+
ASSERT_EQ(ScanAllKeys(ro).size(), 0u);
|
|
2174
|
+
}
|
|
2175
|
+
|
|
2176
|
+
// After compaction: all tombstones and data are gone.
|
|
2177
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2178
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2179
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2180
|
+
ASSERT_EQ(ScanAllKeys(ro).size(), 0u);
|
|
2181
|
+
}
|
|
2182
|
+
}
|
|
2183
|
+
|
|
2184
|
+
// Keys with special byte values: 0x00, 0xFF, embedded nulls, very short keys.
|
|
2185
|
+
// These exercise trie byte-traversal edge cases.
|
|
2186
|
+
TEST_F(TrieIndexDBTest, BinaryKeyEdgeCases) {
|
|
2187
|
+
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
2188
|
+
|
|
2189
|
+
// All keys in sorted order (BytewiseComparator).
|
|
2190
|
+
std::vector<std::pair<std::string, std::string>> kvs = {
|
|
2191
|
+
{std::string("\x00", 1), "val_null"},
|
|
2192
|
+
{std::string("\x00\x00\x00", 3), "val_triple_null"},
|
|
2193
|
+
{std::string("\x01", 1), "val_0x01"},
|
|
2194
|
+
{"a", "val_a"},
|
|
2195
|
+
{std::string("a\x00"
|
|
2196
|
+
"b",
|
|
2197
|
+
3),
|
|
2198
|
+
"val_a_null_b"},
|
|
2199
|
+
{"mid", "val_mid"},
|
|
2200
|
+
{std::string("\xfe", 1), "val_0xfe"},
|
|
2201
|
+
{std::string("\xff", 1), "val_0xff"},
|
|
2202
|
+
{std::string("\xff\xff\xff", 3), "val_triple_ff"},
|
|
2203
|
+
};
|
|
2204
|
+
|
|
2205
|
+
for (const auto& kv : kvs) {
|
|
2206
|
+
ASSERT_OK(db_->Put(WriteOptions(), kv.first, kv.second));
|
|
2207
|
+
}
|
|
2208
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2209
|
+
|
|
2210
|
+
// Forward scan: all keys in order through both indexes.
|
|
2211
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2212
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2213
|
+
auto actual = ScanAllKeyValues(ro);
|
|
2214
|
+
ASSERT_EQ(actual.size(), kvs.size());
|
|
2215
|
+
for (size_t i = 0; i < kvs.size(); i++) {
|
|
2216
|
+
SCOPED_TRACE("key index " + std::to_string(i));
|
|
2217
|
+
ASSERT_EQ(actual[i].first, kvs[i].first);
|
|
2218
|
+
ASSERT_EQ(actual[i].second, kvs[i].second);
|
|
2219
|
+
}
|
|
2220
|
+
}
|
|
2221
|
+
|
|
2222
|
+
// Point lookups for boundary keys.
|
|
2223
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
2224
|
+
VerifyGetBothIndexes(std::string("\x00", 1), "val_null"));
|
|
2225
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
2226
|
+
VerifyGetBothIndexes(std::string("\xff\xff\xff", 3), "val_triple_ff"));
|
|
2227
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(std::string("a\x00"
|
|
2228
|
+
"b",
|
|
2229
|
+
3),
|
|
2230
|
+
"val_a_null_b"));
|
|
2231
|
+
|
|
2232
|
+
// Seek to embedded-null key.
|
|
2233
|
+
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes(
|
|
2234
|
+
std::string("\x00", 1), std::string("\x00", 1), "val_null"));
|
|
2235
|
+
}
|
|
2236
|
+
|
|
2237
|
+
// Puts with empty string values.
|
|
2238
|
+
TEST_F(TrieIndexDBTest, EmptyValuePuts) {
|
|
2239
|
+
ASSERT_OK(OpenDB());
|
|
2240
|
+
|
|
2241
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key1", ""));
|
|
2242
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key2", "non_empty"));
|
|
2243
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key3", ""));
|
|
2244
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2245
|
+
|
|
2246
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key1", ""));
|
|
2247
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key2", "non_empty"));
|
|
2248
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key3", ""));
|
|
2249
|
+
|
|
2250
|
+
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
2251
|
+
std::vector<std::pair<std::string, std::string>>{
|
|
2252
|
+
{"key1", ""}, {"key2", "non_empty"}, {"key3", ""}}));
|
|
2253
|
+
}
|
|
2254
|
+
|
|
2255
|
+
// Zlib compression: data blocks are compressed, UDI block is not.
|
|
2256
|
+
// Verifies that reads through the trie index work with compressed data.
|
|
2257
|
+
TEST_F(TrieIndexDBTest, CompressionZlib) {
|
|
2258
|
+
if (!Zlib_Supported()) {
|
|
2259
|
+
ROCKSDB_GTEST_SKIP("Zlib not linked");
|
|
2260
|
+
return;
|
|
2261
|
+
}
|
|
2262
|
+
options_.compression = kZlibCompression;
|
|
2263
|
+
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
2264
|
+
|
|
2265
|
+
for (int i = 0; i < 100; i++) {
|
|
2266
|
+
char key[16];
|
|
2267
|
+
snprintf(key, sizeof(key), "key_%04d", i);
|
|
2268
|
+
// Compressible value (repeated pattern).
|
|
2269
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, std::string(200, 'A' + (i % 26))));
|
|
2270
|
+
}
|
|
2271
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2272
|
+
|
|
2273
|
+
// Forward scan.
|
|
2274
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2275
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2276
|
+
ASSERT_EQ(ScanAllKeys(ro).size(), 100u);
|
|
2277
|
+
}
|
|
2278
|
+
|
|
2279
|
+
// Spot-check a few keys.
|
|
2280
|
+
for (int i : {0, 49, 99}) {
|
|
2281
|
+
char key[16];
|
|
2282
|
+
snprintf(key, sizeof(key), "key_%04d", i);
|
|
2283
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
2284
|
+
VerifyGetBothIndexes(key, std::string(200, 'A' + (i % 26))));
|
|
2285
|
+
}
|
|
2286
|
+
}
|
|
2287
|
+
|
|
2288
|
+
// Iterator stability: an iterator pinned to a snapshot should not see data
|
|
2289
|
+
// written after the iterator was created, even after flush.
|
|
2290
|
+
TEST_F(TrieIndexDBTest, IteratorStabilityDuringFlush) {
|
|
2291
|
+
ASSERT_OK(OpenDB());
|
|
2292
|
+
|
|
2293
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key1", "v1"));
|
|
2294
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key2", "v2"));
|
|
2295
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2296
|
+
|
|
2297
|
+
// Open iterator (implicitly pins a snapshot).
|
|
2298
|
+
auto ro = TrieIndexReadOptions();
|
|
2299
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
2300
|
+
iter->SeekToFirst();
|
|
2301
|
+
ASSERT_TRUE(iter->Valid());
|
|
2302
|
+
ASSERT_EQ(iter->key().ToString(), "key1");
|
|
2303
|
+
|
|
2304
|
+
// Write + flush new data while iterator is open.
|
|
2305
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key3", "v3"));
|
|
2306
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2307
|
+
|
|
2308
|
+
// Existing iterator should NOT see key3.
|
|
2309
|
+
iter->Next();
|
|
2310
|
+
ASSERT_TRUE(iter->Valid());
|
|
2311
|
+
ASSERT_EQ(iter->key().ToString(), "key2");
|
|
2312
|
+
iter->Next();
|
|
2313
|
+
ASSERT_FALSE(iter->Valid());
|
|
2314
|
+
ASSERT_OK(iter->status());
|
|
2315
|
+
|
|
2316
|
+
// New iterator should see all three keys.
|
|
2317
|
+
std::unique_ptr<Iterator> iter2(db_->NewIterator(ro));
|
|
2318
|
+
iter2->SeekToFirst();
|
|
2319
|
+
ASSERT_TRUE(iter2->Valid());
|
|
2320
|
+
ASSERT_EQ(iter2->key().ToString(), "key1");
|
|
2321
|
+
iter2->Next();
|
|
2322
|
+
ASSERT_TRUE(iter2->Valid());
|
|
2323
|
+
ASSERT_EQ(iter2->key().ToString(), "key2");
|
|
2324
|
+
iter2->Next();
|
|
2325
|
+
ASSERT_TRUE(iter2->Valid());
|
|
2326
|
+
ASSERT_EQ(iter2->key().ToString(), "key3");
|
|
2327
|
+
ASSERT_OK(iter2->status());
|
|
2328
|
+
}
|
|
2329
|
+
|
|
2330
|
+
// iterate_upper_bound without prefix scan: the iterator should stop at the
|
|
2331
|
+
// upper bound.
|
|
2332
|
+
TEST_F(TrieIndexDBTest, IteratorUpperBound) {
|
|
2333
|
+
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
2334
|
+
|
|
2335
|
+
for (const auto& k : {"aa", "bb", "cc", "dd", "ee", "ff"}) {
|
|
2336
|
+
ASSERT_OK(db_->Put(WriteOptions(), k, std::string("v_") + k));
|
|
2337
|
+
}
|
|
2338
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2339
|
+
|
|
2340
|
+
for (const auto& base_ro :
|
|
2341
|
+
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2342
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
2343
|
+
|
|
2344
|
+
// Upper bound = "dd" → should see aa, bb, cc only.
|
|
2345
|
+
std::string ub_str = "dd";
|
|
2346
|
+
Slice ub(ub_str);
|
|
2347
|
+
ReadOptions ro = base_ro;
|
|
2348
|
+
ro.iterate_upper_bound = &ub;
|
|
2349
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
2350
|
+
std::vector<std::string> keys;
|
|
2351
|
+
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
2352
|
+
keys.push_back(iter->key().ToString());
|
|
2353
|
+
}
|
|
2354
|
+
ASSERT_OK(iter->status());
|
|
2355
|
+
ASSERT_EQ(keys, (std::vector<std::string>{"aa", "bb", "cc"}));
|
|
2356
|
+
|
|
2357
|
+
// Upper bound = "aa" → should see nothing.
|
|
2358
|
+
std::string ub2_str = "aa";
|
|
2359
|
+
Slice ub2(ub2_str);
|
|
2360
|
+
ReadOptions ro2 = base_ro;
|
|
2361
|
+
ro2.iterate_upper_bound = &ub2;
|
|
2362
|
+
std::unique_ptr<Iterator> iter2(db_->NewIterator(ro2));
|
|
2363
|
+
iter2->SeekToFirst();
|
|
2364
|
+
ASSERT_FALSE(iter2->Valid());
|
|
2365
|
+
ASSERT_OK(iter2->status());
|
|
2366
|
+
|
|
2367
|
+
// Upper bound after all data → should see everything.
|
|
2368
|
+
std::string ub3_str = "zz";
|
|
2369
|
+
Slice ub3(ub3_str);
|
|
2370
|
+
ReadOptions ro3 = base_ro;
|
|
2371
|
+
ro3.iterate_upper_bound = &ub3;
|
|
2372
|
+
std::unique_ptr<Iterator> iter3(db_->NewIterator(ro3));
|
|
2373
|
+
std::vector<std::string> all_keys;
|
|
2374
|
+
for (iter3->SeekToFirst(); iter3->Valid(); iter3->Next()) {
|
|
2375
|
+
all_keys.push_back(iter3->key().ToString());
|
|
2376
|
+
}
|
|
2377
|
+
ASSERT_OK(iter3->status());
|
|
2378
|
+
ASSERT_EQ(all_keys.size(), 6u);
|
|
2379
|
+
}
|
|
2380
|
+
}
|
|
2381
|
+
|
|
2382
|
+
// Combined snapshot + upper_bound: iterator sees the snapshot's view of data,
|
|
2383
|
+
// bounded by iterate_upper_bound.
|
|
2384
|
+
TEST_F(TrieIndexDBTest, IteratorSnapshotAndUpperBound) {
|
|
2385
|
+
ASSERT_OK(OpenDB());
|
|
2386
|
+
|
|
2387
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_a", "old_a"));
|
|
2388
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_b", "old_b"));
|
|
2389
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_c", "old_c"));
|
|
2390
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_d", "old_d"));
|
|
2391
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2392
|
+
|
|
2393
|
+
const Snapshot* snap = db_->GetSnapshot();
|
|
2394
|
+
|
|
2395
|
+
// Overwrite some keys after the snapshot.
|
|
2396
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_a", "new_a"));
|
|
2397
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_c", "new_c"));
|
|
2398
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_e", "new_e"));
|
|
2399
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2400
|
+
|
|
2401
|
+
for (const auto& base_ro :
|
|
2402
|
+
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2403
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
2404
|
+
|
|
2405
|
+
std::string ub_str = "key_d";
|
|
2406
|
+
Slice ub(ub_str);
|
|
2407
|
+
ReadOptions ro = base_ro;
|
|
2408
|
+
ro.snapshot = snap;
|
|
2409
|
+
ro.iterate_upper_bound = &ub;
|
|
2410
|
+
|
|
2411
|
+
auto kvs = ScanAllKeyValues(ro);
|
|
2412
|
+
// Snapshot view: old values. Upper bound excludes key_d and key_e.
|
|
2413
|
+
ASSERT_EQ(kvs.size(), 3u);
|
|
2414
|
+
ASSERT_EQ(kvs[0],
|
|
2415
|
+
std::make_pair(std::string("key_a"), std::string("old_a")));
|
|
2416
|
+
ASSERT_EQ(kvs[1],
|
|
2417
|
+
std::make_pair(std::string("key_b"), std::string("old_b")));
|
|
2418
|
+
ASSERT_EQ(kvs[2],
|
|
2419
|
+
std::make_pair(std::string("key_c"), std::string("old_c")));
|
|
2420
|
+
}
|
|
2421
|
+
db_->ReleaseSnapshot(snap);
|
|
2422
|
+
}
|
|
2423
|
+
|
|
2424
|
+
// VerifyChecksum goes through SeekToFirst+Next on the index iterator.
|
|
2425
|
+
TEST_F(TrieIndexDBTest, VerifyChecksumWithTrieUDI) {
|
|
2426
|
+
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
2427
|
+
|
|
2428
|
+
for (int i = 0; i < 50; i++) {
|
|
2429
|
+
char key[16];
|
|
2430
|
+
snprintf(key, sizeof(key), "key_%03d", i);
|
|
2431
|
+
ASSERT_OK(db_->Put(WriteOptions(), key, "value_" + std::to_string(i)));
|
|
2432
|
+
}
|
|
2433
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2434
|
+
|
|
2435
|
+
// VerifyChecksum with default ReadOptions (standard index).
|
|
2436
|
+
ASSERT_OK(db_->VerifyChecksum());
|
|
2437
|
+
|
|
2438
|
+
// VerifyChecksum with trie ReadOptions.
|
|
2439
|
+
ASSERT_OK(db_->VerifyChecksum(TrieIndexReadOptions()));
|
|
2440
|
+
}
|
|
2441
|
+
|
|
2442
|
+
// Many small SSTs from frequent flushes: exercises trie iteration across
|
|
2443
|
+
// many L0 files without compaction.
|
|
2444
|
+
TEST_F(TrieIndexDBTest, ManySmallSSTs) {
|
|
2445
|
+
options_.disable_auto_compactions = true;
|
|
2446
|
+
ASSERT_OK(OpenDB());
|
|
2447
|
+
|
|
2448
|
+
// 50 flushes, 2 keys each → 50 SSTs.
|
|
2449
|
+
for (int f = 0; f < 50; f++) {
|
|
2450
|
+
char k1[16];
|
|
2451
|
+
char k2[16];
|
|
2452
|
+
snprintf(k1, sizeof(k1), "key_%04d", f * 2);
|
|
2453
|
+
snprintf(k2, sizeof(k2), "key_%04d", f * 2 + 1);
|
|
2454
|
+
ASSERT_OK(db_->Put(WriteOptions(), k1, "v" + std::to_string(f * 2)));
|
|
2455
|
+
ASSERT_OK(db_->Put(WriteOptions(), k2, "v" + std::to_string(f * 2 + 1)));
|
|
2456
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2457
|
+
}
|
|
2458
|
+
|
|
2459
|
+
// Verify all 100 keys are readable.
|
|
2460
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2461
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2462
|
+
auto keys = ScanAllKeys(ro);
|
|
2463
|
+
ASSERT_EQ(keys.size(), 100u);
|
|
2464
|
+
}
|
|
2465
|
+
|
|
2466
|
+
// Spot-check first and last.
|
|
2467
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_0000", "v0"));
|
|
2468
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_0099", "v99"));
|
|
2469
|
+
|
|
2470
|
+
// Compact everything into one SST, re-verify.
|
|
2471
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2472
|
+
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2473
|
+
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
2474
|
+
ASSERT_EQ(ScanAllKeys(ro).size(), 100u);
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2477
|
+
|
|
2478
|
+
// Merge values accumulate across multiple compaction rounds.
|
|
2479
|
+
TEST_F(TrieIndexDBTest, MergeAcrossMultipleCompactions) {
|
|
2480
|
+
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
2481
|
+
ASSERT_OK(OpenDB());
|
|
2482
|
+
|
|
2483
|
+
// Round 1: Put base value.
|
|
2484
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key", "base"));
|
|
2485
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2486
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2487
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key", "base"));
|
|
2488
|
+
|
|
2489
|
+
// Round 2: Merge "m1".
|
|
2490
|
+
ASSERT_OK(db_->Merge(WriteOptions(), "key", "m1"));
|
|
2491
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2492
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2493
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key", "base,m1"));
|
|
2494
|
+
|
|
2495
|
+
// Round 3: Merge "m2".
|
|
2496
|
+
ASSERT_OK(db_->Merge(WriteOptions(), "key", "m2"));
|
|
2497
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2498
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2499
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key", "base,m1,m2"));
|
|
2500
|
+
|
|
2501
|
+
// Forward scan also returns the accumulated value.
|
|
2502
|
+
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
2503
|
+
std::vector<std::pair<std::string, std::string>>{{"key", "base,m1,m2"}}));
|
|
2504
|
+
}
|
|
2505
|
+
|
|
2506
|
+
// Graceful degradation: reopen a DB that was written with UDI, but without
|
|
2507
|
+
// the UDI factory configured. Reads should fall back to the standard index.
|
|
2508
|
+
TEST_F(TrieIndexDBTest, ReopenWithoutTrieUDI) {
|
|
2509
|
+
ASSERT_OK(OpenDB());
|
|
2510
|
+
|
|
2511
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_a", "val_a"));
|
|
2512
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_b", "val_b"));
|
|
2513
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2514
|
+
ASSERT_OK(db_->Close());
|
|
2515
|
+
db_.reset();
|
|
2516
|
+
|
|
2517
|
+
// Reopen WITHOUT UDI. The SST has a UDI meta block, but it's ignored.
|
|
2518
|
+
ASSERT_OK(OpenDBWithoutUDI());
|
|
2519
|
+
|
|
2520
|
+
// Reads via standard index should work (UDI meta block is just ignored).
|
|
2521
|
+
std::string val;
|
|
2522
|
+
ASSERT_OK(db_->Get(ReadOptions(), "key_a", &val));
|
|
2523
|
+
ASSERT_EQ(val, "val_a");
|
|
2524
|
+
ASSERT_OK(db_->Get(ReadOptions(), "key_b", &val));
|
|
2525
|
+
ASSERT_EQ(val, "val_b");
|
|
2526
|
+
|
|
2527
|
+
// Forward scan.
|
|
2528
|
+
auto keys = ScanAllKeys(ReadOptions());
|
|
2529
|
+
ASSERT_EQ(keys.size(), 2u);
|
|
2530
|
+
ASSERT_EQ(keys[0], "key_a");
|
|
2531
|
+
ASSERT_EQ(keys[1], "key_b");
|
|
2532
|
+
}
|
|
2533
|
+
|
|
2534
|
+
// Mixed SSTs: some written with UDI, some without. Both should be readable
|
|
2535
|
+
// through both index paths.
|
|
2536
|
+
TEST_F(TrieIndexDBTest, MixedSSTsWithAndWithoutUDI) {
|
|
2537
|
+
options_.disable_auto_compactions = true;
|
|
2538
|
+
|
|
2539
|
+
// Phase 1: Write with UDI → SST1 has UDI + standard index.
|
|
2540
|
+
ASSERT_OK(OpenDB());
|
|
2541
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "udi_val1"));
|
|
2542
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_02", "udi_val2"));
|
|
2543
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2544
|
+
ASSERT_OK(db_->Close());
|
|
2545
|
+
db_.reset();
|
|
2546
|
+
|
|
2547
|
+
// Phase 2: Reopen WITHOUT UDI, write more → SST2 has only standard index.
|
|
2548
|
+
ASSERT_OK(OpenDBWithoutUDI());
|
|
2549
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_03", "noudi_val3"));
|
|
2550
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key_04", "noudi_val4"));
|
|
2551
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2552
|
+
ASSERT_OK(db_->Close());
|
|
2553
|
+
db_.reset();
|
|
2554
|
+
|
|
2555
|
+
// Phase 3: Reopen WITH UDI again. SST1 uses trie, SST2 falls back to
|
|
2556
|
+
// standard index (UDI block missing → logged warning, graceful fallback).
|
|
2557
|
+
options_.disable_auto_compactions = true;
|
|
2558
|
+
ASSERT_OK(OpenDB());
|
|
2559
|
+
|
|
2560
|
+
// All 4 keys should be readable through both index paths.
|
|
2561
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "udi_val1"));
|
|
2562
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "udi_val2"));
|
|
2563
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03", "noudi_val3"));
|
|
2564
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_04", "noudi_val4"));
|
|
2565
|
+
|
|
2566
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
2567
|
+
VerifyForwardScanBothIndexes({"key_01", "key_02", "key_03", "key_04"}));
|
|
2568
|
+
|
|
2569
|
+
// Compact: merges UDI + non-UDI SSTs → new SST has UDI.
|
|
2570
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
2571
|
+
ASSERT_NO_FATAL_FAILURE(
|
|
2572
|
+
VerifyForwardScanBothIndexes({"key_01", "key_02", "key_03", "key_04"}));
|
|
2573
|
+
}
|
|
2574
|
+
|
|
2575
|
+
// TransactionDB commit: Put + Delete inside a transaction, then commit.
|
|
2576
|
+
TEST_F(TrieIndexDBTest, TransactionCommit) {
|
|
2577
|
+
options_.create_if_missing = true;
|
|
2578
|
+
BlockBasedTableOptions table_options;
|
|
2579
|
+
table_options.user_defined_index_factory = trie_factory_;
|
|
2580
|
+
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
2581
|
+
last_options_ = options_;
|
|
2582
|
+
|
|
2583
|
+
TransactionDB* txn_db = nullptr;
|
|
2584
|
+
ASSERT_OK(
|
|
2585
|
+
TransactionDB::Open(options_, TransactionDBOptions(), dbname_, &txn_db));
|
|
2586
|
+
db_.reset(txn_db);
|
|
2587
|
+
|
|
2588
|
+
// Pre-populate a key.
|
|
2589
|
+
ASSERT_OK(txn_db->Put(WriteOptions(), "pre_key", "pre_val"));
|
|
2590
|
+
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
2591
|
+
|
|
2592
|
+
// Begin transaction: Put + Delete + Commit.
|
|
2593
|
+
std::unique_ptr<Transaction> txn(
|
|
2594
|
+
txn_db->BeginTransaction(WriteOptions(), TransactionOptions()));
|
|
2595
|
+
ASSERT_OK(txn->Put("txn_key1", "txn_val1"));
|
|
2596
|
+
ASSERT_OK(txn->Delete("pre_key"));
|
|
2597
|
+
ASSERT_OK(txn->Commit());
|
|
2598
|
+
|
|
2599
|
+
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
2600
|
+
|
|
2601
|
+
// Verify through both indexes.
|
|
2602
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("txn_key1", "txn_val1"));
|
|
2603
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("pre_key"));
|
|
2604
|
+
}
|
|
2605
|
+
|
|
2606
|
+
// TransactionDB rollback: writes should be discarded. Rollback writes DELETE
|
|
2607
|
+
// entries to WAL, which was previously restricted for UDI.
|
|
2608
|
+
TEST_F(TrieIndexDBTest, TransactionRollback) {
|
|
2609
|
+
options_.create_if_missing = true;
|
|
2610
|
+
BlockBasedTableOptions table_options;
|
|
2611
|
+
table_options.user_defined_index_factory = trie_factory_;
|
|
2612
|
+
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
2613
|
+
last_options_ = options_;
|
|
2614
|
+
|
|
2615
|
+
TransactionDB* txn_db = nullptr;
|
|
2616
|
+
ASSERT_OK(
|
|
2617
|
+
TransactionDB::Open(options_, TransactionDBOptions(), dbname_, &txn_db));
|
|
2618
|
+
db_.reset(txn_db);
|
|
2619
|
+
|
|
2620
|
+
// Pre-populate data and flush.
|
|
2621
|
+
ASSERT_OK(txn_db->Put(WriteOptions(), "keep_key", "keep_val"));
|
|
2622
|
+
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
2623
|
+
|
|
2624
|
+
// Begin transaction, write, then ROLLBACK.
|
|
2625
|
+
std::unique_ptr<Transaction> txn(
|
|
2626
|
+
txn_db->BeginTransaction(WriteOptions(), TransactionOptions()));
|
|
2627
|
+
ASSERT_OK(txn->Put("rollback_key", "rollback_val"));
|
|
2628
|
+
ASSERT_OK(txn->Delete("keep_key"));
|
|
2629
|
+
ASSERT_OK(txn->Rollback());
|
|
2630
|
+
|
|
2631
|
+
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
2632
|
+
|
|
2633
|
+
// Original data should be unchanged. Rolled-back writes should not appear.
|
|
2634
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("keep_key", "keep_val"));
|
|
2635
|
+
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("rollback_key"));
|
|
2636
|
+
|
|
2637
|
+
// Forward scan: only the original key.
|
|
2638
|
+
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
2639
|
+
std::vector<std::pair<std::string, std::string>>{
|
|
2640
|
+
{"keep_key", "keep_val"}}));
|
|
2641
|
+
}
|
|
2642
|
+
|
|
2643
|
+
// total_order_seek with prefix_extractor: a common stress-test configuration.
|
|
2644
|
+
// With total_order_seek=true, SeekToFirst and full forward scan should work
|
|
2645
|
+
// correctly even when a prefix extractor is configured.
|
|
2646
|
+
TEST_F(TrieIndexDBTest, TotalOrderSeekWithPrefixExtractor) {
|
|
2647
|
+
options_.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
|
2648
|
+
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
2649
|
+
|
|
2650
|
+
// Keys with different prefixes.
|
|
2651
|
+
ASSERT_OK(db_->Put(WriteOptions(), "aaa_1", "v1"));
|
|
2652
|
+
ASSERT_OK(db_->Put(WriteOptions(), "aaa_2", "v2"));
|
|
2653
|
+
ASSERT_OK(db_->Put(WriteOptions(), "bbb_1", "v3"));
|
|
2654
|
+
ASSERT_OK(db_->Put(WriteOptions(), "ccc_1", "v4"));
|
|
2655
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
2656
|
+
|
|
2657
|
+
// With total_order_seek=true, scan all keys across prefixes.
|
|
2658
|
+
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2659
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
2660
|
+
base_ro.total_order_seek = true;
|
|
2661
|
+
auto keys = ScanAllKeys(base_ro);
|
|
2662
|
+
ASSERT_EQ(keys.size(), 4u);
|
|
2663
|
+
ASSERT_EQ(keys[0], "aaa_1");
|
|
2664
|
+
ASSERT_EQ(keys[1], "aaa_2");
|
|
2665
|
+
ASSERT_EQ(keys[2], "bbb_1");
|
|
2666
|
+
ASSERT_EQ(keys[3], "ccc_1");
|
|
2667
|
+
|
|
2668
|
+
// Seek across prefix boundary.
|
|
2669
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(base_ro));
|
|
2670
|
+
iter->Seek("aab");
|
|
2671
|
+
ASSERT_TRUE(iter->Valid());
|
|
2672
|
+
ASSERT_EQ(iter->key().ToString(), "bbb_1");
|
|
2673
|
+
ASSERT_OK(iter->status());
|
|
2674
|
+
}
|
|
2675
|
+
|
|
2676
|
+
// auto_prefix_mode: let RocksDB decide per-seek.
|
|
2677
|
+
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
2678
|
+
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
2679
|
+
base_ro.auto_prefix_mode = true;
|
|
2680
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(base_ro));
|
|
2681
|
+
iter->Seek("bbb_1");
|
|
2682
|
+
ASSERT_TRUE(iter->Valid());
|
|
2683
|
+
ASSERT_EQ(iter->key().ToString(), "bbb_1");
|
|
2684
|
+
ASSERT_OK(iter->status());
|
|
2685
|
+
}
|
|
2686
|
+
}
|
|
2687
|
+
|
|
2688
|
+
// ============================================================================
|
|
2689
|
+
// Multi-level SST + DeleteRange randomized test
|
|
2690
|
+
//
|
|
2691
|
+
// Historically bug-prone area: range tombstones interact with data across
|
|
2692
|
+
// LSM levels (L0, L1, L2+), and the trie index must correctly handle
|
|
2693
|
+
// seek/scan when blocks are partially or entirely covered by range deletions
|
|
2694
|
+
// at different levels.
|
|
2695
|
+
//
|
|
2696
|
+
// Strategy:
|
|
2697
|
+
// 1. Populate bottommost level with baseline data (flush + compact)
|
|
2698
|
+
// 2. Write overlapping data and DeleteRanges to L0 (multiple rounds)
|
|
2699
|
+
// 3. Partial compactions to create data at intermediate levels
|
|
2700
|
+
// 4. Verify reads match between standard and trie index after each mutation
|
|
2701
|
+
// 5. Snapshot before large DeleteRange, verify snapshot preserves state
|
|
2702
|
+
// 6. Re-insert into deleted ranges, compact, and re-verify
|
|
2703
|
+
// ============================================================================
|
|
2704
|
+
TEST_F(TrieIndexDBTest, MultiLevelDeleteRangeRandomized) {
  // Differential test: after every mutation, a full forward scan through the
  // standard index must return exactly the same key/value pairs as a scan
  // through the trie index. The seed is time-based and attached to every
  // failure message via SCOPED_TRACE so a failing run can be reproduced.
  uint32_t seed = static_cast<uint32_t>(
      std::chrono::system_clock::now().time_since_epoch().count());
  SCOPED_TRACE("seed=" + std::to_string(seed));
  Random rnd(seed);

  // Compactions are triggered explicitly below so the test controls when
  // data moves between levels.
  options_.disable_auto_compactions = true;
  // Small block size forces many data blocks (and thus many trie entries).
  ASSERT_OK(OpenDB(/*block_size=*/256));

  // Keys are drawn from [0, kMaxKey); kMaxKey itself is only ever used as an
  // exclusive DeleteRange end.
  const int kMaxKey = 500;

  // Zero-padded so lexicographic key order matches numeric order.
  auto format_key = [](int k) {
    char buf[16];
    snprintf(buf, sizeof(buf), "key_%05d", k);
    return std::string(buf);
  };

  // Core correctness check: forward scan via both indexes must match.
  // Uses ASSERT_*, so callers wrap it in ASSERT_NO_FATAL_FAILURE to stop the
  // test on a mismatch.
  auto verify_scan_consistency = [&]() {
    auto standard_kvs = ScanAllKeyValues(StandardIndexReadOptions());
    auto trie_kvs = ScanAllKeyValues(TrieIndexReadOptions());
    ASSERT_EQ(standard_kvs, trie_kvs)
        << "Scan mismatch: standard=" << standard_kvs.size()
        << " trie=" << trie_kvs.size();
  };

  // Phase 1: Populate bottommost level with baseline data.
  for (int i = 0; i < 200; i++) {
    int k = rnd.Uniform(kMaxKey);
    ASSERT_OK(db_->Put(WriteOptions(), format_key(k),
                       "base_" + rnd.RandomString(20)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());

  // Phase 2: Write overlapping data + DeleteRanges across multiple rounds.
  // Each round creates L0 SSTs with a mix of Puts and DeleteRanges,
  // with occasional partial compactions to push data to intermediate levels.
  for (int round = 0; round < 5; round++) {
    SCOPED_TRACE("round=" + std::to_string(round));

    // Write some new/updated keys (30-99 per round).
    int num_writes = 30 + rnd.Uniform(70);
    for (int i = 0; i < num_writes; i++) {
      int k = rnd.Uniform(kMaxKey);
      ASSERT_OK(
          db_->Put(WriteOptions(), format_key(k),
                   "r" + std::to_string(round) + "_" + rnd.RandomString(15)));
    }

    // Issue 1-3 random DeleteRanges per round, each spanning 5-54 keys and
    // clamped so the exclusive end never exceeds kMaxKey.
    int num_ranges = 1 + rnd.Uniform(3);
    for (int r = 0; r < num_ranges; r++) {
      int range_start = rnd.Uniform(kMaxKey - 10);
      int range_end = range_start + 5 + rnd.Uniform(50);
      if (range_end > kMaxKey) {
        range_end = kMaxKey;
      }
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 format_key(range_start),
                                 format_key(range_end)));
    }

    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());

    // On odd rounds, do a partial compaction to push some data down,
    // creating a multi-level structure where range tombstones at L0
    // must shadow data at L1/L2.
    if (round % 2 == 1) {
      int compact_start = rnd.Uniform(kMaxKey / 2);
      int compact_end = compact_start + kMaxKey / 4;
      // The strings must outlive the Slices, which only reference them.
      std::string start_key = format_key(compact_start);
      std::string end_key = format_key(compact_end);
      Slice s(start_key);
      Slice e(end_key);
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &s, &e));
      ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
    }
  }

  // Phase 3: Snapshot, then delete a large range. The snapshot must
  // preserve the pre-deletion state while current reads see the deletion.
  //
  // The range bounds are picked before the snapshot scope opens because
  // Phase 4 re-inserts into the same range after the snapshot is gone.
  int big_start = rnd.Uniform(kMaxKey / 4);
  int big_end = big_start + kMaxKey / 3;
  if (big_end > kMaxKey) {
    big_end = kMaxKey;
  }
  {
    // Scoped ownership: the snapshot is released when this block exits,
    // including the early return taken by any failing ASSERT_* below. A
    // manual ReleaseSnapshot() after the assertions would be skipped on
    // failure, leaving the snapshot registered with the DB during teardown.
    // `&*db_` yields a DB* whether db_ is a raw or a smart pointer.
    ManagedSnapshot snap(&*db_);
    // No writes have happened since the snapshot was taken, so a plain scan
    // captures exactly the state the snapshot is expected to preserve.
    auto snap_kvs = ScanAllKeyValues(StandardIndexReadOptions());

    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               format_key(big_start), format_key(big_end)));
    ASSERT_OK(db_->Flush(FlushOptions()));

    // Current state should reflect the deletion.
    ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());

    // Snapshot state should be unchanged.
    ASSERT_NO_FATAL_FAILURE(
        VerifyForwardScanBothIndexes(snap.snapshot(), snap_kvs));
  }

  // Phase 4: Re-insert keys into the deleted range, creating a pattern
  // where range tombstones and live data coexist at different levels.
  for (int i = big_start; i < big_end && i < kMaxKey; i += 3) {
    ASSERT_OK(db_->Put(WriteOptions(), format_key(i),
                       "reinserted_" + rnd.RandomString(10)));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());

  // Phase 5: Full compaction — all range tombstones should be resolved.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());

  // Phase 6: Point lookups for a sample of keys — both indexes must agree.
  for (int i = 0; i < kMaxKey; i += 7) {
    std::string key = format_key(i);
    std::string std_val;
    std::string trie_val;
    Status s1 = db_->Get(StandardIndexReadOptions(), key, &std_val);
    Status s2 = db_->Get(TrieIndexReadOptions(), key, &trie_val);
    // Compare status codes first: a key may legitimately be absent, but then
    // it must be absent through both indexes.
    ASSERT_EQ(s1.code(), s2.code()) << "Status mismatch for " << key;
    if (s1.ok()) {
      ASSERT_EQ(std_val, trie_val) << "Value mismatch for " << key;
    }
  }
}
|
|
2835
|
+
|
|
2836
|
+
} // namespace trie_index
|
|
2837
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
2838
|
+
|
|
2839
|
+
int main(int argc, char** argv) {
|
|
2840
|
+
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
2841
|
+
::testing::InitGoogleTest(&argc, argv);
|
|
2842
|
+
return RUN_ALL_TESTS();
|
|
2843
|
+
}
|