@nxtedition/rocksdb 15.4.1 → 15.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +24 -15
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/iterator.js +2 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -40,8 +40,11 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
40
40
|
if (!multi_scan_status_.ok()) {
|
|
41
41
|
return;
|
|
42
42
|
}
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
|
|
44
|
+
// MultiScan requires an explicit seek key — SeekToFirst() is not supported
|
|
45
|
+
if (multi_scan_read_set_ && !target) {
|
|
46
|
+
multi_scan_status_ = Status::InvalidArgument("No seek key for MultiScan");
|
|
47
|
+
RecordTick(table_->GetStatistics(), MULTISCAN_SEEK_ERRORS);
|
|
45
48
|
return;
|
|
46
49
|
}
|
|
47
50
|
|
|
@@ -67,7 +70,7 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
67
70
|
read_options_.auto_readahead_size &&
|
|
68
71
|
(read_options_.iterate_upper_bound || read_options_.prefix_same_as_start);
|
|
69
72
|
|
|
70
|
-
if (autotune_readaheadsize &&
|
|
73
|
+
if (autotune_readaheadsize && !multi_scan_read_set_ &&
|
|
71
74
|
table_->get_rep()->table_options.block_cache.get() &&
|
|
72
75
|
direction_ == IterDirection::kForward) {
|
|
73
76
|
readahead_cache_lookup_ = true;
|
|
@@ -97,8 +100,10 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
97
100
|
// In case of readahead_cache_lookup_, index_iter_ could change to find the
|
|
98
101
|
// readahead size in BlockCacheLookupForReadAheadSize so it needs to
|
|
99
102
|
// reseek.
|
|
100
|
-
|
|
101
|
-
|
|
103
|
+
// MultiScan must always go through index_iter_->Seek() so that
|
|
104
|
+
// MultiScanIndexIterator can update its scan range tracking state.
|
|
105
|
+
if (!multi_scan_read_set_ && IsIndexAtCurr() &&
|
|
106
|
+
block_iter_points_to_real_block_ && block_iter_.Valid()) {
|
|
102
107
|
// Reseek.
|
|
103
108
|
prev_block_offset_ = index_iter_->value().handle.offset();
|
|
104
109
|
|
|
@@ -152,7 +157,7 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
152
157
|
} else {
|
|
153
158
|
// Need to use the data block.
|
|
154
159
|
if (!same_block) {
|
|
155
|
-
if (read_options_.async_io && async_prefetch) {
|
|
160
|
+
if (read_options_.async_io && async_prefetch && !multi_scan_read_set_) {
|
|
156
161
|
AsyncInitDataBlock(/*is_first_pass=*/true);
|
|
157
162
|
if (async_read_in_progress_) {
|
|
158
163
|
// Status::TryAgain indicates asynchronous request for retrieval of
|
|
@@ -163,6 +168,10 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
163
168
|
}
|
|
164
169
|
} else {
|
|
165
170
|
InitDataBlock();
|
|
171
|
+
if (multi_scan_read_set_ && !block_iter_points_to_real_block_) {
|
|
172
|
+
// MultiScan InitDataBlock failed (e.g., prefetch limit or IO error)
|
|
173
|
+
return;
|
|
174
|
+
}
|
|
166
175
|
}
|
|
167
176
|
} else {
|
|
168
177
|
// When the user does a reseek, the iterate_upper_bound might have
|
|
@@ -184,12 +193,19 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
184
193
|
CheckOutOfBound();
|
|
185
194
|
|
|
186
195
|
if (target) {
|
|
187
|
-
|
|
196
|
+
// MultiScan uses user-key separators in its index, so after a reseek
|
|
197
|
+
// with the same user key but a different sequence number (e.g., from
|
|
198
|
+
// max_sequential_skip_in_iterations), the data block entry may appear
|
|
199
|
+
// "before" the target in internal key order. The user-key invariant
|
|
200
|
+
// still holds and the iteration is correct because DBIter will skip
|
|
201
|
+
// remaining same-user-key entries.
|
|
202
|
+
assert(multi_scan_read_set_ || !Valid() ||
|
|
203
|
+
icomp_.Compare(*target, key()) <= 0);
|
|
188
204
|
}
|
|
189
205
|
}
|
|
190
206
|
|
|
191
207
|
void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
|
|
192
|
-
|
|
208
|
+
ResetMultiScan();
|
|
193
209
|
direction_ = IterDirection::kBackward;
|
|
194
210
|
ResetBlockCacheLookupVar();
|
|
195
211
|
is_out_of_bound_ = false;
|
|
@@ -264,7 +280,7 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
|
|
|
264
280
|
}
|
|
265
281
|
|
|
266
282
|
void BlockBasedTableIterator::SeekToLast() {
|
|
267
|
-
|
|
283
|
+
ResetMultiScan();
|
|
268
284
|
direction_ = IterDirection::kBackward;
|
|
269
285
|
ResetBlockCacheLookupVar();
|
|
270
286
|
is_out_of_bound_ = false;
|
|
@@ -290,7 +306,7 @@ void BlockBasedTableIterator::SeekToLast() {
|
|
|
290
306
|
void BlockBasedTableIterator::Next() {
|
|
291
307
|
assert(Valid());
|
|
292
308
|
if (is_at_first_key_from_index_ && !MaterializeCurrentBlock()) {
|
|
293
|
-
assert(!
|
|
309
|
+
assert(!multi_scan_read_set_);
|
|
294
310
|
return;
|
|
295
311
|
}
|
|
296
312
|
assert(block_iter_points_to_real_block_);
|
|
@@ -311,9 +327,8 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) {
|
|
|
311
327
|
}
|
|
312
328
|
|
|
313
329
|
void BlockBasedTableIterator::Prev() {
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
multi_scan_.reset();
|
|
330
|
+
if ((readahead_cache_lookup_ && !IsIndexAtCurr()) || multi_scan_read_set_) {
|
|
331
|
+
ResetMultiScan();
|
|
317
332
|
// In case of readahead_cache_lookup_, index_iter_ has moved forward. So we
|
|
318
333
|
// need to reseek the index_iter_ to point to current block by using
|
|
319
334
|
// block_iter_'s key.
|
|
@@ -358,6 +373,41 @@ void BlockBasedTableIterator::Prev() {
|
|
|
358
373
|
}
|
|
359
374
|
|
|
360
375
|
void BlockBasedTableIterator::InitDataBlock() {
|
|
376
|
+
// MultiScan path: load block from ReadSet
|
|
377
|
+
if (multi_scan_read_set_) {
|
|
378
|
+
BlockHandle data_block_handle = index_iter_->value().handle;
|
|
379
|
+
if (!block_iter_points_to_real_block_ ||
|
|
380
|
+
data_block_handle.offset() != prev_block_offset_) {
|
|
381
|
+
if (block_iter_points_to_real_block_) {
|
|
382
|
+
ResetDataIter();
|
|
383
|
+
}
|
|
384
|
+
size_t rs_idx = multi_scan_index_iter_->current_read_set_index();
|
|
385
|
+
if (rs_idx >= prefetch_max_idx_) {
|
|
386
|
+
if (multi_scan_index_iter_->GetMaxPrefetchSize() == 0) {
|
|
387
|
+
// max_prefetch_size is not set, treat as end of file
|
|
388
|
+
return;
|
|
389
|
+
} else {
|
|
390
|
+
// max_prefetch_size is set, treat as error
|
|
391
|
+
multi_scan_status_ = Status::PrefetchLimitReached();
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
CachableEntry<Block> block_entry;
|
|
396
|
+
multi_scan_status_ =
|
|
397
|
+
multi_scan_read_set_->ReadIndex(rs_idx, &block_entry);
|
|
398
|
+
if (!multi_scan_status_.ok()) {
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
table_->NewDataBlockIterator<DataBlockIter>(read_options_, block_entry,
|
|
402
|
+
&block_iter_, Status::OK());
|
|
403
|
+
block_iter_points_to_real_block_ = true;
|
|
404
|
+
prev_block_offset_ = data_block_handle.offset();
|
|
405
|
+
CheckDataBlockWithinUpperBound();
|
|
406
|
+
}
|
|
407
|
+
return;
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
// Regular path
|
|
361
411
|
BlockHandle data_block_handle;
|
|
362
412
|
bool is_in_cache = false;
|
|
363
413
|
bool use_block_cache_for_lookup = true;
|
|
@@ -580,10 +630,6 @@ void BlockBasedTableIterator::FindKeyForward() {
|
|
|
580
630
|
}
|
|
581
631
|
|
|
582
632
|
void BlockBasedTableIterator::FindBlockForward() {
|
|
583
|
-
if (multi_scan_) {
|
|
584
|
-
FindBlockForwardInMultiScan();
|
|
585
|
-
return;
|
|
586
|
-
}
|
|
587
633
|
// TODO the while loop inherits from two-level-iterator. We don't know
|
|
588
634
|
// whether a block can be empty so it can be replaced by an "if".
|
|
589
635
|
do {
|
|
@@ -594,8 +640,14 @@ void BlockBasedTableIterator::FindBlockForward() {
|
|
|
594
640
|
// index_iter_ can point to different block in case of
|
|
595
641
|
// readahead_cache_lookup_. readahead_cache_lookup_ will be handle the
|
|
596
642
|
// upper_bound check.
|
|
643
|
+
// MultiScan handles scan range boundaries via IsScanRangeExhausted()
|
|
644
|
+
// after index_iter_->Next(), so we must not use the
|
|
645
|
+
// next_block_is_out_of_bound mechanism which can prematurely terminate
|
|
646
|
+
// a scan range when the block separator >= iterate_upper_bound but
|
|
647
|
+
// valid keys still remain in the current range's blocks.
|
|
597
648
|
bool next_block_is_out_of_bound =
|
|
598
|
-
IsIndexAtCurr() &&
|
|
649
|
+
!multi_scan_read_set_ && IsIndexAtCurr() &&
|
|
650
|
+
read_options_.iterate_upper_bound != nullptr &&
|
|
599
651
|
block_iter_points_to_real_block_ &&
|
|
600
652
|
block_upper_bound_check_ == BlockUpperBound::kUpperBoundInCurBlock;
|
|
601
653
|
|
|
@@ -627,6 +679,18 @@ void BlockBasedTableIterator::FindBlockForward() {
|
|
|
627
679
|
next_block_is_out_of_bound = is_index_out_of_bound_;
|
|
628
680
|
is_index_out_of_bound_ = false;
|
|
629
681
|
}
|
|
682
|
+
// MultiScan: detect scan range boundary after Next()
|
|
683
|
+
if (multi_scan_index_iter_ &&
|
|
684
|
+
multi_scan_index_iter_->IsScanRangeExhausted()) {
|
|
685
|
+
if (multi_scan_index_iter_->HasMoreScanRanges()) {
|
|
686
|
+
// More ranges remain — signal out-of-bound so DBIter/LevelIter
|
|
687
|
+
// will trigger the next Seek for the next scan range.
|
|
688
|
+
is_out_of_bound_ = true;
|
|
689
|
+
}
|
|
690
|
+
// For last range: index_iter_->Valid() is false, so we fall
|
|
691
|
+
// through to the !Valid() return below. LevelIterator advances.
|
|
692
|
+
return;
|
|
693
|
+
}
|
|
630
694
|
} else {
|
|
631
695
|
// Skip Next as index_iter_ already points to correct index when it
|
|
632
696
|
// iterates in BlockCacheLookupForReadAheadSize.
|
|
@@ -658,6 +722,10 @@ void BlockBasedTableIterator::FindBlockForward() {
|
|
|
658
722
|
}
|
|
659
723
|
}
|
|
660
724
|
InitDataBlock();
|
|
725
|
+
if (multi_scan_read_set_ && !block_iter_points_to_real_block_) {
|
|
726
|
+
// MultiScan InitDataBlock failed (prefetch limit or IO error)
|
|
727
|
+
return;
|
|
728
|
+
}
|
|
661
729
|
block_iter_.SeekToFirst();
|
|
662
730
|
} while (!block_iter_.Valid());
|
|
663
731
|
}
|
|
@@ -767,7 +835,7 @@ void BlockBasedTableIterator::InitializeStartAndEndOffsets(
|
|
|
767
835
|
// It can be when Reseek is from block cache (which doesn't clear the
|
|
768
836
|
// buffers in FilePrefetchBuffer but clears block handles from queue) and
|
|
769
837
|
// reseek also lies within the buffer. So Next will get data from
|
|
770
|
-
//
|
|
838
|
+
// existing buffers until this callback is made to prefetch additional
|
|
771
839
|
// data. All handles need to be added to the queue starting from
|
|
772
840
|
// index_iter_.
|
|
773
841
|
assert(index_iter_->Valid());
|
|
@@ -919,42 +987,6 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
|
|
|
919
987
|
ResetPreviousBlockOffset();
|
|
920
988
|
}
|
|
921
989
|
|
|
922
|
-
BlockBasedTableIterator::MultiScanState::~MultiScanState() {
|
|
923
|
-
// Abort any pending async IO operations to prevent callback being called
|
|
924
|
-
// after async read states are destructed.
|
|
925
|
-
if (!async_states.empty()) {
|
|
926
|
-
std::vector<void*> io_handles_to_abort;
|
|
927
|
-
std::vector<AsyncReadState*> states_to_cleanup;
|
|
928
|
-
|
|
929
|
-
// Collect all pending IO handles
|
|
930
|
-
for (size_t i = 0; i < async_states.size(); ++i) {
|
|
931
|
-
auto& async_read = async_states[i];
|
|
932
|
-
|
|
933
|
-
if (async_read.io_handle != nullptr) {
|
|
934
|
-
assert(!async_read.finished);
|
|
935
|
-
io_handles_to_abort.push_back(async_read.io_handle);
|
|
936
|
-
states_to_cleanup.push_back(&async_read);
|
|
937
|
-
}
|
|
938
|
-
}
|
|
939
|
-
|
|
940
|
-
if (!io_handles_to_abort.empty()) {
|
|
941
|
-
IOStatus abort_status = fs->AbortIO(io_handles_to_abort);
|
|
942
|
-
if (!abort_status.ok()) {
|
|
943
|
-
#ifndef NDEBUG
|
|
944
|
-
fprintf(stderr, "Error aborting async IO operations: %s\n",
|
|
945
|
-
abort_status.ToString().c_str());
|
|
946
|
-
#endif
|
|
947
|
-
assert(false);
|
|
948
|
-
}
|
|
949
|
-
(void)abort_status; // Suppress unused variable warning
|
|
950
|
-
}
|
|
951
|
-
|
|
952
|
-
for (auto async_read : states_to_cleanup) {
|
|
953
|
-
async_read->CleanUpIOHandle();
|
|
954
|
-
}
|
|
955
|
-
}
|
|
956
|
-
}
|
|
957
|
-
|
|
958
990
|
// Note:
|
|
959
991
|
// - Iterator should not be reused for multiple multiscans or mixing
|
|
960
992
|
// multiscan with regular iterator usage.
|
|
@@ -977,14 +1009,20 @@ BlockBasedTableIterator::MultiScanState::~MultiScanState() {
|
|
|
977
1009
|
// end key. These Seeks will be handled properly, as long as the target is
|
|
978
1010
|
// moving forward.
|
|
979
1011
|
void BlockBasedTableIterator::Prepare(const MultiScanArgs* multiscan_opts) {
|
|
980
|
-
assert(!
|
|
1012
|
+
assert(!multi_scan_read_set_);
|
|
1013
|
+
RecordTick(table_->GetStatistics(), MULTISCAN_PREPARE_CALLS);
|
|
1014
|
+
StopWatch sw(table_->get_rep()->ioptions.clock, table_->GetStatistics(),
|
|
1015
|
+
MULTISCAN_PREPARE_MICROS);
|
|
1016
|
+
|
|
981
1017
|
if (!index_iter_->status().ok()) {
|
|
982
1018
|
multi_scan_status_ = index_iter_->status();
|
|
1019
|
+
RecordTick(table_->GetStatistics(), MULTISCAN_PREPARE_ERRORS);
|
|
983
1020
|
return;
|
|
984
1021
|
}
|
|
985
|
-
if (
|
|
986
|
-
|
|
1022
|
+
if (multi_scan_read_set_) {
|
|
1023
|
+
multi_scan_read_set_.reset();
|
|
987
1024
|
multi_scan_status_ = Status::InvalidArgument("Prepare already called");
|
|
1025
|
+
RecordTick(table_->GetStatistics(), MULTISCAN_PREPARE_ERRORS);
|
|
988
1026
|
return;
|
|
989
1027
|
}
|
|
990
1028
|
|
|
@@ -998,457 +1036,73 @@ void BlockBasedTableIterator::Prepare(const MultiScanArgs* multiscan_opts) {
|
|
|
998
1036
|
CollectBlockHandles(scan_opts, &scan_block_handles,
|
|
999
1037
|
&block_index_ranges_per_scan, &data_block_separators);
|
|
1000
1038
|
if (!multi_scan_status_.ok()) {
|
|
1039
|
+
RecordTick(table_->GetStatistics(), MULTISCAN_PREPARE_ERRORS);
|
|
1001
1040
|
return;
|
|
1002
1041
|
}
|
|
1003
1042
|
|
|
1004
|
-
//
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
}
|
|
1015
|
-
|
|
1016
|
-
std::vector<AsyncReadState> async_states;
|
|
1017
|
-
// Maps from block index into async read request (index into async_states[])
|
|
1018
|
-
UnorderedMap<size_t, size_t> block_idx_to_readreq_idx;
|
|
1019
|
-
if (!block_indices_to_read.empty()) {
|
|
1020
|
-
std::vector<FSReadRequest> read_reqs;
|
|
1021
|
-
std::vector<std::vector<size_t>> coalesced_block_indices;
|
|
1022
|
-
PrepareIORequests(block_indices_to_read, scan_block_handles, multiscan_opts,
|
|
1023
|
-
&read_reqs, &block_idx_to_readreq_idx,
|
|
1024
|
-
&coalesced_block_indices);
|
|
1025
|
-
|
|
1026
|
-
multi_scan_status_ =
|
|
1027
|
-
ExecuteIO(scan_block_handles, multiscan_opts, coalesced_block_indices,
|
|
1028
|
-
&read_reqs, &async_states, &pinned_data_blocks_guard);
|
|
1029
|
-
if (!multi_scan_status_.ok()) {
|
|
1030
|
-
return;
|
|
1031
|
-
}
|
|
1032
|
-
}
|
|
1033
|
-
|
|
1034
|
-
// Successful Prepare, init related states so the iterator reads from prepared
|
|
1035
|
-
// blocks.
|
|
1036
|
-
multi_scan_ = std::make_unique<MultiScanState>(
|
|
1037
|
-
table_->get_rep()->ioptions.env->GetFileSystem(), multiscan_opts,
|
|
1038
|
-
std::move(pinned_data_blocks_guard), std::move(data_block_separators),
|
|
1039
|
-
std::move(block_index_ranges_per_scan),
|
|
1040
|
-
std::move(block_idx_to_readreq_idx), std::move(async_states),
|
|
1041
|
-
prefetched_max_idx);
|
|
1042
|
-
|
|
1043
|
-
is_index_at_curr_block_ = false;
|
|
1044
|
-
block_iter_points_to_real_block_ = false;
|
|
1045
|
-
}
|
|
1046
|
-
|
|
1047
|
-
void BlockBasedTableIterator::SeekMultiScan(const Slice* seek_target) {
|
|
1048
|
-
assert(multi_scan_ && multi_scan_status_.ok());
|
|
1049
|
-
// This is a MultiScan and Preapre() has been called.
|
|
1050
|
-
|
|
1051
|
-
// Reset out of bound on seek, if it is out of bound again, it will be set
|
|
1052
|
-
// properly later in the code path
|
|
1053
|
-
is_out_of_bound_ = false;
|
|
1054
|
-
|
|
1055
|
-
// Validate seek key with scan options
|
|
1056
|
-
if (!seek_target) {
|
|
1057
|
-
// start key must be set for multi-scan
|
|
1058
|
-
multi_scan_status_ = Status::InvalidArgument("No seek key for MultiScan");
|
|
1059
|
-
return;
|
|
1060
|
-
}
|
|
1061
|
-
|
|
1062
|
-
// Check the case where there is no range prepared on this table
|
|
1063
|
-
if (multi_scan_->scan_opts->size() == 0) {
|
|
1064
|
-
// out of bound
|
|
1065
|
-
MarkPreparedRangeExhausted();
|
|
1066
|
-
return;
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
// Check whether seek key is moving forward.
|
|
1070
|
-
if (multi_scan_->prev_seek_key_.empty() ||
|
|
1071
|
-
icomp_.Compare(*seek_target, multi_scan_->prev_seek_key_) > 0) {
|
|
1072
|
-
// If seek key is empty or is larger than previous seek key, update the
|
|
1073
|
-
// previous seek key. Otherwise use the previous seek key as the adjusted
|
|
1074
|
-
// seek target moving forward. This prevents seek target going backward,
|
|
1075
|
-
// which would visit pages that have been unpinned.
|
|
1076
|
-
// This issue is caused by sub-optimal range delete handling inside merge
|
|
1077
|
-
// iterator.
|
|
1078
|
-
// TODO xingbo issues:14068 : Optimize the handling of range delete iterator
|
|
1079
|
-
// inside merge iterator, so that it doesn't move seek key backward. After
|
|
1080
|
-
// that we could return error if the key moves backward here.
|
|
1081
|
-
multi_scan_->prev_seek_key_ = seek_target->ToString();
|
|
1082
|
-
} else {
|
|
1083
|
-
// Seek key is adjusted to previous one, we can return here directly.
|
|
1084
|
-
return;
|
|
1085
|
-
}
|
|
1086
|
-
|
|
1087
|
-
// There are 3 different Cases we need to handle:
|
|
1088
|
-
// The following diagram explain different seek targets seeking at various
|
|
1089
|
-
// position on the table, while the next_scan_idx points to the PreparedRange
|
|
1090
|
-
// 2.
|
|
1091
|
-
//
|
|
1092
|
-
// next_scan_idx: -------------------┐
|
|
1093
|
-
// ▼
|
|
1094
|
-
// table: : __[PreparedRange 1]__[PreparedRange 2]__[PreparedRange 3]__
|
|
1095
|
-
// Seek target: <----- Case 1 ------>▲<------------- Case 2 -------------->
|
|
1096
|
-
// │
|
|
1097
|
-
// Case 3
|
|
1098
|
-
//
|
|
1099
|
-
// Case 1: seek before the start of next prepared ranges. This could happen
|
|
1100
|
-
// due to too many delete tomestone triggered reseek or delete range.
|
|
1101
|
-
// Case 2: seek after the start of next prepared range.
|
|
1102
|
-
// This could happen due to seek key adjustment from delete range file.
|
|
1103
|
-
// E.g. LSM has 3 levels, each level has only 1 file:
|
|
1104
|
-
// L1 : key : 0---10
|
|
1105
|
-
// L2 : Delete range key : 0-5
|
|
1106
|
-
// L3 : key : 0---10
|
|
1107
|
-
// When a range 2-8 was prepared, the prepared key would be 2 on L3 file,
|
|
1108
|
-
// but the seek key would be 5, as the seek key was updated by the largest
|
|
1109
|
-
// key of delete range. This causes all of the cases above to be possible,
|
|
1110
|
-
// when the ranges are adjusted in the above examples.
|
|
1111
|
-
// Case 3: seek at the beginning of a prepared range (expected case)
|
|
1112
|
-
|
|
1113
|
-
// Allow reseek on the start of the last prepared range due to too many
|
|
1114
|
-
// tombstone
|
|
1115
|
-
multi_scan_->next_scan_idx =
|
|
1116
|
-
std::min(multi_scan_->next_scan_idx,
|
|
1117
|
-
multi_scan_->block_index_ranges_per_scan.size() - 1);
|
|
1118
|
-
|
|
1119
|
-
auto user_seek_target = ExtractUserKey(*seek_target);
|
|
1120
|
-
|
|
1121
|
-
auto compare_next_scan_start_result =
|
|
1122
|
-
user_comparator_.CompareWithoutTimestamp(
|
|
1123
|
-
user_seek_target, /*a_has_ts=*/true,
|
|
1124
|
-
multi_scan_->scan_opts->GetScanRanges()[multi_scan_->next_scan_idx]
|
|
1125
|
-
.range.start.value(),
|
|
1126
|
-
/*b_has_ts=*/false);
|
|
1127
|
-
|
|
1128
|
-
if (compare_next_scan_start_result != 0) {
|
|
1129
|
-
// The seek target is not exactly same as what was prepared.
|
|
1130
|
-
if (compare_next_scan_start_result < 0) {
|
|
1131
|
-
// Case 1:
|
|
1132
|
-
if (multi_scan_->next_scan_idx == 0) {
|
|
1133
|
-
// This should not happen, even when seek target is adjusted by delete
|
|
1134
|
-
// range. The reason is that if the seek target is before the start key
|
|
1135
|
-
// of the first prepared range, its end key needs to be >= the smallest
|
|
1136
|
-
// key of this file, otherwise it is skipped in level iterator. If its
|
|
1137
|
-
// end key is >= the smallest key of this file, then this range will be
|
|
1138
|
-
// prepared for this file. As delete range could only adjust seek
|
|
1139
|
-
// target forward, so it would never be before the start key of the
|
|
1140
|
-
// first prepared range.
|
|
1141
|
-
assert(false && "Seek target before the first prepared range");
|
|
1142
|
-
MarkPreparedRangeExhausted();
|
|
1143
|
-
return;
|
|
1043
|
+
// Calculate prefetch_max_idx (enforces max_prefetch_size)
|
|
1044
|
+
size_t prefetch_max_idx = scan_block_handles.size();
|
|
1045
|
+
if (multiscan_opts->max_prefetch_size > 0) {
|
|
1046
|
+
uint64_t total_size = 0;
|
|
1047
|
+
for (size_t i = 0; i < scan_block_handles.size(); ++i) {
|
|
1048
|
+
total_size +=
|
|
1049
|
+
BlockBasedTable::BlockSizeWithTrailer(scan_block_handles[i]);
|
|
1050
|
+
if (total_size > multiscan_opts->max_prefetch_size) {
|
|
1051
|
+
prefetch_max_idx = i;
|
|
1052
|
+
break;
|
|
1144
1053
|
}
|
|
1145
|
-
auto seek_target_before_previous_prepared_range =
|
|
1146
|
-
user_comparator_.CompareWithoutTimestamp(
|
|
1147
|
-
user_seek_target, /*a_has_ts=*/true,
|
|
1148
|
-
multi_scan_->scan_opts
|
|
1149
|
-
->GetScanRanges()[multi_scan_->next_scan_idx - 1]
|
|
1150
|
-
.range.start.value(),
|
|
1151
|
-
/*b_has_ts=*/false) < 0;
|
|
1152
|
-
// Not expected to happen
|
|
1153
|
-
// This should never happen, the reason is that the
|
|
1154
|
-
// multi_scan_->next_scan_idx is set to a non zero value is due to a seek
|
|
1155
|
-
// target larger or equal to the start key of multi_scan_->next_scan_idx-1
|
|
1156
|
-
// happended earlier. If a seek happens before the start key of
|
|
1157
|
-
// multi_scan_->next_scan_idx-1, it would seek a key that is less than
|
|
1158
|
-
// what was seeked before.
|
|
1159
|
-
assert(!seek_target_before_previous_prepared_range);
|
|
1160
|
-
if (seek_target_before_previous_prepared_range) {
|
|
1161
|
-
multi_scan_status_ = Status::InvalidArgument(
|
|
1162
|
-
"Seek target is before the previous prepared range at index " +
|
|
1163
|
-
std::to_string(multi_scan_->next_scan_idx));
|
|
1164
|
-
return;
|
|
1165
|
-
}
|
|
1166
|
-
// It should only be possible to seek a key between the start of current
|
|
1167
|
-
// prepared scan and start of next prepared range.
|
|
1168
|
-
MultiScanUnexpectedSeekTarget(seek_target, &user_seek_target);
|
|
1169
|
-
} else {
|
|
1170
|
-
// Case 2:
|
|
1171
|
-
MultiScanUnexpectedSeekTarget(seek_target, &user_seek_target);
|
|
1172
1054
|
}
|
|
1173
|
-
} else {
|
|
1174
|
-
// Case 2:
|
|
1175
|
-
assert(multi_scan_->next_scan_idx <
|
|
1176
|
-
multi_scan_->block_index_ranges_per_scan.size());
|
|
1177
|
-
|
|
1178
|
-
auto [cur_scan_start_idx, cur_scan_end_idx] =
|
|
1179
|
-
multi_scan_->block_index_ranges_per_scan[multi_scan_->next_scan_idx];
|
|
1180
|
-
// We should have the data block already loaded
|
|
1181
|
-
++multi_scan_->next_scan_idx;
|
|
1182
|
-
if (cur_scan_start_idx >= cur_scan_end_idx) {
|
|
1183
|
-
// No blocks are prepared for this range at current file.
|
|
1184
|
-
MarkPreparedRangeExhausted();
|
|
1185
|
-
return;
|
|
1186
|
-
}
|
|
1187
|
-
|
|
1188
|
-
// max_sequential_skip_in_iterations can trigger a reseek on the start
|
|
1189
|
-
// key of a scan range, even though the multiscan is already past
|
|
1190
|
-
// `cur_scan_start_idx` (e.g., a user key spans multiple data blocks).
|
|
1191
|
-
size_t block_idx =
|
|
1192
|
-
std::max(cur_scan_start_idx, multi_scan_->cur_data_block_idx);
|
|
1193
|
-
MultiScanSeekTargetFromBlock(seek_target, block_idx);
|
|
1194
1055
|
}
|
|
1195
|
-
}
|
|
1196
1056
|
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
// The logic here could be confusing when there is a delete range involved.
|
|
1203
|
-
// E.g. we have an LSM with 3 levels, each level has only 1 file:
|
|
1204
|
-
// L1: data file : 0---10
|
|
1205
|
-
// L2: Delete range : 0-5
|
|
1206
|
-
// L3: data file : 0---10
|
|
1207
|
-
//
|
|
1208
|
-
// MultiScan on ranges 1-2, 3-4, and 5-6.
|
|
1209
|
-
// When user first do Seek(1), on level 2, due to delete range 0-5, the seek
|
|
1210
|
-
// key is adjusted to 5 at level 3. Therefore, we will internally do Seek(5)
|
|
1211
|
-
// and unpins all blocks until 5 at level 3. Then the next scan's blocks from
|
|
1212
|
-
// 3-4 are unpinned at level 3. It is confusing that maybe block 3-4 should
|
|
1213
|
-
// not be unpinned, as next scan would need it. But Seek(5) implies that these
|
|
1214
|
-
// keys are all covered by some range deletion, so the next Seek(3) will also
|
|
1215
|
-
// do Seek(5) internally, so the blocks from 3-4 could be safely unpinned.
|
|
1216
|
-
|
|
1217
|
-
// advance to the right prepared range
|
|
1218
|
-
while (
|
|
1219
|
-
multi_scan_->next_scan_idx <
|
|
1220
|
-
multi_scan_->block_index_ranges_per_scan.size() &&
|
|
1221
|
-
(user_comparator_.CompareWithoutTimestamp(
|
|
1222
|
-
*user_seek_target, /*a_has_ts=*/true,
|
|
1223
|
-
multi_scan_->scan_opts->GetScanRanges()[multi_scan_->next_scan_idx]
|
|
1224
|
-
.range.start.value(),
|
|
1225
|
-
/*b_has_ts=*/false) >= 0)) {
|
|
1226
|
-
multi_scan_->next_scan_idx++;
|
|
1057
|
+
// Create block handles vector for IODispatcher (limited to prefetch_max_idx)
|
|
1058
|
+
std::vector<BlockHandle> blocks_to_prefetch;
|
|
1059
|
+
if (prefetch_max_idx > 0) {
|
|
1060
|
+
blocks_to_prefetch.assign(scan_block_handles.begin(),
|
|
1061
|
+
scan_block_handles.begin() + prefetch_max_idx);
|
|
1227
1062
|
}
|
|
1228
1063
|
|
|
1229
|
-
//
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
multi_scan_->block_index_ranges_per_scan[cur_scan_idx];
|
|
1238
|
-
|
|
1239
|
-
if (cur_scan_start_idx >= cur_scan_end_idx) {
|
|
1240
|
-
// No blocks are prepared for this range at current file.
|
|
1241
|
-
MarkPreparedRangeExhausted();
|
|
1242
|
-
return;
|
|
1243
|
-
}
|
|
1064
|
+
// Submit to IODispatcher
|
|
1065
|
+
auto job = std::make_shared<IOJob>();
|
|
1066
|
+
job->table = const_cast<BlockBasedTable*>(table_);
|
|
1067
|
+
job->block_handles = std::move(blocks_to_prefetch);
|
|
1068
|
+
job->job_options.io_coalesce_threshold =
|
|
1069
|
+
multiscan_opts->io_coalesce_threshold;
|
|
1070
|
+
job->job_options.read_options = read_options_;
|
|
1071
|
+
job->job_options.read_options.async_io = multiscan_opts->use_async_io;
|
|
1244
1072
|
|
|
1245
|
-
|
|
1246
|
-
//
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1073
|
+
std::shared_ptr<ReadSet> read_set;
|
|
1074
|
+
// IODispatcher should be provided by DBIter::Prepare() to enable sharing
|
|
1075
|
+
// across all BlockBasedTableIterators in the scan. Create one if not
|
|
1076
|
+
// provided (for direct calls to Prepare, e.g., in unit tests).
|
|
1077
|
+
std::shared_ptr<IODispatcher> dispatcher = multiscan_opts->io_dispatcher;
|
|
1078
|
+
if (!dispatcher) {
|
|
1079
|
+
dispatcher.reset(NewIODispatcher());
|
|
1252
1080
|
}
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
std::max(cur_scan_start_idx, multi_scan_->cur_data_block_idx);
|
|
1257
|
-
auto const& data_block_separators = multi_scan_->data_block_separators;
|
|
1258
|
-
while (block_idx < data_block_separators.size() &&
|
|
1259
|
-
(user_comparator_.CompareWithoutTimestamp(
|
|
1260
|
-
*user_seek_target, /*a_has_ts=*/true,
|
|
1261
|
-
data_block_separators[block_idx],
|
|
1262
|
-
/*b_has_ts=*/false) > 0)) {
|
|
1263
|
-
// Unpin the blocks that are passed
|
|
1264
|
-
if (!multi_scan_->pinned_data_blocks[block_idx].IsEmpty()) {
|
|
1265
|
-
multi_scan_->pinned_data_blocks[block_idx].Reset();
|
|
1266
|
-
}
|
|
1267
|
-
block_idx++;
|
|
1268
|
-
}
|
|
1269
|
-
|
|
1270
|
-
if (block_idx >= data_block_separators.size()) {
|
|
1271
|
-
// All of the prepared blocks for this file is exhausted.
|
|
1272
|
-
MarkPreparedRangeExhausted();
|
|
1081
|
+
multi_scan_status_ = dispatcher->SubmitJob(job, &read_set);
|
|
1082
|
+
if (!multi_scan_status_.ok()) {
|
|
1083
|
+
RecordTick(table_->GetStatistics(), MULTISCAN_PREPARE_ERRORS);
|
|
1273
1084
|
return;
|
|
1274
1085
|
}
|
|
1275
1086
|
|
|
1276
|
-
//
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
if (block_iter_points_to_real_block_) {
|
|
1287
|
-
// Should be scan in increasing key range.
|
|
1288
|
-
// All blocks before cur_data_block_idx_ are not pinned anymore.
|
|
1289
|
-
assert(multi_scan_->cur_data_block_idx < block_idx);
|
|
1290
|
-
}
|
|
1291
|
-
|
|
1292
|
-
ResetDataIter();
|
|
1293
|
-
|
|
1294
|
-
if (MultiScanLoadDataBlock(block_idx)) {
|
|
1295
|
-
return;
|
|
1296
|
-
}
|
|
1297
|
-
}
|
|
1298
|
-
|
|
1299
|
-
// Move current data block index forward until block_idx, meantime, unpin all
|
|
1300
|
-
// the blocks in between
|
|
1301
|
-
while (multi_scan_->cur_data_block_idx < block_idx) {
|
|
1302
|
-
// unpin block
|
|
1303
|
-
if (!multi_scan_->pinned_data_blocks[multi_scan_->cur_data_block_idx]
|
|
1304
|
-
.IsEmpty()) {
|
|
1305
|
-
multi_scan_->pinned_data_blocks[multi_scan_->cur_data_block_idx].Reset();
|
|
1306
|
-
}
|
|
1307
|
-
multi_scan_->cur_data_block_idx++;
|
|
1308
|
-
}
|
|
1309
|
-
block_iter_points_to_real_block_ = true;
|
|
1310
|
-
block_iter_.Seek(*seek_target);
|
|
1311
|
-
FindKeyForward();
|
|
1312
|
-
CheckOutOfBound();
|
|
1313
|
-
}
|
|
1314
|
-
|
|
1315
|
-
void BlockBasedTableIterator::FindBlockForwardInMultiScan() {
|
|
1316
|
-
assert(multi_scan_);
|
|
1317
|
-
assert(multi_scan_->next_scan_idx >= 1);
|
|
1318
|
-
const auto cur_scan_end_idx = std::get<1>(
|
|
1319
|
-
multi_scan_->block_index_ranges_per_scan[multi_scan_->next_scan_idx - 1]);
|
|
1320
|
-
do {
|
|
1321
|
-
if (!block_iter_.status().ok()) {
|
|
1322
|
-
return;
|
|
1323
|
-
}
|
|
1324
|
-
|
|
1325
|
-
// If is_out_of_bound_ is true, upper layer (LevelIterator) considers this
|
|
1326
|
-
// level has reached iterate_upper_bound_ and will not continue to iterate
|
|
1327
|
-
// into the next file. When we are doing the last scan within a MultiScan
|
|
1328
|
-
// for this file, it may need to continue to scan into the next file, so
|
|
1329
|
-
// we do not set is_out_of_bound_ in this case.
|
|
1330
|
-
if (multi_scan_->cur_data_block_idx + 1 >= cur_scan_end_idx) {
|
|
1331
|
-
MarkPreparedRangeExhausted();
|
|
1332
|
-
return;
|
|
1333
|
-
}
|
|
1334
|
-
// Move to the next pinned data block
|
|
1335
|
-
ResetDataIter();
|
|
1336
|
-
// Unpin previous block if it is not reset by data iterator
|
|
1337
|
-
if (!multi_scan_->pinned_data_blocks[multi_scan_->cur_data_block_idx]
|
|
1338
|
-
.IsEmpty()) {
|
|
1339
|
-
multi_scan_->pinned_data_blocks[multi_scan_->cur_data_block_idx].Reset();
|
|
1340
|
-
}
|
|
1341
|
-
++multi_scan_->cur_data_block_idx;
|
|
1342
|
-
|
|
1343
|
-
if (MultiScanLoadDataBlock(multi_scan_->cur_data_block_idx)) {
|
|
1344
|
-
return;
|
|
1345
|
-
}
|
|
1346
|
-
|
|
1347
|
-
block_iter_points_to_real_block_ = true;
|
|
1348
|
-
block_iter_.SeekToFirst();
|
|
1349
|
-
} while (!block_iter_.Valid());
|
|
1350
|
-
}
|
|
1351
|
-
|
|
1352
|
-
Status BlockBasedTableIterator::PollForBlock(size_t idx) {
|
|
1353
|
-
assert(multi_scan_);
|
|
1354
|
-
const auto async_idx = multi_scan_->block_idx_to_readreq_idx.find(idx);
|
|
1355
|
-
if (async_idx == multi_scan_->block_idx_to_readreq_idx.end()) {
|
|
1356
|
-
// Did not require async read, should already be pinned.
|
|
1357
|
-
assert(multi_scan_->pinned_data_blocks[idx].GetValue());
|
|
1358
|
-
return Status::OK();
|
|
1359
|
-
}
|
|
1360
|
-
|
|
1361
|
-
AsyncReadState& async_read = multi_scan_->async_states[async_idx->second];
|
|
1362
|
-
if (async_read.finished) {
|
|
1363
|
-
assert(async_read.io_handle == nullptr);
|
|
1364
|
-
assert(async_read.status.ok());
|
|
1365
|
-
return async_read.status;
|
|
1366
|
-
}
|
|
1087
|
+
// Successful Prepare. Create MultiScanIndexIterator and swap it in as
|
|
1088
|
+
// the index iterator. The original index_iter_ is saved for restoration
|
|
1089
|
+
// on backward operations.
|
|
1090
|
+
// Note: data_block_separators keeps full size for seek logic, even though
|
|
1091
|
+
// only blocks up to prefetch_max_idx are actually prefetched.
|
|
1092
|
+
auto multi_scan_idx_iter = std::make_unique<MultiScanIndexIterator>(
|
|
1093
|
+
std::move(scan_block_handles), std::move(data_block_separators),
|
|
1094
|
+
std::move(block_index_ranges_per_scan), multiscan_opts, read_set,
|
|
1095
|
+
prefetch_max_idx, icomp_, table_->GetStatistics());
|
|
1096
|
+
assert(multi_scan_idx_iter->status().ok());
|
|
1367
1097
|
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
return poll_s;
|
|
1374
|
-
}
|
|
1375
|
-
}
|
|
1376
|
-
assert(async_read.status.ok());
|
|
1377
|
-
if (!async_read.status.ok()) {
|
|
1378
|
-
return async_read.status;
|
|
1379
|
-
}
|
|
1380
|
-
async_read.CleanUpIOHandle();
|
|
1098
|
+
multi_scan_read_set_ = std::move(read_set);
|
|
1099
|
+
multi_scan_index_iter_ = multi_scan_idx_iter.get();
|
|
1100
|
+
prefetch_max_idx_ = prefetch_max_idx;
|
|
1101
|
+
original_index_iter_ = std::move(index_iter_);
|
|
1102
|
+
index_iter_ = std::move(multi_scan_idx_iter);
|
|
1381
1103
|
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
const auto& block = async_read.blocks[i];
|
|
1385
|
-
|
|
1386
|
-
Status s = CreateAndPinBlockFromBuffer(
|
|
1387
|
-
block, async_read.offset, async_read.result,
|
|
1388
|
-
multi_scan_->pinned_data_blocks[async_read.block_indices[i]]);
|
|
1389
|
-
|
|
1390
|
-
if (!s.ok()) {
|
|
1391
|
-
return s;
|
|
1392
|
-
}
|
|
1393
|
-
assert(multi_scan_->pinned_data_blocks[async_read.block_indices[i]]
|
|
1394
|
-
.GetValue());
|
|
1395
|
-
}
|
|
1396
|
-
assert(multi_scan_->pinned_data_blocks[idx].GetValue());
|
|
1397
|
-
return Status::OK();
|
|
1398
|
-
}
|
|
1399
|
-
|
|
1400
|
-
Status BlockBasedTableIterator::CreateAndPinBlockFromBuffer(
|
|
1401
|
-
const BlockHandle& block, uint64_t buffer_start_offset,
|
|
1402
|
-
const Slice& buffer_data, CachableEntry<Block>& pinned_block_entry) {
|
|
1403
|
-
// Get decompressor and handle dictionary loading
|
|
1404
|
-
UnownedPtr<Decompressor> decompressor = table_->get_rep()->decompressor.get();
|
|
1405
|
-
CachableEntry<DecompressorDict> cached_dict;
|
|
1406
|
-
|
|
1407
|
-
if (table_->get_rep()->uncompression_dict_reader) {
|
|
1408
|
-
{
|
|
1409
|
-
Status s =
|
|
1410
|
-
table_->get_rep()
|
|
1411
|
-
->uncompression_dict_reader->GetOrReadUncompressionDictionary(
|
|
1412
|
-
/* prefetch_buffer= */ nullptr, read_options_,
|
|
1413
|
-
/* get_context= */ nullptr, /* lookup_context= */ nullptr,
|
|
1414
|
-
&cached_dict);
|
|
1415
|
-
if (!s.ok()) {
|
|
1416
|
-
#ifndef NDEBUG
|
|
1417
|
-
fprintf(stdout, "Prepare dictionary loading failed with %s\n",
|
|
1418
|
-
s.ToString().c_str());
|
|
1419
|
-
#endif
|
|
1420
|
-
return s;
|
|
1421
|
-
}
|
|
1422
|
-
}
|
|
1423
|
-
if (!cached_dict.GetValue()) {
|
|
1424
|
-
#ifndef NDEBUG
|
|
1425
|
-
fprintf(stdout, "Success but no dictionary read\n");
|
|
1426
|
-
#endif
|
|
1427
|
-
return Status::InvalidArgument("No dictionary found");
|
|
1428
|
-
}
|
|
1429
|
-
decompressor = cached_dict.GetValue()->decompressor_.get();
|
|
1430
|
-
}
|
|
1431
|
-
|
|
1432
|
-
// Create block from buffer data
|
|
1433
|
-
const auto block_size_with_trailer =
|
|
1434
|
-
BlockBasedTable::BlockSizeWithTrailer(block);
|
|
1435
|
-
const auto block_offset_in_buffer = block.offset() - buffer_start_offset;
|
|
1436
|
-
|
|
1437
|
-
CacheAllocationPtr data =
|
|
1438
|
-
AllocateBlock(block_size_with_trailer,
|
|
1439
|
-
GetMemoryAllocator(table_->get_rep()->table_options));
|
|
1440
|
-
memcpy(data.get(), buffer_data.data() + block_offset_in_buffer,
|
|
1441
|
-
block_size_with_trailer);
|
|
1442
|
-
BlockContents tmp_contents(std::move(data), block.size());
|
|
1443
|
-
|
|
1444
|
-
#ifndef NDEBUG
|
|
1445
|
-
tmp_contents.has_trailer =
|
|
1446
|
-
table_->get_rep()->footer.GetBlockTrailerSize() > 0;
|
|
1447
|
-
#endif
|
|
1448
|
-
|
|
1449
|
-
return table_->CreateAndPinBlockInCache<Block_kData>(
|
|
1450
|
-
read_options_, block, decompressor, &tmp_contents,
|
|
1451
|
-
&pinned_block_entry.As<Block_kData>());
|
|
1104
|
+
is_index_at_curr_block_ = false;
|
|
1105
|
+
block_iter_points_to_real_block_ = false;
|
|
1452
1106
|
}
|
|
1453
1107
|
|
|
1454
1108
|
constexpr auto kVerbose = false;
|
|
@@ -1536,245 +1190,4 @@ Status BlockBasedTableIterator::CollectBlockHandles(
|
|
|
1536
1190
|
return Status::OK();
|
|
1537
1191
|
}
|
|
1538
1192
|
|
|
1539
|
-
Status BlockBasedTableIterator::FilterAndPinCachedBlocks(
|
|
1540
|
-
const std::vector<BlockHandle>& scan_block_handles,
|
|
1541
|
-
const MultiScanArgs* multiscan_opts,
|
|
1542
|
-
std::vector<size_t>* block_indices_to_read,
|
|
1543
|
-
std::vector<CachableEntry<Block>>* pinned_data_blocks_guard,
|
|
1544
|
-
size_t* prefetched_max_idx) {
|
|
1545
|
-
uint64_t total_prefetch_size = 0;
|
|
1546
|
-
*prefetched_max_idx = scan_block_handles.size();
|
|
1547
|
-
|
|
1548
|
-
for (size_t i = 0; i < scan_block_handles.size(); ++i) {
|
|
1549
|
-
const auto& data_block_handle = scan_block_handles[i];
|
|
1550
|
-
|
|
1551
|
-
total_prefetch_size +=
|
|
1552
|
-
BlockBasedTable::BlockSizeWithTrailer(data_block_handle);
|
|
1553
|
-
if (multiscan_opts->max_prefetch_size > 0 &&
|
|
1554
|
-
total_prefetch_size > multiscan_opts->max_prefetch_size) {
|
|
1555
|
-
for (size_t j = i; j < scan_block_handles.size(); ++j) {
|
|
1556
|
-
assert((*pinned_data_blocks_guard)[j].IsEmpty());
|
|
1557
|
-
}
|
|
1558
|
-
*prefetched_max_idx = i;
|
|
1559
|
-
break;
|
|
1560
|
-
}
|
|
1561
|
-
|
|
1562
|
-
Status s = table_->LookupAndPinBlocksInCache<Block_kData>(
|
|
1563
|
-
read_options_, data_block_handle,
|
|
1564
|
-
&(*pinned_data_blocks_guard)[i].As<Block_kData>());
|
|
1565
|
-
|
|
1566
|
-
if (!s.ok()) {
|
|
1567
|
-
// Abort: block cache look up failed.
|
|
1568
|
-
return s;
|
|
1569
|
-
}
|
|
1570
|
-
if (!(*pinned_data_blocks_guard)[i].GetValue()) {
|
|
1571
|
-
// Block not in cache
|
|
1572
|
-
block_indices_to_read->emplace_back(i);
|
|
1573
|
-
}
|
|
1574
|
-
}
|
|
1575
|
-
return Status::OK();
|
|
1576
|
-
}
|
|
1577
|
-
|
|
1578
|
-
void BlockBasedTableIterator::PrepareIORequests(
|
|
1579
|
-
const std::vector<size_t>& block_indices_to_read,
|
|
1580
|
-
const std::vector<BlockHandle>& scan_block_handles,
|
|
1581
|
-
const MultiScanArgs* multiscan_opts, std::vector<FSReadRequest>* read_reqs,
|
|
1582
|
-
UnorderedMap<size_t, size_t>* block_idx_to_readreq_idx,
|
|
1583
|
-
std::vector<std::vector<size_t>>* coalesced_block_indices) {
|
|
1584
|
-
assert(coalesced_block_indices->empty());
|
|
1585
|
-
coalesced_block_indices->resize(1);
|
|
1586
|
-
|
|
1587
|
-
for (const auto& block_idx : block_indices_to_read) {
|
|
1588
|
-
if (!coalesced_block_indices->back().empty()) {
|
|
1589
|
-
// Check if we can coalesce.
|
|
1590
|
-
const auto& last_block_handle =
|
|
1591
|
-
scan_block_handles[coalesced_block_indices->back().back()];
|
|
1592
|
-
uint64_t last_block_end =
|
|
1593
|
-
last_block_handle.offset() +
|
|
1594
|
-
BlockBasedTable::BlockSizeWithTrailer(last_block_handle);
|
|
1595
|
-
uint64_t current_start = scan_block_handles[block_idx].offset();
|
|
1596
|
-
|
|
1597
|
-
if (current_start >
|
|
1598
|
-
last_block_end + multiscan_opts->io_coalesce_threshold) {
|
|
1599
|
-
// new IO
|
|
1600
|
-
coalesced_block_indices->emplace_back();
|
|
1601
|
-
}
|
|
1602
|
-
}
|
|
1603
|
-
coalesced_block_indices->back().emplace_back(block_idx);
|
|
1604
|
-
}
|
|
1605
|
-
|
|
1606
|
-
assert(read_reqs->empty());
|
|
1607
|
-
read_reqs->reserve(coalesced_block_indices->size());
|
|
1608
|
-
for (const auto& block_indices : *coalesced_block_indices) {
|
|
1609
|
-
assert(block_indices.size());
|
|
1610
|
-
const auto& first_block_handle = scan_block_handles[block_indices[0]];
|
|
1611
|
-
const auto& last_block_handle = scan_block_handles[block_indices.back()];
|
|
1612
|
-
|
|
1613
|
-
const auto start_offset = first_block_handle.offset();
|
|
1614
|
-
const auto end_offset =
|
|
1615
|
-
last_block_handle.offset() +
|
|
1616
|
-
BlockBasedTable::BlockSizeWithTrailer(last_block_handle);
|
|
1617
|
-
#ifndef NDEBUG
|
|
1618
|
-
// Debug print for failing the assertion below.
|
|
1619
|
-
if (start_offset >= end_offset) {
|
|
1620
|
-
fprintf(stderr, "scan_block_handles: ");
|
|
1621
|
-
for (const auto& block : scan_block_handles) {
|
|
1622
|
-
fprintf(stderr, "offset: %" PRIu64 ", size: %" PRIu64 "; ",
|
|
1623
|
-
block.offset(), block.size());
|
|
1624
|
-
}
|
|
1625
|
-
fprintf(stderr,
|
|
1626
|
-
"\nfirst block - offset: %" PRIu64 ", size: %" PRIu64 "\n",
|
|
1627
|
-
first_block_handle.offset(), first_block_handle.size());
|
|
1628
|
-
fprintf(stderr, "last block - offset: %" PRIu64 ", size: %" PRIu64 "\n",
|
|
1629
|
-
last_block_handle.offset(), last_block_handle.size());
|
|
1630
|
-
|
|
1631
|
-
fprintf(stderr, "coalesced_block_indices: ");
|
|
1632
|
-
for (const auto& b : *coalesced_block_indices) {
|
|
1633
|
-
fprintf(stderr, "[");
|
|
1634
|
-
for (const auto& block_idx : b) {
|
|
1635
|
-
fprintf(stderr, "%zu ", block_idx);
|
|
1636
|
-
}
|
|
1637
|
-
fprintf(stderr, "] ");
|
|
1638
|
-
}
|
|
1639
|
-
fprintf(stderr, "\ncurrent blocks: ");
|
|
1640
|
-
for (const auto& block_idx : block_indices) {
|
|
1641
|
-
fprintf(stderr, "offset: %" PRIu64 ", size: %" PRIu64 "; ",
|
|
1642
|
-
scan_block_handles[block_idx].offset(),
|
|
1643
|
-
scan_block_handles[block_idx].size());
|
|
1644
|
-
}
|
|
1645
|
-
fprintf(stderr, "\n");
|
|
1646
|
-
}
|
|
1647
|
-
#endif // NDEBUG
|
|
1648
|
-
assert(end_offset > start_offset);
|
|
1649
|
-
|
|
1650
|
-
read_reqs->emplace_back();
|
|
1651
|
-
read_reqs->back().offset = start_offset;
|
|
1652
|
-
read_reqs->back().len = end_offset - start_offset;
|
|
1653
|
-
|
|
1654
|
-
if (multiscan_opts->use_async_io) {
|
|
1655
|
-
for (const auto& block_idx : block_indices) {
|
|
1656
|
-
(*block_idx_to_readreq_idx)[block_idx] = read_reqs->size() - 1;
|
|
1657
|
-
}
|
|
1658
|
-
}
|
|
1659
|
-
}
|
|
1660
|
-
}
|
|
1661
|
-
|
|
1662
|
-
Status BlockBasedTableIterator::ExecuteIO(
|
|
1663
|
-
const std::vector<BlockHandle>& scan_block_handles,
|
|
1664
|
-
const MultiScanArgs* multiscan_opts,
|
|
1665
|
-
const std::vector<std::vector<size_t>>& coalesced_block_indices,
|
|
1666
|
-
std::vector<FSReadRequest>* read_reqs,
|
|
1667
|
-
std::vector<AsyncReadState>* async_states,
|
|
1668
|
-
std::vector<CachableEntry<Block>>* pinned_data_blocks_guard) {
|
|
1669
|
-
IOOptions io_opts;
|
|
1670
|
-
Status s;
|
|
1671
|
-
s = table_->get_rep()->file->PrepareIOOptions(read_options_, io_opts);
|
|
1672
|
-
if (!s.ok()) {
|
|
1673
|
-
// Abort: PrepareIOOptions failed
|
|
1674
|
-
return s;
|
|
1675
|
-
}
|
|
1676
|
-
const bool direct_io = table_->get_rep()->file->use_direct_io();
|
|
1677
|
-
|
|
1678
|
-
if (multiscan_opts->use_async_io) {
|
|
1679
|
-
async_states->resize(read_reqs->size());
|
|
1680
|
-
for (size_t i = 0; i < read_reqs->size(); ++i) {
|
|
1681
|
-
auto& read_req = (*read_reqs)[i];
|
|
1682
|
-
auto& async_read = (*async_states)[i];
|
|
1683
|
-
|
|
1684
|
-
async_read.finished = false;
|
|
1685
|
-
async_read.offset = read_req.offset;
|
|
1686
|
-
async_read.block_indices = coalesced_block_indices[i];
|
|
1687
|
-
for (const auto idx : coalesced_block_indices[i]) {
|
|
1688
|
-
async_read.blocks.emplace_back(scan_block_handles[idx]);
|
|
1689
|
-
}
|
|
1690
|
-
|
|
1691
|
-
if (direct_io) {
|
|
1692
|
-
read_req.scratch = nullptr;
|
|
1693
|
-
} else {
|
|
1694
|
-
async_read.buf.reset(new char[read_req.len]);
|
|
1695
|
-
read_req.scratch = async_read.buf.get();
|
|
1696
|
-
}
|
|
1697
|
-
|
|
1698
|
-
auto cb = std::bind(&BlockBasedTableIterator::PrepareReadAsyncCallBack,
|
|
1699
|
-
this, std::placeholders::_1, std::placeholders::_2);
|
|
1700
|
-
// TODO: for mmap, io_handle will not be set but callback will already
|
|
1701
|
-
// be called.
|
|
1702
|
-
s = table_->get_rep()->file.get()->ReadAsync(
|
|
1703
|
-
read_req, io_opts, cb, &async_read, &async_read.io_handle,
|
|
1704
|
-
&async_read.del_fn, direct_io ? &async_read.aligned_buf : nullptr);
|
|
1705
|
-
if (!s.ok()) {
|
|
1706
|
-
#ifndef NDEBUG
|
|
1707
|
-
fprintf(stderr, "ReadAsync failed with %s\n", s.ToString().c_str());
|
|
1708
|
-
#endif
|
|
1709
|
-
assert(false);
|
|
1710
|
-
return s;
|
|
1711
|
-
}
|
|
1712
|
-
for (auto& req : *read_reqs) {
|
|
1713
|
-
if (!req.status.ok()) {
|
|
1714
|
-
assert(false);
|
|
1715
|
-
// Silence compiler warning about NRVO
|
|
1716
|
-
s = req.status;
|
|
1717
|
-
return s;
|
|
1718
|
-
}
|
|
1719
|
-
}
|
|
1720
|
-
}
|
|
1721
|
-
} else {
|
|
1722
|
-
// Synchronous IO using MultiRead
|
|
1723
|
-
std::unique_ptr<char[]> buf;
|
|
1724
|
-
|
|
1725
|
-
if (direct_io) {
|
|
1726
|
-
for (auto& read_req : *read_reqs) {
|
|
1727
|
-
read_req.scratch = nullptr;
|
|
1728
|
-
}
|
|
1729
|
-
} else {
|
|
1730
|
-
// TODO: optimize if FSSupportedOps::kFSBuffer is supported.
|
|
1731
|
-
size_t total_len = 0;
|
|
1732
|
-
for (const auto& req : *read_reqs) {
|
|
1733
|
-
total_len += req.len;
|
|
1734
|
-
}
|
|
1735
|
-
buf.reset(new char[total_len]);
|
|
1736
|
-
size_t offset = 0;
|
|
1737
|
-
for (auto& read_req : *read_reqs) {
|
|
1738
|
-
read_req.scratch = buf.get() + offset;
|
|
1739
|
-
offset += read_req.len;
|
|
1740
|
-
}
|
|
1741
|
-
}
|
|
1742
|
-
|
|
1743
|
-
AlignedBuf aligned_buf;
|
|
1744
|
-
s = table_->get_rep()->file->MultiRead(io_opts, read_reqs->data(),
|
|
1745
|
-
read_reqs->size(),
|
|
1746
|
-
direct_io ? &aligned_buf : nullptr);
|
|
1747
|
-
if (!s.ok()) {
|
|
1748
|
-
return s;
|
|
1749
|
-
}
|
|
1750
|
-
for (auto& req : *read_reqs) {
|
|
1751
|
-
if (!req.status.ok()) {
|
|
1752
|
-
// Silence compiler warning about NRVO
|
|
1753
|
-
s = req.status;
|
|
1754
|
-
return s;
|
|
1755
|
-
}
|
|
1756
|
-
}
|
|
1757
|
-
|
|
1758
|
-
// Init blocks and pin them in block cache.
|
|
1759
|
-
assert(read_reqs->size() == coalesced_block_indices.size());
|
|
1760
|
-
for (size_t i = 0; i < coalesced_block_indices.size(); i++) {
|
|
1761
|
-
const auto& read_req = (*read_reqs)[i];
|
|
1762
|
-
for (const auto& block_idx : coalesced_block_indices[i]) {
|
|
1763
|
-
const auto& block = scan_block_handles[block_idx];
|
|
1764
|
-
|
|
1765
|
-
assert((*pinned_data_blocks_guard)[block_idx].IsEmpty());
|
|
1766
|
-
s = CreateAndPinBlockFromBuffer(block, read_req.offset, read_req.result,
|
|
1767
|
-
(*pinned_data_blocks_guard)[block_idx]);
|
|
1768
|
-
if (!s.ok()) {
|
|
1769
|
-
assert(false);
|
|
1770
|
-
// Abort: failed to create and pin block in cache
|
|
1771
|
-
return s;
|
|
1772
|
-
}
|
|
1773
|
-
assert((*pinned_data_blocks_guard)[block_idx].GetValue());
|
|
1774
|
-
}
|
|
1775
|
-
}
|
|
1776
|
-
}
|
|
1777
|
-
return s;
|
|
1778
|
-
}
|
|
1779
|
-
|
|
1780
1193
|
} // namespace ROCKSDB_NAMESPACE
|