@nxtedition/rocksdb 8.2.7 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
- package/deps/rocksdb/rocksdb/Makefile +22 -19
- package/deps/rocksdb/rocksdb/TARGETS +8 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
- package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
- package/deps/rocksdb/rocksdb/db/c.cc +169 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
- package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
- package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
- package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
- package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
- package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
- package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
- package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
- package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
- package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
- package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
- package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
- package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
- package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
- package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
- package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
- package/deps/rocksdb/rocksdb/env/env.cc +1 -2
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
- package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
- package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
- package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
- package/deps/rocksdb/rocksdb/options/options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
- package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
- package/deps/rocksdb/rocksdb/src.mk +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
- package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/format.cc +175 -33
- package/deps/rocksdb/rocksdb/table/format.h +63 -10
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
- package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
- package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
- package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
- package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
- package/deps/rocksdb/rocksdb/util/compression.h +110 -32
- package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
- package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
- package/deps/rocksdb/rocksdb/util/hash.h +7 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
- package/deps/rocksdb/rocksdb/util/math.h +58 -6
- package/deps/rocksdb/rocksdb/util/math128.h +29 -7
- package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
- package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
- package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -25,10 +25,12 @@ class MockFS;
|
|
|
25
25
|
class MockRandomAccessFile : public FSRandomAccessFileOwnerWrapper {
|
|
26
26
|
public:
|
|
27
27
|
MockRandomAccessFile(std::unique_ptr<FSRandomAccessFile>& file,
|
|
28
|
-
bool support_prefetch, std::atomic_int& prefetch_count
|
|
28
|
+
bool support_prefetch, std::atomic_int& prefetch_count,
|
|
29
|
+
bool small_buffer_alignment = false)
|
|
29
30
|
: FSRandomAccessFileOwnerWrapper(std::move(file)),
|
|
30
31
|
support_prefetch_(support_prefetch),
|
|
31
|
-
prefetch_count_(prefetch_count)
|
|
32
|
+
prefetch_count_(prefetch_count),
|
|
33
|
+
small_buffer_alignment_(small_buffer_alignment) {}
|
|
32
34
|
|
|
33
35
|
IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options,
|
|
34
36
|
IODebugContext* dbg) override {
|
|
@@ -40,16 +42,25 @@ class MockRandomAccessFile : public FSRandomAccessFileOwnerWrapper {
|
|
|
40
42
|
}
|
|
41
43
|
}
|
|
42
44
|
|
|
45
|
+
size_t GetRequiredBufferAlignment() const override {
|
|
46
|
+
return small_buffer_alignment_
|
|
47
|
+
? 1
|
|
48
|
+
: FSRandomAccessFileOwnerWrapper::GetRequiredBufferAlignment();
|
|
49
|
+
}
|
|
50
|
+
|
|
43
51
|
private:
|
|
44
52
|
const bool support_prefetch_;
|
|
45
53
|
std::atomic_int& prefetch_count_;
|
|
54
|
+
const bool small_buffer_alignment_;
|
|
46
55
|
};
|
|
47
56
|
|
|
48
57
|
class MockFS : public FileSystemWrapper {
|
|
49
58
|
public:
|
|
50
59
|
explicit MockFS(const std::shared_ptr<FileSystem>& wrapped,
|
|
51
|
-
bool support_prefetch)
|
|
52
|
-
: FileSystemWrapper(wrapped),
|
|
60
|
+
bool support_prefetch, bool small_buffer_alignment = false)
|
|
61
|
+
: FileSystemWrapper(wrapped),
|
|
62
|
+
support_prefetch_(support_prefetch),
|
|
63
|
+
small_buffer_alignment_(small_buffer_alignment) {}
|
|
53
64
|
|
|
54
65
|
static const char* kClassName() { return "MockFS"; }
|
|
55
66
|
const char* Name() const override { return kClassName(); }
|
|
@@ -61,8 +72,8 @@ class MockFS : public FileSystemWrapper {
|
|
|
61
72
|
std::unique_ptr<FSRandomAccessFile> file;
|
|
62
73
|
IOStatus s;
|
|
63
74
|
s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
|
|
64
|
-
result->reset(
|
|
65
|
-
|
|
75
|
+
result->reset(new MockRandomAccessFile(
|
|
76
|
+
file, support_prefetch_, prefetch_count_, small_buffer_alignment_));
|
|
66
77
|
return s;
|
|
67
78
|
}
|
|
68
79
|
|
|
@@ -76,6 +87,7 @@ class MockFS : public FileSystemWrapper {
|
|
|
76
87
|
|
|
77
88
|
private:
|
|
78
89
|
const bool support_prefetch_;
|
|
90
|
+
const bool small_buffer_alignment_;
|
|
79
91
|
std::atomic_int prefetch_count_{0};
|
|
80
92
|
};
|
|
81
93
|
|
|
@@ -85,7 +97,8 @@ class PrefetchTest
|
|
|
85
97
|
public:
|
|
86
98
|
PrefetchTest() : DBTestBase("prefetch_test", true) {}
|
|
87
99
|
|
|
88
|
-
void SetGenericOptions(Env* env, bool use_direct_io,
|
|
100
|
+
virtual void SetGenericOptions(Env* env, bool use_direct_io,
|
|
101
|
+
Options& options) {
|
|
89
102
|
options = CurrentOptions();
|
|
90
103
|
options.write_buffer_size = 1024;
|
|
91
104
|
options.create_if_missing = true;
|
|
@@ -115,7 +128,14 @@ std::string BuildKey(int num, std::string postfix = "") {
|
|
|
115
128
|
return "my_key_" + std::to_string(num) + postfix;
|
|
116
129
|
}
|
|
117
130
|
|
|
118
|
-
// This test verifies the basic
|
|
131
|
+
// This test verifies the following basic functionalities of prefetching:
|
|
132
|
+
// (1) If underline file system supports prefetch, and directIO is not enabled
|
|
133
|
+
// make sure prefetch() is called and FilePrefetchBuffer is not used.
|
|
134
|
+
// (2) If underline file system doesn't support prefetch, or directIO is
|
|
135
|
+
// enabled, make sure prefetch() is not called and FilePrefetchBuffer is
|
|
136
|
+
// used.
|
|
137
|
+
// (3) Measure read bytes, hit and miss of SST's tail prefetching during table
|
|
138
|
+
// open.
|
|
119
139
|
TEST_P(PrefetchTest, Basic) {
|
|
120
140
|
// First param is if the mockFS support_prefetch or not
|
|
121
141
|
bool support_prefetch =
|
|
@@ -152,6 +172,7 @@ TEST_P(PrefetchTest, Basic) {
|
|
|
152
172
|
ASSERT_OK(batch.Put(BuildKey(i), "value for range 1 key"));
|
|
153
173
|
}
|
|
154
174
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
175
|
+
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
|
|
155
176
|
|
|
156
177
|
// create second key range
|
|
157
178
|
batch.Clear();
|
|
@@ -159,6 +180,7 @@ TEST_P(PrefetchTest, Basic) {
|
|
|
159
180
|
ASSERT_OK(batch.Put(BuildKey(i, "key2"), "value for range 2 key"));
|
|
160
181
|
}
|
|
161
182
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
183
|
+
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
|
|
162
184
|
|
|
163
185
|
// delete second key range
|
|
164
186
|
batch.Clear();
|
|
@@ -166,6 +188,20 @@ TEST_P(PrefetchTest, Basic) {
|
|
|
166
188
|
ASSERT_OK(batch.Delete(BuildKey(i, "key2")));
|
|
167
189
|
}
|
|
168
190
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
191
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
192
|
+
|
|
193
|
+
// To verify SST file tail prefetch (once per file) during flush output
|
|
194
|
+
// verification
|
|
195
|
+
if (support_prefetch && !use_direct_io) {
|
|
196
|
+
ASSERT_TRUE(fs->IsPrefetchCalled());
|
|
197
|
+
ASSERT_EQ(3, fs->GetPrefetchCount());
|
|
198
|
+
ASSERT_EQ(0, buff_prefetch_count);
|
|
199
|
+
fs->ClearPrefetchCount();
|
|
200
|
+
} else {
|
|
201
|
+
ASSERT_FALSE(fs->IsPrefetchCalled());
|
|
202
|
+
ASSERT_EQ(buff_prefetch_count, 3);
|
|
203
|
+
buff_prefetch_count = 0;
|
|
204
|
+
}
|
|
169
205
|
|
|
170
206
|
// compact database
|
|
171
207
|
std::string start_key = BuildKey(0);
|
|
@@ -192,25 +228,27 @@ TEST_P(PrefetchTest, Basic) {
|
|
|
192
228
|
const uint64_t cur_table_open_prefetch_tail_hit =
|
|
193
229
|
options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT);
|
|
194
230
|
|
|
231
|
+
// To verify prefetch during compaction input read
|
|
195
232
|
if (support_prefetch && !use_direct_io) {
|
|
196
|
-
// If underline file system supports prefetch, and directIO is not enabled
|
|
197
|
-
// make sure prefetch() is called and FilePrefetchBuffer is not used.
|
|
198
233
|
ASSERT_TRUE(fs->IsPrefetchCalled());
|
|
199
|
-
|
|
234
|
+
// To rule out false positive by the SST file tail prefetch during
|
|
235
|
+
// compaction output verification
|
|
236
|
+
ASSERT_GT(fs->GetPrefetchCount(), 1);
|
|
200
237
|
ASSERT_EQ(0, buff_prefetch_count);
|
|
238
|
+
fs->ClearPrefetchCount();
|
|
201
239
|
} else {
|
|
202
|
-
// If underline file system doesn't support prefetch, or directIO is
|
|
203
|
-
// enabled, make sure prefetch() is not called and FilePrefetchBuffer is
|
|
204
|
-
// used.
|
|
205
240
|
ASSERT_FALSE(fs->IsPrefetchCalled());
|
|
206
|
-
|
|
241
|
+
// To rule out false positive by the SST file tail prefetch during
|
|
242
|
+
// compaction output verification
|
|
243
|
+
ASSERT_GT(buff_prefetch_count, 1);
|
|
244
|
+
buff_prefetch_count = 0;
|
|
245
|
+
|
|
207
246
|
ASSERT_GT(cur_table_open_prefetch_tail_read.count,
|
|
208
247
|
prev_table_open_prefetch_tail_read.count);
|
|
209
248
|
ASSERT_GT(cur_table_open_prefetch_tail_hit,
|
|
210
249
|
prev_table_open_prefetch_tail_hit);
|
|
211
250
|
ASSERT_GE(cur_table_open_prefetch_tail_miss,
|
|
212
251
|
prev_table_open_prefetch_tail_miss);
|
|
213
|
-
buff_prefetch_count = 0;
|
|
214
252
|
}
|
|
215
253
|
|
|
216
254
|
// count the keys
|
|
@@ -223,7 +261,7 @@ TEST_P(PrefetchTest, Basic) {
|
|
|
223
261
|
(void)num_keys;
|
|
224
262
|
}
|
|
225
263
|
|
|
226
|
-
//
|
|
264
|
+
// To verify prefetch during user scan
|
|
227
265
|
if (support_prefetch && !use_direct_io) {
|
|
228
266
|
ASSERT_TRUE(fs->IsPrefetchCalled());
|
|
229
267
|
fs->ClearPrefetchCount();
|
|
@@ -236,30 +274,79 @@ TEST_P(PrefetchTest, Basic) {
|
|
|
236
274
|
Close();
|
|
237
275
|
}
|
|
238
276
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
const bool use_file_prefetch_buffer = !support_prefetch || use_direct_io;
|
|
277
|
+
class PrefetchTailTest : public PrefetchTest {
|
|
278
|
+
public:
|
|
279
|
+
bool SupportPrefetch() const {
|
|
280
|
+
return std::get<0>(GetParam()) &&
|
|
281
|
+
test::IsPrefetchSupported(env_->GetFileSystem(), dbname_);
|
|
282
|
+
}
|
|
246
283
|
|
|
247
|
-
std::
|
|
248
|
-
|
|
249
|
-
|
|
284
|
+
bool UseDirectIO() const { return std::get<1>(GetParam()); }
|
|
285
|
+
|
|
286
|
+
bool UseFilePrefetchBuffer() const {
|
|
287
|
+
return !SupportPrefetch() || UseDirectIO();
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
Env* GetEnv(bool small_buffer_alignment = false) const {
|
|
291
|
+
std::shared_ptr<MockFS> fs = std::make_shared<MockFS>(
|
|
292
|
+
env_->GetFileSystem(), SupportPrefetch(), small_buffer_alignment);
|
|
293
|
+
|
|
294
|
+
return new CompositeEnvWrapper(env_, fs);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
void SetGenericOptions(Env* env, bool use_direct_io,
|
|
298
|
+
Options& options) override {
|
|
299
|
+
PrefetchTest::SetGenericOptions(env, use_direct_io, options);
|
|
300
|
+
options.statistics = CreateDBStatistics();
|
|
301
|
+
}
|
|
250
302
|
|
|
303
|
+
void SetBlockBasedTableOptions(
|
|
304
|
+
BlockBasedTableOptions& table_options, bool partition_filters = true,
|
|
305
|
+
uint64_t metadata_block_size =
|
|
306
|
+
BlockBasedTableOptions().metadata_block_size,
|
|
307
|
+
bool use_small_cache = false) {
|
|
308
|
+
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
|
|
309
|
+
table_options.partition_filters = partition_filters;
|
|
310
|
+
if (table_options.partition_filters) {
|
|
311
|
+
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
|
312
|
+
}
|
|
313
|
+
table_options.metadata_block_size = metadata_block_size;
|
|
314
|
+
|
|
315
|
+
if (use_small_cache) {
|
|
316
|
+
LRUCacheOptions co;
|
|
317
|
+
co.capacity = 1;
|
|
318
|
+
std::shared_ptr<Cache> cache = NewLRUCache(co);
|
|
319
|
+
table_options.block_cache = cache;
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
int64_t GetNumIndexPartition() const {
|
|
324
|
+
int64_t index_partition_counts = 0;
|
|
325
|
+
TablePropertiesCollection all_table_props;
|
|
326
|
+
assert(db_->GetPropertiesOfAllTables(&all_table_props).ok());
|
|
327
|
+
for (const auto& name_and_table_props : all_table_props) {
|
|
328
|
+
const auto& table_props = name_and_table_props.second;
|
|
329
|
+
index_partition_counts += table_props->index_partitions;
|
|
330
|
+
}
|
|
331
|
+
return index_partition_counts;
|
|
332
|
+
}
|
|
333
|
+
};
|
|
334
|
+
|
|
335
|
+
INSTANTIATE_TEST_CASE_P(PrefetchTailTest, PrefetchTailTest,
|
|
336
|
+
::testing::Combine(::testing::Bool(),
|
|
337
|
+
::testing::Bool()));
|
|
338
|
+
|
|
339
|
+
TEST_P(PrefetchTailTest, Basic) {
|
|
340
|
+
std::unique_ptr<Env> env(GetEnv());
|
|
251
341
|
Options options;
|
|
252
|
-
SetGenericOptions(env.get(),
|
|
253
|
-
options.statistics = CreateDBStatistics();
|
|
342
|
+
SetGenericOptions(env.get(), UseDirectIO(), options);
|
|
254
343
|
|
|
255
344
|
BlockBasedTableOptions bbto;
|
|
256
|
-
bbto
|
|
257
|
-
bbto.partition_filters = true;
|
|
258
|
-
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
|
345
|
+
SetBlockBasedTableOptions(bbto);
|
|
259
346
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
|
260
347
|
|
|
261
348
|
Status s = TryReopen(options);
|
|
262
|
-
if (
|
|
349
|
+
if (UseDirectIO() && (s.IsNotSupported() || s.IsInvalidArgument())) {
|
|
263
350
|
// If direct IO is not supported, skip the test
|
|
264
351
|
ROCKSDB_GTEST_BYPASS("Direct IO is not supported");
|
|
265
352
|
return;
|
|
@@ -276,7 +363,7 @@ TEST_P(PrefetchTest, BlockBasedTableTailPrefetch) {
|
|
|
276
363
|
HistogramData post_flush_file_read;
|
|
277
364
|
options.statistics->histogramData(FILE_READ_FLUSH_MICROS,
|
|
278
365
|
&post_flush_file_read);
|
|
279
|
-
if (
|
|
366
|
+
if (UseFilePrefetchBuffer()) {
|
|
280
367
|
// `PartitionedFilterBlockReader/PartitionIndexReader::CacheDependencies()`
|
|
281
368
|
// should read from the prefetched tail in file prefetch buffer instead of
|
|
282
369
|
// initiating extra SST reads. Therefore `BlockBasedTable::PrefetchTail()`
|
|
@@ -300,7 +387,7 @@ TEST_P(PrefetchTest, BlockBasedTableTailPrefetch) {
|
|
|
300
387
|
HistogramData post_compaction_file_read;
|
|
301
388
|
options.statistics->histogramData(FILE_READ_COMPACTION_MICROS,
|
|
302
389
|
&post_compaction_file_read);
|
|
303
|
-
if (
|
|
390
|
+
if (UseFilePrefetchBuffer()) {
|
|
304
391
|
// `PartitionedFilterBlockReader/PartitionIndexReader::CacheDependencies()`
|
|
305
392
|
// should read from the prefetched tail in file prefetch buffer instead of
|
|
306
393
|
// initiating extra SST reads.
|
|
@@ -323,6 +410,85 @@ TEST_P(PrefetchTest, BlockBasedTableTailPrefetch) {
|
|
|
323
410
|
Close();
|
|
324
411
|
}
|
|
325
412
|
|
|
413
|
+
TEST_P(PrefetchTailTest, UpgradeToTailSizeInManifest) {
|
|
414
|
+
if (!UseFilePrefetchBuffer()) {
|
|
415
|
+
ROCKSDB_GTEST_BYPASS(
|
|
416
|
+
"Upgrade to tail size in manifest is only relevant when RocksDB file "
|
|
417
|
+
"prefetch buffer is used.");
|
|
418
|
+
}
|
|
419
|
+
if (UseDirectIO()) {
|
|
420
|
+
ROCKSDB_GTEST_BYPASS(
|
|
421
|
+
"To simplify testing logics with setting file's buffer alignment to "
|
|
422
|
+
"be "
|
|
423
|
+
"1, direct IO is required to be disabled.");
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
std::unique_ptr<Env> env(GetEnv(true /* small_buffer_alignment */));
|
|
427
|
+
Options options;
|
|
428
|
+
SetGenericOptions(env.get(), false /* use_direct_io*/, options);
|
|
429
|
+
options.max_open_files = -1;
|
|
430
|
+
options.write_buffer_size = 1024 * 1024;
|
|
431
|
+
|
|
432
|
+
BlockBasedTableOptions table_options;
|
|
433
|
+
SetBlockBasedTableOptions(table_options, false /* partition_filters */,
|
|
434
|
+
1 /* metadata_block_size*/,
|
|
435
|
+
true /* use_small_cache */);
|
|
436
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
437
|
+
|
|
438
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
439
|
+
// To simulate a pre-upgrade DB where file tail size is not recorded in
|
|
440
|
+
// manifest
|
|
441
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
442
|
+
"FileMetaData::FileMetaData", [&](void* arg) {
|
|
443
|
+
FileMetaData* meta = static_cast<FileMetaData*>(arg);
|
|
444
|
+
meta->tail_size = 0;
|
|
445
|
+
});
|
|
446
|
+
|
|
447
|
+
ASSERT_OK(TryReopen(options));
|
|
448
|
+
for (int i = 0; i < 10000; ++i) {
|
|
449
|
+
ASSERT_OK(Put("k" + std::to_string(i), "v"));
|
|
450
|
+
}
|
|
451
|
+
ASSERT_OK(Flush());
|
|
452
|
+
|
|
453
|
+
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
454
|
+
|
|
455
|
+
// To simulate a DB undergoing the upgrade where tail size to prefetch is
|
|
456
|
+
// inferred to be a small number for files with no tail size recorded in
|
|
457
|
+
// manifest.
|
|
458
|
+
// "1" is chosen to be such number so that with `small_buffer_alignment ==
|
|
459
|
+
// true` and `use_small_cache == true`, it would have caused one file read
|
|
460
|
+
// per index partition during db open if the upgrade is done wrong.
|
|
461
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
462
|
+
"BlockBasedTable::Open::TailPrefetchLen", [&](void* arg) {
|
|
463
|
+
std::pair<size_t*, size_t*>* prefetch_off_len_pair =
|
|
464
|
+
static_cast<std::pair<size_t*, size_t*>*>(arg);
|
|
465
|
+
size_t* prefetch_off = prefetch_off_len_pair->first;
|
|
466
|
+
size_t* tail_size = prefetch_off_len_pair->second;
|
|
467
|
+
const size_t file_size = *prefetch_off + *tail_size;
|
|
468
|
+
|
|
469
|
+
*tail_size = 1;
|
|
470
|
+
*prefetch_off = file_size - (*tail_size);
|
|
471
|
+
});
|
|
472
|
+
|
|
473
|
+
ASSERT_OK(TryReopen(options));
|
|
474
|
+
|
|
475
|
+
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
476
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
477
|
+
|
|
478
|
+
HistogramData db_open_file_read;
|
|
479
|
+
options.statistics->histogramData(FILE_READ_DB_OPEN_MICROS,
|
|
480
|
+
&db_open_file_read);
|
|
481
|
+
|
|
482
|
+
int64_t num_index_partition = GetNumIndexPartition();
|
|
483
|
+
// If the upgrade is done right, db open will prefetch all the index
|
|
484
|
+
// partitions at once, instead of doing one read per partition.
|
|
485
|
+
// That is, together with `metadata_block_size == 1`, there will be more
|
|
486
|
+
// index partitions than number of non index partitions reads.
|
|
487
|
+
ASSERT_LT(db_open_file_read.count, num_index_partition);
|
|
488
|
+
|
|
489
|
+
Close();
|
|
490
|
+
}
|
|
491
|
+
|
|
326
492
|
// This test verifies BlockBasedTableOptions.max_auto_readahead_size is
|
|
327
493
|
// configured dynamically.
|
|
328
494
|
TEST_P(PrefetchTest, ConfigureAutoMaxReadaheadSize) {
|
|
@@ -385,7 +551,7 @@ TEST_P(PrefetchTest, ConfigureAutoMaxReadaheadSize) {
|
|
|
385
551
|
}
|
|
386
552
|
Close();
|
|
387
553
|
std::vector<int> buff_prefectch_level_count = {0, 0, 0};
|
|
388
|
-
TryReopen(options);
|
|
554
|
+
ASSERT_OK(TryReopen(options));
|
|
389
555
|
{
|
|
390
556
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
391
557
|
fs->ClearPrefetchCount();
|
|
@@ -513,7 +679,7 @@ TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) {
|
|
|
513
679
|
}
|
|
514
680
|
Close();
|
|
515
681
|
|
|
516
|
-
TryReopen(options);
|
|
682
|
+
ASSERT_OK(TryReopen(options));
|
|
517
683
|
{
|
|
518
684
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
519
685
|
fs->ClearPrefetchCount();
|
|
@@ -530,8 +696,8 @@ TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) {
|
|
|
530
696
|
"{initial_auto_readahead_size=0;}"}}));
|
|
531
697
|
break;
|
|
532
698
|
case 1:
|
|
533
|
-
// intial_auto_readahead_size and max_auto_readahead_size are set
|
|
534
|
-
// so readahead_size remains same.
|
|
699
|
+
// intial_auto_readahead_size and max_auto_readahead_size are set
|
|
700
|
+
// same so readahead_size remains same.
|
|
535
701
|
ASSERT_OK(db_->SetOptions({{"block_based_table_factory",
|
|
536
702
|
"{initial_auto_readahead_size=4096;max_"
|
|
537
703
|
"auto_readahead_size=4096;}"}}));
|
|
@@ -628,7 +794,7 @@ TEST_P(PrefetchTest, ConfigureNumFilesReadsForReadaheadSize) {
|
|
|
628
794
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
|
|
629
795
|
|
|
630
796
|
Close();
|
|
631
|
-
TryReopen(options);
|
|
797
|
+
ASSERT_OK(TryReopen(options));
|
|
632
798
|
|
|
633
799
|
fs->ClearPrefetchCount();
|
|
634
800
|
buff_prefetch_count = 0;
|
|
@@ -638,8 +804,9 @@ TEST_P(PrefetchTest, ConfigureNumFilesReadsForReadaheadSize) {
|
|
|
638
804
|
/*
|
|
639
805
|
* Reseek keys from sequential Data Blocks within same partitioned
|
|
640
806
|
* index. It will prefetch the data block at the first seek since
|
|
641
|
-
* num_file_reads_for_auto_readahead = 0. Data Block size is nearly 4076
|
|
642
|
-
* readahead will fetch 8 * 1024 data more initially (2 more data
|
|
807
|
+
* num_file_reads_for_auto_readahead = 0. Data Block size is nearly 4076
|
|
808
|
+
* so readahead will fetch 8 * 1024 data more initially (2 more data
|
|
809
|
+
* blocks).
|
|
643
810
|
*/
|
|
644
811
|
iter->Seek(BuildKey(0)); // Prefetch data + index block since
|
|
645
812
|
// num_file_reads_for_auto_readahead = 0.
|
|
@@ -737,8 +904,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
737
904
|
/*
|
|
738
905
|
* Reseek keys from sequential Data Blocks within same partitioned
|
|
739
906
|
* index. After 2 sequential reads it will prefetch the data block.
|
|
740
|
-
* Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data
|
|
741
|
-
* initially (2 more data blocks).
|
|
907
|
+
* Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data
|
|
908
|
+
* more initially (2 more data blocks).
|
|
742
909
|
*/
|
|
743
910
|
iter->Seek(BuildKey(0));
|
|
744
911
|
ASSERT_TRUE(iter->Valid());
|
|
@@ -815,9 +982,9 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
815
982
|
{
|
|
816
983
|
/*
|
|
817
984
|
* Reseek keys from sequential data blocks to set implicit auto readahead
|
|
818
|
-
* and prefetch data but after that iterate over different (non
|
|
819
|
-
* data blocks which won't prefetch any data further. So
|
|
820
|
-
* will be 1 for the first one.
|
|
985
|
+
* and prefetch data but after that iterate over different (non
|
|
986
|
+
* sequential) data blocks which won't prefetch any data further. So
|
|
987
|
+
* buff_prefetch_count will be 1 for the first one.
|
|
821
988
|
*/
|
|
822
989
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
823
990
|
iter->Seek(BuildKey(0));
|
|
@@ -844,8 +1011,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
844
1011
|
buff_prefetch_count = 0;
|
|
845
1012
|
}
|
|
846
1013
|
|
|
847
|
-
// Read sequentially to confirm readahead_size is reset to initial value
|
|
848
|
-
// more data blocks)
|
|
1014
|
+
// Read sequentially to confirm readahead_size is reset to initial value
|
|
1015
|
+
// (2 more data blocks)
|
|
849
1016
|
iter->Seek(BuildKey(1011));
|
|
850
1017
|
ASSERT_TRUE(iter->Valid());
|
|
851
1018
|
iter->Seek(BuildKey(1015));
|
|
@@ -895,8 +1062,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
895
1062
|
}
|
|
896
1063
|
{
|
|
897
1064
|
/*
|
|
898
|
-
* Reseek over different keys from different blocks. buff_prefetch_count
|
|
899
|
-
* set 0.
|
|
1065
|
+
* Reseek over different keys from different blocks. buff_prefetch_count
|
|
1066
|
+
* is set 0.
|
|
900
1067
|
*/
|
|
901
1068
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
902
1069
|
int i = 0;
|
|
@@ -1000,8 +1167,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) {
|
|
|
1000
1167
|
/*
|
|
1001
1168
|
* Reseek keys from sequential Data Blocks within same partitioned
|
|
1002
1169
|
* index. After 2 sequential reads it will prefetch the data block.
|
|
1003
|
-
* Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data
|
|
1004
|
-
* initially (2 more data blocks).
|
|
1170
|
+
* Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data
|
|
1171
|
+
* more initially (2 more data blocks).
|
|
1005
1172
|
*/
|
|
1006
1173
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
1007
1174
|
// Warm up the cache
|
|
@@ -1028,8 +1195,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) {
|
|
|
1028
1195
|
ASSERT_TRUE(iter->Valid());
|
|
1029
1196
|
iter->Seek(BuildKey(1004)); // Prefetch data (not in cache).
|
|
1030
1197
|
ASSERT_TRUE(iter->Valid());
|
|
1031
|
-
// Missed one sequential block but next is in already in buffer so
|
|
1032
|
-
// will not be reset.
|
|
1198
|
+
// Missed one sequential block but next is in already in buffer so
|
|
1199
|
+
// readahead will not be reset.
|
|
1033
1200
|
iter->Seek(BuildKey(1011));
|
|
1034
1201
|
ASSERT_TRUE(iter->Valid());
|
|
1035
1202
|
// Prefetch data but blocks are in cache so no prefetch and reset.
|
|
@@ -1164,10 +1331,14 @@ TEST_P(PrefetchTest, DBIterLevelReadAhead) {
|
|
|
1164
1331
|
// This test verifies the functionality of ReadOptions.adaptive_readahead when
|
|
1165
1332
|
// async_io is enabled.
|
|
1166
1333
|
TEST_P(PrefetchTest, DBIterLevelReadAheadWithAsyncIO) {
|
|
1334
|
+
if (mem_env_ || encrypted_env_) {
|
|
1335
|
+
ROCKSDB_GTEST_BYPASS("Test requires non-mem or non-encrypted environment");
|
|
1336
|
+
return;
|
|
1337
|
+
}
|
|
1167
1338
|
const int kNumKeys = 1000;
|
|
1168
1339
|
// Set options
|
|
1169
1340
|
std::shared_ptr<MockFS> fs =
|
|
1170
|
-
std::make_shared<MockFS>(
|
|
1341
|
+
std::make_shared<MockFS>(FileSystem::Default(), false);
|
|
1171
1342
|
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
|
|
1172
1343
|
|
|
1173
1344
|
bool use_direct_io = std::get<0>(GetParam());
|
|
@@ -1201,16 +1372,26 @@ TEST_P(PrefetchTest, DBIterLevelReadAheadWithAsyncIO) {
|
|
|
1201
1372
|
}
|
|
1202
1373
|
MoveFilesToLevel(2);
|
|
1203
1374
|
int buff_async_prefetch_count = 0;
|
|
1375
|
+
int buff_prefetch_count = 0;
|
|
1204
1376
|
int readahead_carry_over_count = 0;
|
|
1205
1377
|
int num_sst_files = NumTableFilesAtLevel(2);
|
|
1206
1378
|
size_t current_readahead_size = 0;
|
|
1379
|
+
bool read_async_called = false;
|
|
1207
1380
|
|
|
1208
1381
|
// Test - Iterate over the keys sequentially.
|
|
1209
1382
|
{
|
|
1383
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
1384
|
+
"FilePrefetchBuffer::Prefetch:Start",
|
|
1385
|
+
[&](void*) { buff_prefetch_count++; });
|
|
1386
|
+
|
|
1210
1387
|
SyncPoint::GetInstance()->SetCallBack(
|
|
1211
1388
|
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
1212
1389
|
[&](void*) { buff_async_prefetch_count++; });
|
|
1213
1390
|
|
|
1391
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
1392
|
+
"UpdateResults::io_uring_result",
|
|
1393
|
+
[&](void* /*arg*/) { read_async_called = true; });
|
|
1394
|
+
|
|
1214
1395
|
// The callback checks, since reads are sequential, readahead_size doesn't
|
|
1215
1396
|
// start from 8KB when iterator moves to next file and its called
|
|
1216
1397
|
// num_sst_files-1 times (excluding for first file).
|
|
@@ -1253,15 +1434,18 @@ TEST_P(PrefetchTest, DBIterLevelReadAheadWithAsyncIO) {
|
|
|
1253
1434
|
} else {
|
|
1254
1435
|
ASSERT_EQ(readahead_carry_over_count, 0);
|
|
1255
1436
|
}
|
|
1256
|
-
ASSERT_GT(buff_async_prefetch_count, 0);
|
|
1257
1437
|
|
|
1258
1438
|
// Check stats to make sure async prefetch is done.
|
|
1259
1439
|
{
|
|
1260
1440
|
HistogramData async_read_bytes;
|
|
1261
1441
|
options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
|
|
1262
|
-
|
|
1442
|
+
// Not all platforms support iouring. In that case, ReadAsync in posix
|
|
1443
|
+
// won't submit async requests.
|
|
1444
|
+
if (read_async_called) {
|
|
1445
|
+
ASSERT_GT(buff_async_prefetch_count, 0);
|
|
1263
1446
|
ASSERT_GT(async_read_bytes.count, 0);
|
|
1264
1447
|
} else {
|
|
1448
|
+
ASSERT_GT(buff_prefetch_count, 0);
|
|
1265
1449
|
ASSERT_EQ(async_read_bytes.count, 0);
|
|
1266
1450
|
}
|
|
1267
1451
|
}
|
|
@@ -1294,6 +1478,7 @@ TEST_P(PrefetchTest, DBIterAsyncIONoIOUring) {
|
|
|
1294
1478
|
Status s = TryReopen(options);
|
|
1295
1479
|
if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) {
|
|
1296
1480
|
// If direct IO is not supported, skip the test
|
|
1481
|
+
enable_io_uring = true;
|
|
1297
1482
|
return;
|
|
1298
1483
|
} else {
|
|
1299
1484
|
ASSERT_OK(s);
|
|
@@ -1375,7 +1560,8 @@ class PrefetchTest1 : public DBTestBase,
|
|
|
1375
1560
|
public:
|
|
1376
1561
|
PrefetchTest1() : DBTestBase("prefetch_test1", true) {}
|
|
1377
1562
|
|
|
1378
|
-
void SetGenericOptions(Env* env, bool use_direct_io,
|
|
1563
|
+
virtual void SetGenericOptions(Env* env, bool use_direct_io,
|
|
1564
|
+
Options& options) {
|
|
1379
1565
|
options = CurrentOptions();
|
|
1380
1566
|
options.write_buffer_size = 1024;
|
|
1381
1567
|
options.create_if_missing = true;
|
|
@@ -1399,6 +1585,106 @@ class PrefetchTest1 : public DBTestBase,
|
|
|
1399
1585
|
|
|
1400
1586
|
INSTANTIATE_TEST_CASE_P(PrefetchTest1, PrefetchTest1, ::testing::Bool());
|
|
1401
1587
|
|
|
1588
|
+
TEST_P(PrefetchTest1, SeekWithExtraPrefetchAsyncIO) {
|
|
1589
|
+
const int kNumKeys = 2000;
|
|
1590
|
+
// Set options
|
|
1591
|
+
std::shared_ptr<MockFS> fs =
|
|
1592
|
+
std::make_shared<MockFS>(env_->GetFileSystem(), false);
|
|
1593
|
+
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
|
|
1594
|
+
|
|
1595
|
+
Options options;
|
|
1596
|
+
SetGenericOptions(env.get(), GetParam(), options);
|
|
1597
|
+
options.statistics = CreateDBStatistics();
|
|
1598
|
+
BlockBasedTableOptions table_options;
|
|
1599
|
+
SetBlockBasedTableOptions(table_options);
|
|
1600
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
1601
|
+
|
|
1602
|
+
Status s = TryReopen(options);
|
|
1603
|
+
if (GetParam() && (s.IsNotSupported() || s.IsInvalidArgument())) {
|
|
1604
|
+
// If direct IO is not supported, skip the test
|
|
1605
|
+
return;
|
|
1606
|
+
} else {
|
|
1607
|
+
ASSERT_OK(s);
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
WriteBatch batch;
|
|
1611
|
+
Random rnd(309);
|
|
1612
|
+
for (int i = 0; i < kNumKeys; i++) {
|
|
1613
|
+
ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000)));
|
|
1614
|
+
}
|
|
1615
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1616
|
+
|
|
1617
|
+
std::string start_key = BuildKey(0);
|
|
1618
|
+
std::string end_key = BuildKey(kNumKeys - 1);
|
|
1619
|
+
Slice least(start_key.data(), start_key.size());
|
|
1620
|
+
Slice greatest(end_key.data(), end_key.size());
|
|
1621
|
+
|
|
1622
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
|
|
1623
|
+
Close();
|
|
1624
|
+
|
|
1625
|
+
for (size_t i = 0; i < 3; i++) {
|
|
1626
|
+
table_options.num_file_reads_for_auto_readahead = i;
|
|
1627
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
1628
|
+
|
|
1629
|
+
s = TryReopen(options);
|
|
1630
|
+
ASSERT_OK(s);
|
|
1631
|
+
|
|
1632
|
+
int buff_prefetch_count = 0;
|
|
1633
|
+
int extra_prefetch_buff_cnt = 0;
|
|
1634
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
1635
|
+
"FilePrefetchBuffer::PrefetchAsync:ExtraPrefetching",
|
|
1636
|
+
[&](void*) { extra_prefetch_buff_cnt++; });
|
|
1637
|
+
|
|
1638
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
1639
|
+
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
1640
|
+
[&](void*) { buff_prefetch_count++; });
|
|
1641
|
+
|
|
1642
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
1643
|
+
|
|
1644
|
+
ReadOptions ro;
|
|
1645
|
+
ro.async_io = true;
|
|
1646
|
+
{
|
|
1647
|
+
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
|
|
1648
|
+
// First Seek
|
|
1649
|
+
iter->Seek(BuildKey(
|
|
1650
|
+
0)); // Prefetch data on seek because of seek parallelization.
|
|
1651
|
+
ASSERT_TRUE(iter->Valid());
|
|
1652
|
+
|
|
1653
|
+
// Do extra prefetching in Seek only if
|
|
1654
|
+
// num_file_reads_for_auto_readahead = 0.
|
|
1655
|
+
ASSERT_EQ(extra_prefetch_buff_cnt, (i == 0 ? 1 : 0));
|
|
1656
|
+
// buff_prefetch_count is 2 because of index block when
|
|
1657
|
+
// num_file_reads_for_auto_readahead = 0.
|
|
1658
|
+
// If num_file_reads_for_auto_readahead > 0, index block isn't
|
|
1659
|
+
// prefetched.
|
|
1660
|
+
ASSERT_EQ(buff_prefetch_count, i == 0 ? 2 : 1);
|
|
1661
|
+
|
|
1662
|
+
extra_prefetch_buff_cnt = 0;
|
|
1663
|
+
buff_prefetch_count = 0;
|
|
1664
|
+
// Reset all values of FilePrefetchBuffer on new seek.
|
|
1665
|
+
iter->Seek(
|
|
1666
|
+
BuildKey(22)); // Prefetch data because of seek parallelization.
|
|
1667
|
+
ASSERT_TRUE(iter->Valid());
|
|
1668
|
+
// Do extra prefetching in Seek only if
|
|
1669
|
+
// num_file_reads_for_auto_readahead = 0.
|
|
1670
|
+
ASSERT_EQ(extra_prefetch_buff_cnt, (i == 0 ? 1 : 0));
|
|
1671
|
+
ASSERT_EQ(buff_prefetch_count, 1);
|
|
1672
|
+
|
|
1673
|
+
extra_prefetch_buff_cnt = 0;
|
|
1674
|
+
buff_prefetch_count = 0;
|
|
1675
|
+
// Reset all values of FilePrefetchBuffer on new seek.
|
|
1676
|
+
iter->Seek(
|
|
1677
|
+
BuildKey(33)); // Prefetch data because of seek parallelization.
|
|
1678
|
+
ASSERT_TRUE(iter->Valid());
|
|
1679
|
+
// Do extra prefetching in Seek only if
|
|
1680
|
+
// num_file_reads_for_auto_readahead = 0.
|
|
1681
|
+
ASSERT_EQ(extra_prefetch_buff_cnt, (i == 0 ? 1 : 0));
|
|
1682
|
+
ASSERT_EQ(buff_prefetch_count, 1);
|
|
1683
|
+
}
|
|
1684
|
+
Close();
|
|
1685
|
+
}
|
|
1686
|
+
}
|
|
1687
|
+
|
|
1402
1688
|
// This test verifies the functionality of ReadOptions.adaptive_readahead when
|
|
1403
1689
|
// reads are not sequential.
|
|
1404
1690
|
TEST_P(PrefetchTest1, NonSequentialReadsWithAdaptiveReadahead) {
|
|
@@ -1482,8 +1768,8 @@ TEST_P(PrefetchTest1, NonSequentialReadsWithAdaptiveReadahead) {
|
|
|
1482
1768
|
Close();
|
|
1483
1769
|
}
|
|
1484
1770
|
|
|
1485
|
-
// This test verifies the functionality of adaptive_readaheadsize with cache
|
|
1486
|
-
// if block is found in cache, decrease the readahead_size if
|
|
1771
|
+
// This test verifies the functionality of adaptive_readaheadsize with cache
|
|
1772
|
+
// and if block is found in cache, decrease the readahead_size if
|
|
1487
1773
|
// - its enabled internally by RocksDB (implicit_auto_readahead_) and,
|
|
1488
1774
|
// - readahead_size is greater than 0 and,
|
|
1489
1775
|
// - the block would have called prefetch API if not found in cache for
|
|
@@ -1605,8 +1891,8 @@ TEST_P(PrefetchTest1, DecreaseReadAheadIfInCache) {
|
|
|
1605
1891
|
ASSERT_TRUE(iter->Valid());
|
|
1606
1892
|
|
|
1607
1893
|
// Prefetch data (not in buffer) but found in cache. So decrease
|
|
1608
|
-
// readahead_size. Since it will 0 after decrementing so readahead_size
|
|
1609
|
-
// be set to initial value.
|
|
1894
|
+
// readahead_size. Since it will 0 after decrementing so readahead_size
|
|
1895
|
+
// will be set to initial value.
|
|
1610
1896
|
iter->Seek(BuildKey(1019));
|
|
1611
1897
|
ASSERT_TRUE(iter->Valid());
|
|
1612
1898
|
expected_current_readahead_size = std::max(
|
|
@@ -1629,10 +1915,14 @@ TEST_P(PrefetchTest1, DecreaseReadAheadIfInCache) {
|
|
|
1629
1915
|
// This test verifies the basic functionality of seek parallelization for
|
|
1630
1916
|
// async_io.
|
|
1631
1917
|
TEST_P(PrefetchTest1, SeekParallelizationTest) {
|
|
1918
|
+
if (mem_env_ || encrypted_env_) {
|
|
1919
|
+
ROCKSDB_GTEST_BYPASS("Test requires non-mem or non-encrypted environment");
|
|
1920
|
+
return;
|
|
1921
|
+
}
|
|
1632
1922
|
const int kNumKeys = 2000;
|
|
1633
1923
|
// Set options
|
|
1634
|
-
std::shared_ptr<MockFS> fs =
|
|
1635
|
-
|
|
1924
|
+
std::shared_ptr<MockFS> fs = std::make_shared<MockFS>(
|
|
1925
|
+
FileSystem::Default(), /*support_prefetch=*/false);
|
|
1636
1926
|
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
|
|
1637
1927
|
|
|
1638
1928
|
Options options;
|
|
@@ -1665,10 +1955,19 @@ TEST_P(PrefetchTest1, SeekParallelizationTest) {
|
|
|
1665
1955
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
|
|
1666
1956
|
|
|
1667
1957
|
int buff_prefetch_count = 0;
|
|
1958
|
+
int buff_prefetch_async_count = 0;
|
|
1668
1959
|
|
|
1669
1960
|
SyncPoint::GetInstance()->SetCallBack(
|
|
1670
1961
|
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
1671
|
-
[&](void*) {
|
|
1962
|
+
[&](void*) { buff_prefetch_async_count++; });
|
|
1963
|
+
|
|
1964
|
+
SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
|
|
1965
|
+
[&](void*) { buff_prefetch_count++; });
|
|
1966
|
+
|
|
1967
|
+
bool read_async_called = false;
|
|
1968
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
1969
|
+
"UpdateResults::io_uring_result",
|
|
1970
|
+
[&](void* /*arg*/) { read_async_called = true; });
|
|
1672
1971
|
|
|
1673
1972
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
1674
1973
|
ReadOptions ro;
|
|
@@ -1703,17 +2002,276 @@ TEST_P(PrefetchTest1, SeekParallelizationTest) {
|
|
|
1703
2002
|
iter->Next();
|
|
1704
2003
|
ASSERT_TRUE(iter->Valid());
|
|
1705
2004
|
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
//
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
2005
|
+
HistogramData async_read_bytes;
|
|
2006
|
+
options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
|
|
2007
|
+
// not all platforms support io_uring. In that case it'll fallback to
|
|
2008
|
+
// normal prefetching without async_io.
|
|
2009
|
+
if (read_async_called) {
|
|
2010
|
+
ASSERT_EQ(buff_prefetch_async_count, 2);
|
|
1712
2011
|
ASSERT_GT(async_read_bytes.count, 0);
|
|
1713
2012
|
ASSERT_GT(get_perf_context()->number_async_seek, 0);
|
|
2013
|
+
} else {
|
|
2014
|
+
ASSERT_EQ(buff_prefetch_count, 1);
|
|
2015
|
+
}
|
|
2016
|
+
}
|
|
2017
|
+
Close();
|
|
2018
|
+
}
|
|
2019
|
+
|
|
2020
|
+
// This test checks if readahead_size is trimmed when upper_bound is reached.
|
|
2021
|
+
// It tests with different combinations of async_io disabled/enabled,
|
|
2022
|
+
// readahead_size (implicit and explicit), and num_file_reads_for_auto_readahead
|
|
2023
|
+
// from 0 to 2.
|
|
2024
|
+
TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) {
|
|
2025
|
+
if (mem_env_ || encrypted_env_) {
|
|
2026
|
+
ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
|
|
2027
|
+
return;
|
|
2028
|
+
}
|
|
2029
|
+
|
|
2030
|
+
// First param is if the mockFS support_prefetch or not
|
|
2031
|
+
std::shared_ptr<MockFS> fs =
|
|
2032
|
+
std::make_shared<MockFS>(FileSystem::Default(), false);
|
|
2033
|
+
|
|
2034
|
+
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
|
|
2035
|
+
Options options;
|
|
2036
|
+
SetGenericOptions(env.get(), /*use_direct_io=*/false, options);
|
|
2037
|
+
options.statistics = CreateDBStatistics();
|
|
2038
|
+
BlockBasedTableOptions table_options;
|
|
2039
|
+
SetBlockBasedTableOptions(table_options);
|
|
2040
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
2041
|
+
|
|
2042
|
+
Status s = TryReopen(options);
|
|
2043
|
+
ASSERT_OK(s);
|
|
2044
|
+
|
|
2045
|
+
Random rnd(309);
|
|
2046
|
+
WriteBatch batch;
|
|
2047
|
+
|
|
2048
|
+
for (int i = 0; i < 26; i++) {
|
|
2049
|
+
std::string key = "my_key_";
|
|
2050
|
+
|
|
2051
|
+
for (int j = 0; j < 10; j++) {
|
|
2052
|
+
key += char('a' + i);
|
|
2053
|
+
ASSERT_OK(batch.Put(key, rnd.RandomString(1000)));
|
|
1714
2054
|
}
|
|
2055
|
+
}
|
|
2056
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
1715
2057
|
|
|
2058
|
+
std::string start_key = "my_key_a";
|
|
2059
|
+
|
|
2060
|
+
std::string end_key = "my_key_";
|
|
2061
|
+
for (int j = 0; j < 10; j++) {
|
|
2062
|
+
end_key += char('a' + 25);
|
|
2063
|
+
}
|
|
2064
|
+
|
|
2065
|
+
Slice least(start_key.data(), start_key.size());
|
|
2066
|
+
Slice greatest(end_key.data(), end_key.size());
|
|
2067
|
+
|
|
2068
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
|
|
2069
|
+
|
|
2070
|
+
int buff_prefetch_count = 0;
|
|
2071
|
+
|
|
2072
|
+
// Try with different num_file_reads_for_auto_readahead from 0 to 3.
|
|
2073
|
+
for (size_t i = 0; i < 3; i++) {
|
|
2074
|
+
table_options.num_file_reads_for_auto_readahead = i;
|
|
2075
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
2076
|
+
|
|
2077
|
+
s = TryReopen(options);
|
|
2078
|
+
ASSERT_OK(s);
|
|
2079
|
+
|
|
2080
|
+
int buff_count_with_tuning = 0, buff_count_without_tuning = 0;
|
|
2081
|
+
int keys_with_tuning = 0, keys_without_tuning = 0;
|
|
1716
2082
|
buff_prefetch_count = 0;
|
|
2083
|
+
|
|
2084
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
2085
|
+
"FilePrefetchBuffer::Prefetch:Start",
|
|
2086
|
+
[&](void*) { buff_prefetch_count++; });
|
|
2087
|
+
|
|
2088
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
2089
|
+
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
2090
|
+
[&](void*) { buff_prefetch_count++; });
|
|
2091
|
+
|
|
2092
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
2093
|
+
|
|
2094
|
+
ReadOptions ropts;
|
|
2095
|
+
if (std::get<0>(GetParam())) {
|
|
2096
|
+
ropts.readahead_size = 32768;
|
|
2097
|
+
}
|
|
2098
|
+
if (std::get<1>(GetParam())) {
|
|
2099
|
+
ropts.async_io = true;
|
|
2100
|
+
}
|
|
2101
|
+
|
|
2102
|
+
Slice ub = Slice("my_key_uuu");
|
|
2103
|
+
ropts.iterate_upper_bound = &ub;
|
|
2104
|
+
Slice seek_key = Slice("my_key_aaa");
|
|
2105
|
+
|
|
2106
|
+
// With tuning readahead_size.
|
|
2107
|
+
{
|
|
2108
|
+
ASSERT_OK(options.statistics->Reset());
|
|
2109
|
+
ropts.auto_readahead_size = true;
|
|
2110
|
+
|
|
2111
|
+
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ropts));
|
|
2112
|
+
|
|
2113
|
+
iter->Seek(seek_key);
|
|
2114
|
+
|
|
2115
|
+
while (iter->Valid()) {
|
|
2116
|
+
keys_with_tuning++;
|
|
2117
|
+
iter->Next();
|
|
2118
|
+
}
|
|
2119
|
+
|
|
2120
|
+
uint64_t readhahead_trimmed =
|
|
2121
|
+
options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED);
|
|
2122
|
+
ASSERT_GT(readhahead_trimmed, 0);
|
|
2123
|
+
buff_count_with_tuning = buff_prefetch_count;
|
|
2124
|
+
}
|
|
2125
|
+
|
|
2126
|
+
// Without tuning readahead_size
|
|
2127
|
+
{
|
|
2128
|
+
buff_prefetch_count = 0;
|
|
2129
|
+
ASSERT_OK(options.statistics->Reset());
|
|
2130
|
+
ropts.auto_readahead_size = false;
|
|
2131
|
+
|
|
2132
|
+
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ropts));
|
|
2133
|
+
|
|
2134
|
+
iter->Seek(seek_key);
|
|
2135
|
+
|
|
2136
|
+
while (iter->Valid()) {
|
|
2137
|
+
keys_without_tuning++;
|
|
2138
|
+
iter->Next();
|
|
2139
|
+
}
|
|
2140
|
+
buff_count_without_tuning = buff_prefetch_count;
|
|
2141
|
+
uint64_t readhahead_trimmed =
|
|
2142
|
+
options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED);
|
|
2143
|
+
ASSERT_EQ(readhahead_trimmed, 0);
|
|
2144
|
+
}
|
|
2145
|
+
|
|
2146
|
+
{
|
|
2147
|
+
// Verify results with and without tuning.
|
|
2148
|
+
if (std::get<1>(GetParam())) {
|
|
2149
|
+
// In case of async_io.
|
|
2150
|
+
ASSERT_GE(buff_count_with_tuning, buff_count_without_tuning);
|
|
2151
|
+
} else {
|
|
2152
|
+
ASSERT_EQ(buff_count_without_tuning, buff_count_with_tuning);
|
|
2153
|
+
}
|
|
2154
|
+
// Prefetching should happen.
|
|
2155
|
+
ASSERT_GT(buff_count_without_tuning, 0);
|
|
2156
|
+
ASSERT_GT(buff_count_with_tuning, 0);
|
|
2157
|
+
// Number of keys should be equal.
|
|
2158
|
+
ASSERT_EQ(keys_without_tuning, keys_with_tuning);
|
|
2159
|
+
}
|
|
2160
|
+
Close();
|
|
2161
|
+
}
|
|
2162
|
+
}
|
|
2163
|
+
|
|
2164
|
+
// This test checks if readahead_size is trimmed when upper_bound is reached
|
|
2165
|
+
// during Seek in async_io and it goes for polling without any extra
|
|
2166
|
+
// prefetching.
|
|
2167
|
+
TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBoundSeekOnly) {
|
|
2168
|
+
if (mem_env_ || encrypted_env_) {
|
|
2169
|
+
ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
|
|
2170
|
+
return;
|
|
2171
|
+
}
|
|
2172
|
+
|
|
2173
|
+
// First param is whether the MockFS has support_prefetch enabled or not.
|
|
2174
|
+
std::shared_ptr<MockFS> fs =
|
|
2175
|
+
std::make_shared<MockFS>(FileSystem::Default(), false);
|
|
2176
|
+
|
|
2177
|
+
bool use_direct_io = false;
|
|
2178
|
+
if (std::get<0>(GetParam())) {
|
|
2179
|
+
use_direct_io = true;
|
|
2180
|
+
}
|
|
2181
|
+
|
|
2182
|
+
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
|
|
2183
|
+
Options options;
|
|
2184
|
+
SetGenericOptions(env.get(), use_direct_io, options);
|
|
2185
|
+
options.statistics = CreateDBStatistics();
|
|
2186
|
+
BlockBasedTableOptions table_options;
|
|
2187
|
+
SetBlockBasedTableOptions(table_options);
|
|
2188
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
2189
|
+
|
|
2190
|
+
Status s = TryReopen(options);
|
|
2191
|
+
if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) {
|
|
2192
|
+
// If direct IO is not supported, skip the test
|
|
2193
|
+
return;
|
|
2194
|
+
} else {
|
|
2195
|
+
ASSERT_OK(s);
|
|
2196
|
+
}
|
|
2197
|
+
|
|
2198
|
+
Random rnd(309);
|
|
2199
|
+
WriteBatch batch;
|
|
2200
|
+
|
|
2201
|
+
for (int i = 0; i < 26; i++) {
|
|
2202
|
+
std::string key = "my_key_";
|
|
2203
|
+
|
|
2204
|
+
for (int j = 0; j < 10; j++) {
|
|
2205
|
+
key += char('a' + i);
|
|
2206
|
+
ASSERT_OK(batch.Put(key, rnd.RandomString(1000)));
|
|
2207
|
+
}
|
|
2208
|
+
}
|
|
2209
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
2210
|
+
|
|
2211
|
+
std::string start_key = "my_key_a";
|
|
2212
|
+
|
|
2213
|
+
std::string end_key = "my_key_";
|
|
2214
|
+
for (int j = 0; j < 10; j++) {
|
|
2215
|
+
end_key += char('a' + 25);
|
|
2216
|
+
}
|
|
2217
|
+
|
|
2218
|
+
Slice least(start_key.data(), start_key.size());
|
|
2219
|
+
Slice greatest(end_key.data(), end_key.size());
|
|
2220
|
+
|
|
2221
|
+
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
|
|
2222
|
+
|
|
2223
|
+
s = TryReopen(options);
|
|
2224
|
+
ASSERT_OK(s);
|
|
2225
|
+
|
|
2226
|
+
int buff_count_with_tuning = 0;
|
|
2227
|
+
|
|
2228
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
2229
|
+
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
2230
|
+
[&](void*) { buff_count_with_tuning++; });
|
|
2231
|
+
|
|
2232
|
+
bool read_async_called = false;
|
|
2233
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
2234
|
+
"UpdateResults::io_uring_result",
|
|
2235
|
+
[&](void* /*arg*/) { read_async_called = true; });
|
|
2236
|
+
|
|
2237
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
2238
|
+
|
|
2239
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
2240
|
+
|
|
2241
|
+
ReadOptions ropts;
|
|
2242
|
+
if (std::get<1>(GetParam())) {
|
|
2243
|
+
ropts.readahead_size = 32768;
|
|
2244
|
+
}
|
|
2245
|
+
ropts.async_io = true;
|
|
2246
|
+
|
|
2247
|
+
Slice ub = Slice("my_key_aaa");
|
|
2248
|
+
ropts.iterate_upper_bound = &ub;
|
|
2249
|
+
Slice seek_key = Slice("my_key_aaa");
|
|
2250
|
+
|
|
2251
|
+
// With tuning readahead_size.
|
|
2252
|
+
{
|
|
2253
|
+
ASSERT_OK(options.statistics->Reset());
|
|
2254
|
+
ropts.auto_readahead_size = true;
|
|
2255
|
+
|
|
2256
|
+
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ropts));
|
|
2257
|
+
|
|
2258
|
+
iter->Seek(seek_key);
|
|
2259
|
+
|
|
2260
|
+
ASSERT_OK(iter->status());
|
|
2261
|
+
|
|
2262
|
+
// Verify results.
|
|
2263
|
+
uint64_t readhahead_trimmed =
|
|
2264
|
+
options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED);
|
|
2265
|
+
// Readahead got trimmed.
|
|
2266
|
+
if (read_async_called) {
|
|
2267
|
+
ASSERT_GT(readhahead_trimmed, 0);
|
|
2268
|
+
// Seek called PrefetchAsync to poll the data.
|
|
2269
|
+
ASSERT_EQ(1, buff_count_with_tuning);
|
|
2270
|
+
} else {
|
|
2271
|
+
// Async read did not happen (the io_uring result sync point was not hit).
|
|
2272
|
+
ASSERT_GE(readhahead_trimmed, 0);
|
|
2273
|
+
ASSERT_EQ(0, buff_count_with_tuning);
|
|
2274
|
+
}
|
|
1717
2275
|
}
|
|
1718
2276
|
Close();
|
|
1719
2277
|
}
|
|
@@ -2294,7 +2852,7 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) {
|
|
|
2294
2852
|
std::unique_ptr<RandomAccessFileReader> r;
|
|
2295
2853
|
Read(fname, opts, &r);
|
|
2296
2854
|
|
|
2297
|
-
FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, fs());
|
|
2855
|
+
FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, 0, fs());
|
|
2298
2856
|
Slice result;
|
|
2299
2857
|
// Simulate a seek of 4096 bytes at offset 0. Due to the readahead settings,
|
|
2300
2858
|
// it will do two reads of 4096+8192 and 8192
|
|
@@ -2310,8 +2868,93 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) {
|
|
|
2310
2868
|
fpb.UpdateReadPattern(0, 4096, false);
|
|
2311
2869
|
// Now read some data that straddles the two prefetch buffers - offset 8192 to
|
|
2312
2870
|
// 16384
|
|
2313
|
-
|
|
2314
|
-
|
|
2871
|
+
IOOptions io_opts;
|
|
2872
|
+
io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW;
|
|
2873
|
+
ASSERT_TRUE(
|
|
2874
|
+
fpb.TryReadFromCacheAsync(io_opts, r.get(), 8192, 8192, &result, &s));
|
|
2875
|
+
}
|
|
2876
|
+
|
|
2877
|
+
// Test to ensure when PrefetchAsync is called during seek, it doesn't do any
|
|
2878
|
+
// alignment or prefetch extra if readahead is not enabled during seek.
|
|
2879
|
+
TEST_F(FilePrefetchBufferTest, SeekWithoutAlignment) {
|
|
2880
|
+
std::string fname = "seek-wwithout-alignment";
|
|
2881
|
+
Random rand(0);
|
|
2882
|
+
std::string content = rand.RandomString(32768);
|
|
2883
|
+
Write(fname, content);
|
|
2884
|
+
|
|
2885
|
+
FileOptions opts;
|
|
2886
|
+
std::unique_ptr<RandomAccessFileReader> r;
|
|
2887
|
+
Read(fname, opts, &r);
|
|
2888
|
+
|
|
2889
|
+
size_t alignment = r->file()->GetRequiredBufferAlignment();
|
|
2890
|
+
size_t n = alignment / 2;
|
|
2891
|
+
|
|
2892
|
+
int read_async_called = 0;
|
|
2893
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
2894
|
+
"FilePrefetchBuffer::ReadAsync",
|
|
2895
|
+
[&](void* /*arg*/) { read_async_called++; });
|
|
2896
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
2897
|
+
|
|
2898
|
+
// Without readahead enabled, there will be no alignment and offset of buffer
|
|
2899
|
+
// will be n.
|
|
2900
|
+
{
|
|
2901
|
+
FilePrefetchBuffer fpb(
|
|
2902
|
+
/*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true,
|
|
2903
|
+
/*track_min_offset=*/false, /*implicit_auto_readahead=*/true,
|
|
2904
|
+
/*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2,
|
|
2905
|
+
/*upper_bound_offset=*/0, fs());
|
|
2906
|
+
|
|
2907
|
+
Slice result;
|
|
2908
|
+
// Simulate a seek of half of alignment bytes at offset n. Due to the
|
|
2909
|
+
// readahead settings, it won't prefetch extra or do any alignment and
|
|
2910
|
+
// offset of buffer will be n.
|
|
2911
|
+
Status s = fpb.PrefetchAsync(IOOptions(), r.get(), n, n, &result);
|
|
2912
|
+
|
|
2913
|
+
// Platforms that don't have IO uring may not support async IO.
|
|
2914
|
+
if (s.IsNotSupported()) {
|
|
2915
|
+
return;
|
|
2916
|
+
}
|
|
2917
|
+
|
|
2918
|
+
ASSERT_TRUE(s.IsTryAgain());
|
|
2919
|
+
|
|
2920
|
+
IOOptions io_opts;
|
|
2921
|
+
io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW;
|
|
2922
|
+
ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), n, n, &result, &s));
|
|
2923
|
+
|
|
2924
|
+
if (read_async_called) {
|
|
2925
|
+
ASSERT_EQ(fpb.GetPrefetchOffset(), n);
|
|
2926
|
+
}
|
|
2927
|
+
}
|
|
2928
|
+
|
|
2929
|
+
// With readahead enabled, it will do the alignment and prefetch and offset of
|
|
2930
|
+
// buffer will be 0.
|
|
2931
|
+
{
|
|
2932
|
+
read_async_called = false;
|
|
2933
|
+
FilePrefetchBuffer fpb(
|
|
2934
|
+
/*readahead_size=*/16384, /*max_readahead_size=*/16384, /*enable=*/true,
|
|
2935
|
+
/*track_min_offset=*/false, /*implicit_auto_readahead=*/false,
|
|
2936
|
+
/*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2,
|
|
2937
|
+
/*upper_bound_offset=*/0, fs());
|
|
2938
|
+
|
|
2939
|
+
Slice result;
|
|
2940
|
+
// Simulate a seek of half of alignment bytes at offset n.
|
|
2941
|
+
Status s = fpb.PrefetchAsync(IOOptions(), r.get(), n, n, &result);
|
|
2942
|
+
|
|
2943
|
+
// Platforms that don't have IO uring may not support async IO.
|
|
2944
|
+
if (s.IsNotSupported()) {
|
|
2945
|
+
return;
|
|
2946
|
+
}
|
|
2947
|
+
|
|
2948
|
+
ASSERT_TRUE(s.IsTryAgain());
|
|
2949
|
+
|
|
2950
|
+
IOOptions io_opts;
|
|
2951
|
+
io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW;
|
|
2952
|
+
ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), n, n, &result, &s));
|
|
2953
|
+
|
|
2954
|
+
if (read_async_called) {
|
|
2955
|
+
ASSERT_EQ(fpb.GetPrefetchOffset(), 0);
|
|
2956
|
+
}
|
|
2957
|
+
}
|
|
2315
2958
|
}
|
|
2316
2959
|
|
|
2317
2960
|
TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) {
|
|
@@ -2327,7 +2970,8 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) {
|
|
|
2327
2970
|
FilePrefetchBuffer fpb(
|
|
2328
2971
|
/*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true,
|
|
2329
2972
|
/*track_min_offset=*/false, /*implicit_auto_readahead=*/false,
|
|
2330
|
-
/*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0,
|
|
2973
|
+
/*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0,
|
|
2974
|
+
/*upper_bound_offset=*/0, fs());
|
|
2331
2975
|
|
|
2332
2976
|
int read_async_called = 0;
|
|
2333
2977
|
SyncPoint::GetInstance()->SetCallBack(
|
|
@@ -2346,9 +2990,10 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) {
|
|
|
2346
2990
|
}
|
|
2347
2991
|
|
|
2348
2992
|
ASSERT_TRUE(s.IsTryAgain());
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2993
|
+
IOOptions io_opts;
|
|
2994
|
+
io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW;
|
|
2995
|
+
ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), /*offset=*/3000,
|
|
2996
|
+
/*length=*/4000, &async_result, &s));
|
|
2352
2997
|
// No sync call should be made.
|
|
2353
2998
|
HistogramData sst_read_micros;
|
|
2354
2999
|
stats()->histogramData(SST_READ_MICROS, &sst_read_micros);
|