@nxtedition/rocksdb 8.2.8 → 9.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +0 -21
- package/deps/rocksdb/rocksdb/CMakeLists.txt +20 -10
- package/deps/rocksdb/rocksdb/Makefile +37 -25
- package/deps/rocksdb/rocksdb/README.md +29 -0
- package/deps/rocksdb/rocksdb/TARGETS +25 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +35 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +229 -74
- package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +4 -3
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -95
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +4 -2
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -3
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +2683 -496
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +580 -159
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +145 -42
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +20 -1
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +391 -17
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +309 -212
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +0 -32
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +439 -12
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +44 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +11 -1
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -3
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.cc +119 -0
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.h +155 -0
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +711 -0
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +17 -11
- package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +28 -12
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +20 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +7 -3
- package/deps/rocksdb/rocksdb/db/builder.cc +35 -10
- package/deps/rocksdb/rocksdb/db/c.cc +233 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +140 -6
- package/deps/rocksdb/rocksdb/db/column_family.cc +110 -51
- package/deps/rocksdb/rocksdb/db/column_family.h +34 -2
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +314 -7
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +106 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +47 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +10 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +148 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +22 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +33 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +14 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +90 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +170 -95
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +32 -58
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +46 -10
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -3
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +27 -3
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +850 -44
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +275 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +52 -19
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +733 -320
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +155 -66
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +516 -155
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +8 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +100 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +95 -50
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +136 -79
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +6 -95
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +31 -22
- package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +85 -57
- package/deps/rocksdb/rocksdb/db/db_iter.h +11 -2
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +29 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +276 -21
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +193 -7
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +294 -26
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +364 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +13 -3
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +52 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +74 -1
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +22 -4
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +282 -167
- package/deps/rocksdb/rocksdb/db/db_test.cc +180 -49
- package/deps/rocksdb/rocksdb/db/db_test2.cc +84 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +25 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.h +45 -2
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +14 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +245 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +480 -1
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +67 -34
- package/deps/rocksdb/rocksdb/db/error_handler.h +13 -9
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +144 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +105 -17
- package/deps/rocksdb/rocksdb/db/flush_job.h +27 -4
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +90 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
- package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +70 -83
- package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +45 -11
- package/deps/rocksdb/rocksdb/db/memtable_list.h +43 -2
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +91 -5
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +330 -115
- package/deps/rocksdb/rocksdb/db/merge_helper.h +100 -12
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +82 -0
- package/deps/rocksdb/rocksdb/db/merge_test.cc +267 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +4 -4
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +4 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +4 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +25 -7
- package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +459 -74
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +105 -69
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +83 -46
- package/deps/rocksdb/rocksdb/db/table_cache.cc +76 -54
- package/deps/rocksdb/rocksdb/db/table_cache.h +18 -12
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
- package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
- package/deps/rocksdb/rocksdb/db/version_edit.h +58 -10
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
- package/deps/rocksdb/rocksdb/db/version_set.cc +207 -110
- package/deps/rocksdb/rocksdb/db/version_set.h +36 -15
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -5
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +47 -26
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +525 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -22
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -20
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +0 -29
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +46 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +40 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper_test.cc +39 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +55 -20
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +4 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +88 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +37 -13
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +110 -58
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +68 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +34 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +429 -237
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +13 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +21 -14
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.cc +51 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.h +27 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +3 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +29 -38
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +302 -101
- package/deps/rocksdb/rocksdb/env/env.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
- package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +79 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +100 -70
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +64 -18
- package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
- package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1225 -97
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +72 -33
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +40 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +163 -91
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +112 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +108 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +42 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +92 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +34 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +91 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +8 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +10 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +55 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +130 -22
- package/deps/rocksdb/rocksdb/include/rocksdb/port_defs.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +92 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +37 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +35 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +20 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +42 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +53 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -2
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +4 -3
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +1 -1
- package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +22 -1
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +18 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +6 -1
- package/deps/rocksdb/rocksdb/options/db_options.cc +54 -2
- package/deps/rocksdb/rocksdb/options/db_options.h +4 -0
- package/deps/rocksdb/rocksdb/options/options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +18 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +14 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -1
- package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
- package/deps/rocksdb/rocksdb/port/README +10 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.h +7 -4
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +32 -12
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
- package/deps/rocksdb/rocksdb/port/win/port_win.h +5 -2
- package/deps/rocksdb/rocksdb/src.mk +10 -1
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
- package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +116 -43
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +9 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +321 -49
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +98 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +233 -98
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +58 -23
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +12 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +52 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +26 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -18
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +20 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +232 -71
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -6
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +44 -26
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +36 -19
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +11 -7
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +14 -13
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +9 -2
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/format.cc +175 -33
- package/deps/rocksdb/rocksdb/table/format.h +63 -10
- package/deps/rocksdb/rocksdb/table/get_context.cc +52 -89
- package/deps/rocksdb/rocksdb/table/get_context.h +12 -3
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +11 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +29 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +22 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +45 -9
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +24 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +6 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +31 -0
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +3 -3
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +87 -65
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +221 -33
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +36 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +33 -11
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
- package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
- package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +32 -11
- package/deps/rocksdb/rocksdb/util/cast_util.h +24 -0
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +55 -8
- package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
- package/deps/rocksdb/rocksdb/util/compression.h +119 -35
- package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
- package/deps/rocksdb/rocksdb/util/crc32c.cc +7 -1
- package/deps/rocksdb/rocksdb/util/distributed_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
- package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
- package/deps/rocksdb/rocksdb/util/hash.h +7 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
- package/deps/rocksdb/rocksdb/util/math.h +58 -6
- package/deps/rocksdb/rocksdb/util/math128.h +29 -7
- package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
- package/deps/rocksdb/rocksdb/util/overload.h +23 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +53 -18
- package/deps/rocksdb/rocksdb/util/rate_limiter_impl.h +6 -1
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +90 -19
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
- package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
- package/deps/rocksdb/rocksdb/util/string_util.cc +39 -0
- package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +10 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +385 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +192 -1
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +461 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +0 -3
- package/deps/rocksdb/rocksdb/util/xxph3.h +0 -4
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +71 -26
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +20 -16
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +11 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +7 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +12 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +13 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +272 -33
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +15 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +4 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +195 -23
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +19 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +88 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +43 -17
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +73 -24
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +41 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +15 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +59 -28
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +127 -120
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +129 -59
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +111 -14
- package/deps/rocksdb/rocksdb.gyp +6 -2
- package/index.js +0 -8
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +0 -7
- package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +0 -33
- package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +0 -26
- package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +0 -10
|
@@ -18,10 +18,21 @@ void BlockBasedTableIterator::Seek(const Slice& target) {
|
|
|
18
18
|
|
|
19
19
|
void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
20
20
|
bool async_prefetch) {
|
|
21
|
-
|
|
21
|
+
ResetBlockCacheLookupVar();
|
|
22
|
+
bool is_first_pass = !async_read_in_progress_;
|
|
23
|
+
bool autotune_readaheadsize = is_first_pass &&
|
|
24
|
+
read_options_.auto_readahead_size &&
|
|
25
|
+
read_options_.iterate_upper_bound;
|
|
26
|
+
|
|
27
|
+
if (autotune_readaheadsize &&
|
|
28
|
+
table_->get_rep()->table_options.block_cache.get() &&
|
|
29
|
+
!read_options_.async_io && direction_ == IterDirection::kForward) {
|
|
30
|
+
readahead_cache_lookup_ = true;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// Second pass.
|
|
22
34
|
if (async_read_in_progress_) {
|
|
23
35
|
AsyncInitDataBlock(false);
|
|
24
|
-
is_first_pass = false;
|
|
25
36
|
}
|
|
26
37
|
|
|
27
38
|
is_out_of_bound_ = false;
|
|
@@ -44,7 +55,11 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
44
55
|
}
|
|
45
56
|
|
|
46
57
|
bool need_seek_index = true;
|
|
47
|
-
|
|
58
|
+
|
|
59
|
+
// In case of readahead_cache_lookup_, index_iter_ could change to find the
|
|
60
|
+
// readahead size in BlockCacheLookupForReadAheadSize so it needs to reseek.
|
|
61
|
+
if (IsIndexAtCurr() && block_iter_points_to_real_block_ &&
|
|
62
|
+
block_iter_.Valid()) {
|
|
48
63
|
// Reseek.
|
|
49
64
|
prev_block_offset_ = index_iter_->value().handle.offset();
|
|
50
65
|
|
|
@@ -72,13 +87,31 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
72
87
|
} else {
|
|
73
88
|
index_iter_->SeekToFirst();
|
|
74
89
|
}
|
|
90
|
+
is_index_at_curr_block_ = true;
|
|
91
|
+
if (!index_iter_->Valid()) {
|
|
92
|
+
ResetDataIter();
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (autotune_readaheadsize) {
|
|
98
|
+
FindReadAheadSizeUpperBound();
|
|
99
|
+
if (target) {
|
|
100
|
+
index_iter_->Seek(*target);
|
|
101
|
+
} else {
|
|
102
|
+
index_iter_->SeekToFirst();
|
|
103
|
+
}
|
|
75
104
|
|
|
105
|
+
// Check for IO error.
|
|
76
106
|
if (!index_iter_->Valid()) {
|
|
77
107
|
ResetDataIter();
|
|
78
108
|
return;
|
|
79
109
|
}
|
|
80
110
|
}
|
|
81
111
|
|
|
112
|
+
// After reseek, index_iter_ points to the right key, i.e. target, in
|
|
113
|
+
// case of readahead_cache_lookup_. So index_iter_ can be used directly.
|
|
114
|
+
|
|
82
115
|
IndexValue v = index_iter_->value();
|
|
83
116
|
const bool same_block = block_iter_points_to_real_block_ &&
|
|
84
117
|
v.handle.offset() == prev_block_offset_;
|
|
@@ -135,6 +168,8 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
|
|
|
135
168
|
}
|
|
136
169
|
|
|
137
170
|
void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
|
|
171
|
+
direction_ = IterDirection::kBackward;
|
|
172
|
+
ResetBlockCacheLookupVar();
|
|
138
173
|
is_out_of_bound_ = false;
|
|
139
174
|
is_at_first_key_from_index_ = false;
|
|
140
175
|
seek_stat_state_ = kNone;
|
|
@@ -171,6 +206,7 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
|
|
|
171
206
|
// to distinguish the two unless we read the second block. In this case, we'll
|
|
172
207
|
// end up with reading two blocks.
|
|
173
208
|
index_iter_->Seek(target);
|
|
209
|
+
is_index_at_curr_block_ = true;
|
|
174
210
|
|
|
175
211
|
if (!index_iter_->Valid()) {
|
|
176
212
|
auto seek_status = index_iter_->status();
|
|
@@ -206,15 +242,22 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
|
|
|
206
242
|
}
|
|
207
243
|
|
|
208
244
|
void BlockBasedTableIterator::SeekToLast() {
|
|
245
|
+
direction_ = IterDirection::kBackward;
|
|
246
|
+
ResetBlockCacheLookupVar();
|
|
209
247
|
is_out_of_bound_ = false;
|
|
210
248
|
is_at_first_key_from_index_ = false;
|
|
211
249
|
seek_stat_state_ = kNone;
|
|
250
|
+
|
|
212
251
|
SavePrevIndexValue();
|
|
252
|
+
|
|
213
253
|
index_iter_->SeekToLast();
|
|
254
|
+
is_index_at_curr_block_ = true;
|
|
255
|
+
|
|
214
256
|
if (!index_iter_->Valid()) {
|
|
215
257
|
ResetDataIter();
|
|
216
258
|
return;
|
|
217
259
|
}
|
|
260
|
+
|
|
218
261
|
InitDataBlock();
|
|
219
262
|
block_iter_.SeekToLast();
|
|
220
263
|
FindKeyBackward();
|
|
@@ -243,6 +286,14 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) {
|
|
|
243
286
|
}
|
|
244
287
|
|
|
245
288
|
void BlockBasedTableIterator::Prev() {
|
|
289
|
+
// Return Error.
|
|
290
|
+
if (readahead_cache_lookup_) {
|
|
291
|
+
block_iter_.Invalidate(Status::NotSupported(
|
|
292
|
+
"auto tuning of readahead_size is not supported with Prev operation."));
|
|
293
|
+
return;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
ResetBlockCacheLookupVar();
|
|
246
297
|
if (is_at_first_key_from_index_) {
|
|
247
298
|
is_at_first_key_from_index_ = false;
|
|
248
299
|
|
|
@@ -262,7 +313,18 @@ void BlockBasedTableIterator::Prev() {
|
|
|
262
313
|
}
|
|
263
314
|
|
|
264
315
|
void BlockBasedTableIterator::InitDataBlock() {
|
|
265
|
-
BlockHandle data_block_handle
|
|
316
|
+
BlockHandle data_block_handle;
|
|
317
|
+
bool is_in_cache = false;
|
|
318
|
+
bool use_block_cache_for_lookup = true;
|
|
319
|
+
|
|
320
|
+
if (DoesContainBlockHandles()) {
|
|
321
|
+
data_block_handle = block_handles_.front().handle_;
|
|
322
|
+
is_in_cache = block_handles_.front().is_cache_hit_;
|
|
323
|
+
use_block_cache_for_lookup = false;
|
|
324
|
+
} else {
|
|
325
|
+
data_block_handle = index_iter_->value().handle;
|
|
326
|
+
}
|
|
327
|
+
|
|
266
328
|
if (!block_iter_points_to_real_block_ ||
|
|
267
329
|
data_block_handle.offset() != prev_block_offset_ ||
|
|
268
330
|
// if previous attempt of reading the block missed cache, try again
|
|
@@ -270,25 +332,50 @@ void BlockBasedTableIterator::InitDataBlock() {
|
|
|
270
332
|
if (block_iter_points_to_real_block_) {
|
|
271
333
|
ResetDataIter();
|
|
272
334
|
}
|
|
273
|
-
auto* rep = table_->get_rep();
|
|
274
335
|
|
|
275
336
|
bool is_for_compaction =
|
|
276
337
|
lookup_context_.caller == TableReaderCaller::kCompaction;
|
|
277
|
-
|
|
278
|
-
//
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
338
|
+
|
|
339
|
+
// Initialize Data Block From CacheableEntry.
|
|
340
|
+
if (is_in_cache) {
|
|
341
|
+
Status s;
|
|
342
|
+
block_iter_.Invalidate(Status::OK());
|
|
343
|
+
table_->NewDataBlockIterator<DataBlockIter>(
|
|
344
|
+
read_options_, (block_handles_.front().cachable_entry_).As<Block>(),
|
|
345
|
+
&block_iter_, s);
|
|
346
|
+
} else {
|
|
347
|
+
auto* rep = table_->get_rep();
|
|
348
|
+
|
|
349
|
+
std::function<void(uint64_t offset, size_t, size_t&)> readaheadsize_cb =
|
|
350
|
+
nullptr;
|
|
351
|
+
if (readahead_cache_lookup_) {
|
|
352
|
+
readaheadsize_cb = std::bind(
|
|
353
|
+
&BlockBasedTableIterator::BlockCacheLookupForReadAheadSize, this,
|
|
354
|
+
std::placeholders::_1, std::placeholders::_2,
|
|
355
|
+
std::placeholders::_3);
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// Prefetch additional data for range scans (iterators).
|
|
359
|
+
// Implicit auto readahead:
|
|
360
|
+
// Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
|
|
361
|
+
// Explicit user requested readahead:
|
|
362
|
+
// Enabled from the very first IO when ReadOptions.readahead_size is
|
|
363
|
+
// set.
|
|
364
|
+
block_prefetcher_.PrefetchIfNeeded(
|
|
365
|
+
rep, data_block_handle, read_options_.readahead_size,
|
|
366
|
+
is_for_compaction,
|
|
367
|
+
/*no_sequential_checking=*/false, read_options_, readaheadsize_cb);
|
|
368
|
+
|
|
369
|
+
Status s;
|
|
370
|
+
table_->NewDataBlockIterator<DataBlockIter>(
|
|
371
|
+
read_options_, data_block_handle, &block_iter_, BlockType::kData,
|
|
372
|
+
/*get_context=*/nullptr, &lookup_context_,
|
|
373
|
+
block_prefetcher_.prefetch_buffer(),
|
|
374
|
+
/*for_compaction=*/is_for_compaction, /*async_read=*/false, s,
|
|
375
|
+
use_block_cache_for_lookup);
|
|
376
|
+
}
|
|
291
377
|
block_iter_points_to_real_block_ = true;
|
|
378
|
+
|
|
292
379
|
CheckDataBlockWithinUpperBound();
|
|
293
380
|
if (!is_for_compaction &&
|
|
294
381
|
(seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) {
|
|
@@ -314,6 +401,16 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
|
|
|
314
401
|
ResetDataIter();
|
|
315
402
|
}
|
|
316
403
|
auto* rep = table_->get_rep();
|
|
404
|
+
|
|
405
|
+
std::function<void(uint64_t offset, size_t, size_t&)> readaheadsize_cb =
|
|
406
|
+
nullptr;
|
|
407
|
+
if (readahead_cache_lookup_) {
|
|
408
|
+
readaheadsize_cb = std::bind(
|
|
409
|
+
&BlockBasedTableIterator::BlockCacheLookupForReadAheadSize, this,
|
|
410
|
+
std::placeholders::_1, std::placeholders::_2,
|
|
411
|
+
std::placeholders::_3);
|
|
412
|
+
}
|
|
413
|
+
|
|
317
414
|
// Prefetch additional data for range scans (iterators).
|
|
318
415
|
// Implicit auto readahead:
|
|
319
416
|
// Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
|
|
@@ -326,14 +423,15 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
|
|
|
326
423
|
block_prefetcher_.PrefetchIfNeeded(
|
|
327
424
|
rep, data_block_handle, read_options_.readahead_size,
|
|
328
425
|
is_for_compaction, /*no_sequential_checking=*/read_options_.async_io,
|
|
329
|
-
read_options_
|
|
426
|
+
read_options_, readaheadsize_cb);
|
|
330
427
|
|
|
331
428
|
Status s;
|
|
332
429
|
table_->NewDataBlockIterator<DataBlockIter>(
|
|
333
430
|
read_options_, data_block_handle, &block_iter_, BlockType::kData,
|
|
334
431
|
/*get_context=*/nullptr, &lookup_context_,
|
|
335
432
|
block_prefetcher_.prefetch_buffer(),
|
|
336
|
-
/*for_compaction=*/is_for_compaction, /*async_read=*/true, s
|
|
433
|
+
/*for_compaction=*/is_for_compaction, /*async_read=*/true, s,
|
|
434
|
+
/*use_block_cache_for_lookup=*/true);
|
|
337
435
|
|
|
338
436
|
if (s.IsTryAgain()) {
|
|
339
437
|
async_read_in_progress_ = true;
|
|
@@ -348,7 +446,8 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
|
|
|
348
446
|
read_options_, data_block_handle, &block_iter_, BlockType::kData,
|
|
349
447
|
/*get_context=*/nullptr, &lookup_context_,
|
|
350
448
|
block_prefetcher_.prefetch_buffer(),
|
|
351
|
-
/*for_compaction=*/is_for_compaction, /*async_read=*/false, s
|
|
449
|
+
/*for_compaction=*/is_for_compaction, /*async_read=*/false, s,
|
|
450
|
+
/*use_block_cache_for_lookup=*/false);
|
|
352
451
|
}
|
|
353
452
|
block_iter_points_to_real_block_ = true;
|
|
354
453
|
CheckDataBlockWithinUpperBound();
|
|
@@ -379,20 +478,29 @@ bool BlockBasedTableIterator::MaterializeCurrentBlock() {
|
|
|
379
478
|
|
|
380
479
|
block_iter_.SeekToFirst();
|
|
381
480
|
|
|
481
|
+
// MaterializeCurrentBlock is called when block is actually read by
|
|
482
|
+
// calling InitDataBlock. is_at_first_key_from_index_ will be false for block
|
|
483
|
+
// handles placed in block_handles_. So index_iter_ points to the current block.
|
|
484
|
+
// After InitDataBlock, index_iter_ can point to different block if
|
|
485
|
+
// BlockCacheLookupForReadAheadSize is called.
|
|
486
|
+
Slice first_internal_key;
|
|
487
|
+
if (DoesContainBlockHandles()) {
|
|
488
|
+
first_internal_key = block_handles_.front().first_internal_key_;
|
|
489
|
+
} else {
|
|
490
|
+
first_internal_key = index_iter_->value().first_internal_key;
|
|
491
|
+
}
|
|
492
|
+
|
|
382
493
|
if (!block_iter_.Valid() ||
|
|
383
|
-
icomp_.Compare(block_iter_.key(),
|
|
384
|
-
index_iter_->value().first_internal_key) != 0) {
|
|
494
|
+
icomp_.Compare(block_iter_.key(), first_internal_key) != 0) {
|
|
385
495
|
block_iter_.Invalidate(Status::Corruption(
|
|
386
496
|
"first key in index doesn't match first key in block"));
|
|
387
497
|
return false;
|
|
388
498
|
}
|
|
389
|
-
|
|
390
499
|
return true;
|
|
391
500
|
}
|
|
392
501
|
|
|
393
502
|
void BlockBasedTableIterator::FindKeyForward() {
|
|
394
503
|
// This method's code is kept short to make it likely to be inlined.
|
|
395
|
-
|
|
396
504
|
assert(!is_out_of_bound_);
|
|
397
505
|
assert(block_iter_points_to_real_block_);
|
|
398
506
|
|
|
@@ -415,40 +523,72 @@ void BlockBasedTableIterator::FindBlockForward() {
|
|
|
415
523
|
return;
|
|
416
524
|
}
|
|
417
525
|
// Whether next data block is out of upper bound, if there is one.
|
|
418
|
-
|
|
419
|
-
|
|
526
|
+
// index_iter_ can point to different block in case of
|
|
527
|
+
// readahead_cache_lookup_. readahead_cache_lookup_ will handle the
|
|
528
|
+
// upper_bound check.
|
|
529
|
+
bool next_block_is_out_of_bound =
|
|
530
|
+
IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr &&
|
|
420
531
|
block_iter_points_to_real_block_ &&
|
|
421
532
|
block_upper_bound_check_ == BlockUpperBound::kUpperBoundInCurBlock;
|
|
533
|
+
|
|
422
534
|
assert(!next_block_is_out_of_bound ||
|
|
423
535
|
user_comparator_.CompareWithoutTimestamp(
|
|
424
536
|
*read_options_.iterate_upper_bound, /*a_has_ts=*/false,
|
|
425
537
|
index_iter_->user_key(), /*b_has_ts=*/true) <= 0);
|
|
538
|
+
|
|
426
539
|
ResetDataIter();
|
|
427
|
-
index_iter_->Next();
|
|
428
|
-
if (next_block_is_out_of_bound) {
|
|
429
|
-
// The next block is out of bound. No need to read it.
|
|
430
|
-
TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
|
|
431
|
-
// We need to make sure this is not the last data block before setting
|
|
432
|
-
// is_out_of_bound_, since the index key for the last data block can be
|
|
433
|
-
// larger than smallest key of the next file on the same level.
|
|
434
|
-
if (index_iter_->Valid()) {
|
|
435
|
-
is_out_of_bound_ = true;
|
|
436
|
-
}
|
|
437
|
-
return;
|
|
438
|
-
}
|
|
439
540
|
|
|
440
|
-
if (
|
|
441
|
-
|
|
541
|
+
if (DoesContainBlockHandles()) {
|
|
542
|
+
// Advance and point to that next Block handle to make that block handle
|
|
543
|
+
// current.
|
|
544
|
+
block_handles_.pop_front();
|
|
442
545
|
}
|
|
443
546
|
|
|
444
|
-
|
|
547
|
+
if (!DoesContainBlockHandles()) {
|
|
548
|
+
// For readahead_cache_lookup_ enabled scenario -
|
|
549
|
+
// 1. In case of Seek, block_handles_ will be empty and it should proceed
|
|
550
|
+
// as usual doing index_iter_->Next().
|
|
551
|
+
// 2. If block_handles is empty and index is not at current because of
|
|
552
|
+
// lookup (during Next), it should skip doing index_iter_->Next(), as
|
|
553
|
+
// it's already pointing to next block;
|
|
554
|
+
// 3. Last block could be out of bound and it won't iterate over that
|
|
555
|
+
// during BlockCacheLookup. We need to set for that block here.
|
|
556
|
+
if (IsIndexAtCurr() || is_index_out_of_bound_) {
|
|
557
|
+
index_iter_->Next();
|
|
558
|
+
if (is_index_out_of_bound_) {
|
|
559
|
+
next_block_is_out_of_bound = is_index_out_of_bound_;
|
|
560
|
+
is_index_out_of_bound_ = false;
|
|
561
|
+
}
|
|
562
|
+
} else {
|
|
563
|
+
// Skip Next as index_iter_ already points to correct index when it
|
|
564
|
+
// iterates in BlockCacheLookupForReadAheadSize.
|
|
565
|
+
is_index_at_curr_block_ = true;
|
|
566
|
+
}
|
|
445
567
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
568
|
+
if (next_block_is_out_of_bound) {
|
|
569
|
+
// The next block is out of bound. No need to read it.
|
|
570
|
+
TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound",
|
|
571
|
+
nullptr);
|
|
572
|
+
// We need to make sure this is not the last data block before setting
|
|
573
|
+
// is_out_of_bound_, since the index key for the last data block can be
|
|
574
|
+
// larger than smallest key of the next file on the same level.
|
|
575
|
+
if (index_iter_->Valid()) {
|
|
576
|
+
is_out_of_bound_ = true;
|
|
577
|
+
}
|
|
578
|
+
return;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
if (!index_iter_->Valid()) {
|
|
582
|
+
return;
|
|
583
|
+
}
|
|
584
|
+
IndexValue v = index_iter_->value();
|
|
451
585
|
|
|
586
|
+
if (!v.first_internal_key.empty() && allow_unprepared_value_) {
|
|
587
|
+
// Index contains the first key of the block. Defer reading the block.
|
|
588
|
+
is_at_first_key_from_index_ = true;
|
|
589
|
+
return;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
452
592
|
InitDataBlock();
|
|
453
593
|
block_iter_.SeekToFirst();
|
|
454
594
|
} while (!block_iter_.Valid());
|
|
@@ -487,7 +627,7 @@ void BlockBasedTableIterator::CheckOutOfBound() {
|
|
|
487
627
|
}
|
|
488
628
|
|
|
489
629
|
void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
|
|
490
|
-
if (read_options_.iterate_upper_bound != nullptr &&
|
|
630
|
+
if (IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr &&
|
|
491
631
|
block_iter_points_to_real_block_) {
|
|
492
632
|
block_upper_bound_check_ = (user_comparator_.CompareWithoutTimestamp(
|
|
493
633
|
*read_options_.iterate_upper_bound,
|
|
@@ -497,4 +637,136 @@ void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
|
|
|
497
637
|
: BlockUpperBound::kUpperBoundInCurBlock;
|
|
498
638
|
}
|
|
499
639
|
}
|
|
640
|
+
|
|
641
|
+
void BlockBasedTableIterator::FindReadAheadSizeUpperBound() {
|
|
642
|
+
size_t total_bytes_till_upper_bound = 0;
|
|
643
|
+
size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
|
|
644
|
+
uint64_t start_offset = index_iter_->value().handle.offset();
|
|
645
|
+
|
|
646
|
+
do {
|
|
647
|
+
BlockHandle block_handle = index_iter_->value().handle;
|
|
648
|
+
total_bytes_till_upper_bound += block_handle.size();
|
|
649
|
+
total_bytes_till_upper_bound += footer;
|
|
650
|
+
|
|
651
|
+
// Can't figure out for current block if current block
|
|
652
|
+
// is out of bound. But for next block we can find that.
|
|
653
|
+
// If curr block's index key >= iterate_upper_bound, it
|
|
654
|
+
// means all the keys in next block or above are out of
|
|
655
|
+
// bound.
|
|
656
|
+
if (IsNextBlockOutOfBound()) {
|
|
657
|
+
break;
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
// Since next block is not out of bound, iterate to that
|
|
661
|
+
// index block and add its Data block size to
|
|
662
|
+
// readahead_size.
|
|
663
|
+
index_iter_->Next();
|
|
664
|
+
|
|
665
|
+
if (!index_iter_->Valid()) {
|
|
666
|
+
break;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
} while (true);
|
|
670
|
+
|
|
671
|
+
block_prefetcher_.SetUpperBoundOffset(start_offset +
|
|
672
|
+
total_bytes_till_upper_bound);
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
|
|
676
|
+
uint64_t offset, size_t readahead_size, size_t& updated_readahead_size) {
|
|
677
|
+
updated_readahead_size = readahead_size;
|
|
678
|
+
|
|
679
|
+
// readahead_cache_lookup_ can be set false after Seek, if after Seek or Next
|
|
680
|
+
// there is SeekForPrev or any other backward operation.
|
|
681
|
+
if (!readahead_cache_lookup_) {
|
|
682
|
+
return;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
assert(!DoesContainBlockHandles());
|
|
686
|
+
assert(index_iter_->value().handle.offset() == offset);
|
|
687
|
+
|
|
688
|
+
// Error. current offset should be equal to what's requested for prefetching.
|
|
689
|
+
if (index_iter_->value().handle.offset() != offset) {
|
|
690
|
+
return;
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
if (IsNextBlockOutOfBound()) {
|
|
694
|
+
updated_readahead_size = 0;
|
|
695
|
+
return;
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
size_t current_readahead_size = 0;
|
|
699
|
+
size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
|
|
700
|
+
|
|
701
|
+
// Add the current block to block_handles_.
|
|
702
|
+
{
|
|
703
|
+
BlockHandleInfo block_handle_info;
|
|
704
|
+
block_handle_info.handle_ = index_iter_->value().handle;
|
|
705
|
+
block_handle_info.SetFirstInternalKey(
|
|
706
|
+
index_iter_->value().first_internal_key);
|
|
707
|
+
block_handles_.emplace_back(std::move(block_handle_info));
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
// Current block is included in length. Readahead should start from next
|
|
711
|
+
// block.
|
|
712
|
+
index_iter_->Next();
|
|
713
|
+
is_index_at_curr_block_ = false;
|
|
714
|
+
|
|
715
|
+
while (index_iter_->Valid()) {
|
|
716
|
+
BlockHandle block_handle = index_iter_->value().handle;
|
|
717
|
+
|
|
718
|
+
// Adding this data block exceeds passed down readahead_size. So this data
|
|
719
|
+
// block won't be added.
|
|
720
|
+
if (current_readahead_size + block_handle.size() + footer >
|
|
721
|
+
readahead_size) {
|
|
722
|
+
break;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
current_readahead_size += block_handle.size();
|
|
726
|
+
current_readahead_size += footer;
|
|
727
|
+
|
|
728
|
+
// For current data block, do the lookup in the cache. Lookup should pin the
|
|
729
|
+
// data block and add the placeholder for cache.
|
|
730
|
+
BlockHandleInfo block_handle_info;
|
|
731
|
+
block_handle_info.handle_ = index_iter_->value().handle;
|
|
732
|
+
block_handle_info.SetFirstInternalKey(
|
|
733
|
+
index_iter_->value().first_internal_key);
|
|
734
|
+
|
|
735
|
+
Status s = table_->LookupAndPinBlocksInCache<Block_kData>(
|
|
736
|
+
read_options_, block_handle,
|
|
737
|
+
&(block_handle_info.cachable_entry_).As<Block_kData>());
|
|
738
|
+
if (!s.ok()) {
|
|
739
|
+
break;
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
block_handle_info.is_cache_hit_ =
|
|
743
|
+
(block_handle_info.cachable_entry_.GetValue() ||
|
|
744
|
+
block_handle_info.cachable_entry_.GetCacheHandle());
|
|
745
|
+
|
|
746
|
+
// Add the handle to the queue.
|
|
747
|
+
block_handles_.emplace_back(std::move(block_handle_info));
|
|
748
|
+
|
|
749
|
+
// Can't figure out for current block if current block
|
|
750
|
+
// is out of bound. But for next block we can find that.
|
|
751
|
+
// If curr block's index key >= iterate_upper_bound, it
|
|
752
|
+
// means all the keys in next block or above are out of
|
|
753
|
+
// bound.
|
|
754
|
+
if (IsNextBlockOutOfBound()) {
|
|
755
|
+
is_index_out_of_bound_ = true;
|
|
756
|
+
break;
|
|
757
|
+
}
|
|
758
|
+
index_iter_->Next();
|
|
759
|
+
};
|
|
760
|
+
|
|
761
|
+
// Iterate cache hit block handles from the end till a Miss is there, to
|
|
762
|
+
// update the readahead_size.
|
|
763
|
+
for (auto it = block_handles_.rbegin();
|
|
764
|
+
it != block_handles_.rend() && (*it).is_cache_hit_ == true; ++it) {
|
|
765
|
+
current_readahead_size -= (*it).handle_.size();
|
|
766
|
+
current_readahead_size -= footer;
|
|
767
|
+
}
|
|
768
|
+
updated_readahead_size = current_readahead_size;
|
|
769
|
+
ResetPreviousBlockOffset();
|
|
770
|
+
}
|
|
771
|
+
|
|
500
772
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
// Use of this source code is governed by a BSD-style license that can be
|
|
8
8
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
9
9
|
#pragma once
|
|
10
|
+
#include <deque>
|
|
11
|
+
|
|
10
12
|
#include "table/block_based/block_based_table_reader.h"
|
|
11
13
|
#include "table/block_based/block_based_table_reader_impl.h"
|
|
12
14
|
#include "table/block_based/block_prefetcher.h"
|
|
@@ -44,7 +46,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
44
46
|
async_read_in_progress_(false),
|
|
45
47
|
is_last_level_(table->IsLastLevel()) {}
|
|
46
48
|
|
|
47
|
-
~BlockBasedTableIterator() {}
|
|
49
|
+
// On destruction, empties block_handles_ via ClearBlockHandles().
~BlockBasedTableIterator() override { ClearBlockHandles(); }
|
|
48
50
|
|
|
49
51
|
void Seek(const Slice& target) override;
|
|
50
52
|
void SeekForPrev(const Slice& target) override;
|
|
@@ -58,6 +60,11 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
58
60
|
(is_at_first_key_from_index_ ||
|
|
59
61
|
(block_iter_points_to_real_block_ && block_iter_.Valid()));
|
|
60
62
|
}
|
|
63
|
+
|
|
64
|
+
// For block cache readahead lookup scenario -
|
|
65
|
+
// If is_at_first_key_from_index_ is true, InitDataBlock hasn't been
|
|
66
|
+
// called. It means block_handles_ is empty and index_iter_ points to current block.
|
|
67
|
+
// So index_iter_ can be accessed directly.
|
|
61
68
|
Slice key() const override {
|
|
62
69
|
assert(Valid());
|
|
63
70
|
if (is_at_first_key_from_index_) {
|
|
@@ -74,6 +81,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
74
81
|
return block_iter_.user_key();
|
|
75
82
|
}
|
|
76
83
|
}
|
|
84
|
+
|
|
77
85
|
bool PrepareValue() override {
|
|
78
86
|
assert(Valid());
|
|
79
87
|
|
|
@@ -104,8 +112,12 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
104
112
|
return block_iter_.value();
|
|
105
113
|
}
|
|
106
114
|
Status status() const override {
|
|
107
|
-
//
|
|
108
|
-
if
|
|
115
|
+
// In case of block cache readahead lookup, it won't add the block to
|
|
116
|
+
// block_handles if its index is invalid. So index_iter_->status check can
|
|
117
|
+
// be skipped.
|
|
118
|
+
// Prefix index set status to NotFound when the prefix does not exist.
|
|
119
|
+
if (IsIndexAtCurr() && !index_iter_->status().ok() &&
|
|
120
|
+
!index_iter_->status().IsNotFound()) {
|
|
109
121
|
return index_iter_->status();
|
|
110
122
|
} else if (block_iter_points_to_real_block_) {
|
|
111
123
|
return block_iter_.status();
|
|
@@ -159,7 +171,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
159
171
|
}
|
|
160
172
|
|
|
161
173
|
void SavePrevIndexValue() {
|
|
162
|
-
if (block_iter_points_to_real_block_) {
|
|
174
|
+
if (block_iter_points_to_real_block_ && IsIndexAtCurr()) {
|
|
163
175
|
// Reseek. If they end up with the same data block, we shouldn't re-fetch
|
|
164
176
|
// the same data block.
|
|
165
177
|
prev_block_offset_ = index_iter_->value().handle.offset();
|
|
@@ -235,6 +247,28 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
235
247
|
kReportOnUseful = 1 << 2,
|
|
236
248
|
};
|
|
237
249
|
|
|
250
|
+
// BlockHandleInfo is used to store the info needed when block cache lookup
|
|
251
|
+
// ahead is enabled to tune readahead_size.
|
|
252
|
+
struct BlockHandleInfo {
|
|
253
|
+
void SetFirstInternalKey(const Slice& key) {
|
|
254
|
+
if (key.empty()) {
|
|
255
|
+
return;
|
|
256
|
+
}
|
|
257
|
+
size_t size = key.size();
|
|
258
|
+
buf_ = std::unique_ptr<char[]>(new char[size]);
|
|
259
|
+
memcpy(buf_.get(), key.data(), size);
|
|
260
|
+
first_internal_key_ = Slice(buf_.get(), size);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
BlockHandle handle_;
|
|
264
|
+
bool is_cache_hit_ = false;
|
|
265
|
+
CachableEntry<Block> cachable_entry_;
|
|
266
|
+
Slice first_internal_key_;
|
|
267
|
+
std::unique_ptr<char[]> buf_;
|
|
268
|
+
};
|
|
269
|
+
|
|
270
|
+
// Reports is_index_at_curr_block_: false once BlockCacheLookupForReadAheadSize
// has advanced index_iter_ past the block currently being read.
bool IsIndexAtCurr() const { return is_index_at_curr_block_; }
|
|
271
|
+
|
|
238
272
|
const BlockBasedTable* table_;
|
|
239
273
|
const ReadOptions& read_options_;
|
|
240
274
|
const InternalKeyComparator& icomp_;
|
|
@@ -268,6 +302,29 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
268
302
|
mutable SeekStatState seek_stat_state_ = SeekStatState::kNone;
|
|
269
303
|
bool is_last_level_;
|
|
270
304
|
|
|
305
|
+
// If set to true, it'll lookup in the cache ahead to estimate the readahead
|
|
306
|
+
// size based on cache hit and miss.
|
|
307
|
+
bool readahead_cache_lookup_ = false;
|
|
308
|
+
|
|
309
|
+
// It stores all the block handles that are looked up in cache ahead when
|
|
310
|
+
// BlockCacheLookupForReadAheadSize is called. Since index_iter_ may point to
|
|
311
|
+
// different blocks when readahead_size is calculated in
|
|
312
|
+
// BlockCacheLookupForReadAheadSize, to avoid index_iter_ reseek,
|
|
313
|
+
// block_handles_ is used.
|
|
314
|
+
std::deque<BlockHandleInfo> block_handles_;
|
|
315
|
+
|
|
316
|
+
// During cache lookup to find readahead size, index_iter_ is iterated and it
|
|
317
|
+
// can point to a different block. is_index_at_curr_block_ keeps track of
|
|
318
|
+
// that.
|
|
319
|
+
bool is_index_at_curr_block_ = true;
|
|
320
|
+
bool is_index_out_of_bound_ = false;
|
|
321
|
+
|
|
322
|
+
// Used in case of auto_readahead_size to disable the block_cache lookup if
|
|
323
|
+
// direction is reversed from forward to backward. In case of backward
|
|
324
|
+
// direction, SeekForPrev or Prev might call Seek from db_iter. So direction
|
|
325
|
+
// is used to disable the lookup.
|
|
326
|
+
IterDirection direction_ = IterDirection::kForward;
|
|
327
|
+
|
|
271
328
|
// If `target` is null, seek to first.
|
|
272
329
|
void SeekImpl(const Slice* target, bool async_prefetch);
|
|
273
330
|
|
|
@@ -306,5 +363,42 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
306
363
|
}
|
|
307
364
|
return true;
|
|
308
365
|
}
|
|
366
|
+
|
|
367
|
+
// *** BEGIN APIs relevant to auto tuning of readahead_size ***
|
|
368
|
+
void FindReadAheadSizeUpperBound();
|
|
369
|
+
|
|
370
|
+
// This API is called to lookup the data blocks ahead in the cache to estimate
|
|
371
|
+
// the current readahead_size.
|
|
372
|
+
void BlockCacheLookupForReadAheadSize(uint64_t offset, size_t readahead_size,
|
|
373
|
+
size_t& updated_readahead_size);
|
|
374
|
+
|
|
375
|
+
void ResetBlockCacheLookupVar() {
|
|
376
|
+
is_index_out_of_bound_ = false;
|
|
377
|
+
readahead_cache_lookup_ = false;
|
|
378
|
+
ClearBlockHandles();
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
bool IsNextBlockOutOfBound() {
|
|
382
|
+
// If curr block's index key >= iterate_upper_bound, it means all the keys
|
|
383
|
+
// in next block or above are out of bound.
|
|
384
|
+
return (user_comparator_.CompareWithoutTimestamp(
|
|
385
|
+
index_iter_->user_key(),
|
|
386
|
+
/*a_has_ts=*/true, *read_options_.iterate_upper_bound,
|
|
387
|
+
/*b_has_ts=*/false) >= 0
|
|
388
|
+
? true
|
|
389
|
+
: false);
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// Drops every BlockHandleInfo currently queued in block_handles_.
void ClearBlockHandles() { block_handles_.clear(); }
|
|
393
|
+
|
|
394
|
+
// Reset prev_block_offset_. If index_iter_ has moved ahead, it won't get
|
|
395
|
+
// accurate prev_block_offset_.
|
|
396
|
+
void ResetPreviousBlockOffset() {
|
|
397
|
+
prev_block_offset_ = std::numeric_limits<uint64_t>::max();
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
// Returns true while block_handles_ holds at least one queued block handle.
bool DoesContainBlockHandles() { return !block_handles_.empty(); }
|
|
401
|
+
|
|
402
|
+
// *** END APIs relevant to auto tuning of readahead_size ***
|
|
309
403
|
};
|
|
310
404
|
} // namespace ROCKSDB_NAMESPACE
|