@nxtedition/rocksdb 5.2.21 → 5.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +216 -252
- package/binding.gyp +78 -72
- package/deps/rocksdb/build_version.cc +70 -4
- package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
- package/deps/rocksdb/rocksdb/Makefile +459 -469
- package/deps/rocksdb/rocksdb/README.md +4 -4
- package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
- package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
- package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
- package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
- package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
- package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
- package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
- package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
- package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
- package/deps/rocksdb/rocksdb/db/builder.h +16 -37
- package/deps/rocksdb/rocksdb/db/c.cc +413 -208
- package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
- package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
- package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
- package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
- package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
- package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
- package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
- package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
- package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
- package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
- package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
- package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
- package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
- package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
- package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
- package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
- package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
- package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
- package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
- package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
- package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
- package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
- package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
- package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
- package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
- package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
- package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
- package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
- package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
- package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
- package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
- package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
- package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
- package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
- package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
- package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
- package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
- package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
- package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
- package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
- package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
- package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
- package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
- package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
- package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
- package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
- package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
- package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
- package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
- package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
- package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
- package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
- package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
- package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
- package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
- package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
- package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
- package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
- package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
- package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
- package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
- package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
- package/deps/rocksdb/rocksdb/env/env.cc +632 -42
- package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
- package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
- package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
- package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
- package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
- package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
- package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
- package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
- package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
- package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
- package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
- package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
- package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
- package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
- package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
- package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
- package/deps/rocksdb/rocksdb/file/filename.h +13 -8
- package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
- package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
- package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
- package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
- package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
- package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
- package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
- package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
- package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
- package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
- package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
- package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
- package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
- package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
- package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
- package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
- package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
- package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
- package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
- package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
- package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
- package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
- package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
- package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
- package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
- package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
- package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
- package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
- package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
- package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
- package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
- package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
- package/deps/rocksdb/rocksdb/options/options.cc +49 -17
- package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
- package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
- package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
- package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
- package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
- package/deps/rocksdb/rocksdb/port/lang.h +52 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
- package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
- package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
- package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
- package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
- package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
- package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
- package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
- package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
- package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
- package/deps/rocksdb/rocksdb/python.mk +9 -0
- package/deps/rocksdb/rocksdb/src.mk +82 -34
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
- package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
- package/deps/rocksdb/rocksdb/table/format.cc +258 -104
- package/deps/rocksdb/rocksdb/table/format.h +120 -109
- package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
- package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
- package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
- package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
- package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
- package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
- package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
- package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
- package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
- package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
- package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
- package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
- package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
- package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
- package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
- package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
- package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
- package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
- package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
- package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
- package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
- package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
- package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
- package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
- package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
- package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
- package/deps/rocksdb/rocksdb/util/channel.h +2 -0
- package/deps/rocksdb/rocksdb/util/coding.h +1 -33
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
- package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
- package/deps/rocksdb/rocksdb/util/compression.h +212 -7
- package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
- package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
- package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
- package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
- package/deps/rocksdb/rocksdb/util/defer.h +30 -1
- package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
- package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
- package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
- package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
- package/deps/rocksdb/rocksdb/util/hash.h +31 -1
- package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
- package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
- package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
- package/deps/rocksdb/rocksdb/util/heap.h +6 -1
- package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
- package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
- package/deps/rocksdb/rocksdb/util/math.h +74 -7
- package/deps/rocksdb/rocksdb/util/math128.h +13 -1
- package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
- package/deps/rocksdb/rocksdb/util/random.cc +9 -0
- package/deps/rocksdb/rocksdb/util/random.h +6 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
- package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
- package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
- package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
- package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
- package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
- package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
- package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
- package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
- package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
- package/deps/rocksdb/rocksdb/util/status.cc +32 -29
- package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
- package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
- package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
- package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
- package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
- package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
- package/deps/rocksdb/rocksdb/util/timer.h +55 -46
- package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
- package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
- package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
- package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
- package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
- package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
- package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
- package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
- package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
- package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
- package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
- package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
- package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
- package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
- package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
- package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
- package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
- package/deps/rocksdb/rocksdb.gyp +425 -446
- package/package.json +8 -8
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x86/node.napi.node +0 -0
- package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
- package/deps/rocksdb/rocksdb/hdfs/README +0 -23
- package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
- package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
- package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
- package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
- package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
- package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
- package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
- package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
- package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
- package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
- package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
- package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
- package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
- package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
- package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
- package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
|
@@ -11,36 +11,48 @@
|
|
|
11
11
|
#include <algorithm>
|
|
12
12
|
#include <array>
|
|
13
13
|
#include <limits>
|
|
14
|
+
#include <memory>
|
|
14
15
|
#include <string>
|
|
16
|
+
#include <unordered_set>
|
|
15
17
|
#include <utility>
|
|
16
18
|
#include <vector>
|
|
17
19
|
|
|
20
|
+
#include "cache/cache_entry_roles.h"
|
|
21
|
+
#include "cache/cache_key.h"
|
|
18
22
|
#include "cache/sharded_cache.h"
|
|
19
|
-
|
|
23
|
+
#include "db/compaction/compaction_picker.h"
|
|
20
24
|
#include "db/dbformat.h"
|
|
21
25
|
#include "db/pinned_iterators_manager.h"
|
|
22
26
|
#include "file/file_prefetch_buffer.h"
|
|
23
27
|
#include "file/file_util.h"
|
|
24
28
|
#include "file/random_access_file_reader.h"
|
|
29
|
+
#include "logging/logging.h"
|
|
25
30
|
#include "monitoring/perf_context_imp.h"
|
|
26
|
-
#include "
|
|
31
|
+
#include "port/lang.h"
|
|
27
32
|
#include "rocksdb/cache.h"
|
|
28
33
|
#include "rocksdb/comparator.h"
|
|
34
|
+
#include "rocksdb/convenience.h"
|
|
29
35
|
#include "rocksdb/env.h"
|
|
30
36
|
#include "rocksdb/file_system.h"
|
|
31
37
|
#include "rocksdb/filter_policy.h"
|
|
32
38
|
#include "rocksdb/iterator.h"
|
|
33
39
|
#include "rocksdb/options.h"
|
|
40
|
+
#include "rocksdb/snapshot.h"
|
|
34
41
|
#include "rocksdb/statistics.h"
|
|
42
|
+
#include "rocksdb/system_clock.h"
|
|
35
43
|
#include "rocksdb/table.h"
|
|
36
44
|
#include "rocksdb/table_properties.h"
|
|
45
|
+
#include "rocksdb/trace_record.h"
|
|
37
46
|
#include "table/block_based/binary_search_index_reader.h"
|
|
38
47
|
#include "table/block_based/block.h"
|
|
39
48
|
#include "table/block_based/block_based_filter_block.h"
|
|
40
49
|
#include "table/block_based/block_based_table_factory.h"
|
|
41
50
|
#include "table/block_based/block_based_table_iterator.h"
|
|
51
|
+
#include "table/block_based/block_like_traits.h"
|
|
42
52
|
#include "table/block_based/block_prefix_index.h"
|
|
53
|
+
#include "table/block_based/block_type.h"
|
|
43
54
|
#include "table/block_based/filter_block.h"
|
|
55
|
+
#include "table/block_based/filter_policy_internal.h"
|
|
44
56
|
#include "table/block_based/full_filter_block.h"
|
|
45
57
|
#include "table/block_based/hash_index_reader.h"
|
|
46
58
|
#include "table/block_based/partitioned_filter_block.h"
|
|
@@ -52,11 +64,9 @@
|
|
|
52
64
|
#include "table/meta_blocks.h"
|
|
53
65
|
#include "table/multiget_context.h"
|
|
54
66
|
#include "table/persistent_cache_helper.h"
|
|
67
|
+
#include "table/persistent_cache_options.h"
|
|
55
68
|
#include "table/sst_file_writer_collectors.h"
|
|
56
69
|
#include "table/two_level_iterator.h"
|
|
57
|
-
|
|
58
|
-
#include "monitoring/perf_context_imp.h"
|
|
59
|
-
#include "port/lang.h"
|
|
60
70
|
#include "test_util/sync_point.h"
|
|
61
71
|
#include "util/coding.h"
|
|
62
72
|
#include "util/crc32c.h"
|
|
@@ -69,83 +79,10 @@ extern const uint64_t kBlockBasedTableMagicNumber;
|
|
|
69
79
|
extern const std::string kHashIndexPrefixesBlock;
|
|
70
80
|
extern const std::string kHashIndexPrefixesMetadataBlock;
|
|
71
81
|
|
|
72
|
-
|
|
73
|
-
// Found that 256 KB readahead size provides the best performance, based on
|
|
74
|
-
// experiments, for auto readahead. Experiment data is in PR #3282.
|
|
75
|
-
const size_t BlockBasedTable::kMaxAutoReadaheadSize = 256 * 1024;
|
|
76
|
-
|
|
77
82
|
BlockBasedTable::~BlockBasedTable() {
|
|
78
83
|
delete rep_;
|
|
79
84
|
}
|
|
80
85
|
|
|
81
|
-
std::atomic<uint64_t> BlockBasedTable::next_cache_key_id_(0);
|
|
82
|
-
|
|
83
|
-
template <typename TBlocklike>
|
|
84
|
-
class BlocklikeTraits;
|
|
85
|
-
|
|
86
|
-
template <>
|
|
87
|
-
class BlocklikeTraits<BlockContents> {
|
|
88
|
-
public:
|
|
89
|
-
static BlockContents* Create(BlockContents&& contents,
|
|
90
|
-
size_t /* read_amp_bytes_per_bit */,
|
|
91
|
-
Statistics* /* statistics */,
|
|
92
|
-
bool /* using_zstd */,
|
|
93
|
-
const FilterPolicy* /* filter_policy */) {
|
|
94
|
-
return new BlockContents(std::move(contents));
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
static uint32_t GetNumRestarts(const BlockContents& /* contents */) {
|
|
98
|
-
return 0;
|
|
99
|
-
}
|
|
100
|
-
};
|
|
101
|
-
|
|
102
|
-
template <>
|
|
103
|
-
class BlocklikeTraits<ParsedFullFilterBlock> {
|
|
104
|
-
public:
|
|
105
|
-
static ParsedFullFilterBlock* Create(BlockContents&& contents,
|
|
106
|
-
size_t /* read_amp_bytes_per_bit */,
|
|
107
|
-
Statistics* /* statistics */,
|
|
108
|
-
bool /* using_zstd */,
|
|
109
|
-
const FilterPolicy* filter_policy) {
|
|
110
|
-
return new ParsedFullFilterBlock(filter_policy, std::move(contents));
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
static uint32_t GetNumRestarts(const ParsedFullFilterBlock& /* block */) {
|
|
114
|
-
return 0;
|
|
115
|
-
}
|
|
116
|
-
};
|
|
117
|
-
|
|
118
|
-
template <>
|
|
119
|
-
class BlocklikeTraits<Block> {
|
|
120
|
-
public:
|
|
121
|
-
static Block* Create(BlockContents&& contents, size_t read_amp_bytes_per_bit,
|
|
122
|
-
Statistics* statistics, bool /* using_zstd */,
|
|
123
|
-
const FilterPolicy* /* filter_policy */) {
|
|
124
|
-
return new Block(std::move(contents), read_amp_bytes_per_bit, statistics);
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
static uint32_t GetNumRestarts(const Block& block) {
|
|
128
|
-
return block.NumRestarts();
|
|
129
|
-
}
|
|
130
|
-
};
|
|
131
|
-
|
|
132
|
-
template <>
|
|
133
|
-
class BlocklikeTraits<UncompressionDict> {
|
|
134
|
-
public:
|
|
135
|
-
static UncompressionDict* Create(BlockContents&& contents,
|
|
136
|
-
size_t /* read_amp_bytes_per_bit */,
|
|
137
|
-
Statistics* /* statistics */,
|
|
138
|
-
bool using_zstd,
|
|
139
|
-
const FilterPolicy* /* filter_policy */) {
|
|
140
|
-
return new UncompressionDict(contents.data, std::move(contents.allocation),
|
|
141
|
-
using_zstd);
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
static uint32_t GetNumRestarts(const UncompressionDict& /* dict */) {
|
|
145
|
-
return 0;
|
|
146
|
-
}
|
|
147
|
-
};
|
|
148
|
-
|
|
149
86
|
namespace {
|
|
150
87
|
// Read the block identified by "handle" from "file".
|
|
151
88
|
// The only relevant option is options.verify_checksums for now.
|
|
@@ -157,7 +94,7 @@ template <typename TBlocklike>
|
|
|
157
94
|
Status ReadBlockFromFile(
|
|
158
95
|
RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
|
|
159
96
|
const Footer& footer, const ReadOptions& options, const BlockHandle& handle,
|
|
160
|
-
std::unique_ptr<TBlocklike>* result, const
|
|
97
|
+
std::unique_ptr<TBlocklike>* result, const ImmutableOptions& ioptions,
|
|
161
98
|
bool do_uncompress, bool maybe_compressed, BlockType block_type,
|
|
162
99
|
const UncompressionDict& uncompression_dict,
|
|
163
100
|
const PersistentCacheOptions& cache_options, size_t read_amp_bytes_per_bit,
|
|
@@ -173,33 +110,26 @@ Status ReadBlockFromFile(
|
|
|
173
110
|
Status s = block_fetcher.ReadBlockContents();
|
|
174
111
|
if (s.ok()) {
|
|
175
112
|
result->reset(BlocklikeTraits<TBlocklike>::Create(
|
|
176
|
-
std::move(contents), read_amp_bytes_per_bit, ioptions.
|
|
177
|
-
|
|
113
|
+
std::move(contents), read_amp_bytes_per_bit, ioptions.stats, using_zstd,
|
|
114
|
+
filter_policy));
|
|
178
115
|
}
|
|
179
116
|
|
|
180
117
|
return s;
|
|
181
118
|
}
|
|
182
119
|
|
|
183
|
-
// Delete the entry resided in the cache.
|
|
184
|
-
template <class Entry>
|
|
185
|
-
void DeleteCachedEntry(const Slice& /*key*/, void* value) {
|
|
186
|
-
auto entry = reinterpret_cast<Entry*>(value);
|
|
187
|
-
delete entry;
|
|
188
|
-
}
|
|
189
|
-
|
|
190
120
|
// Release the cached entry and decrement its ref count.
|
|
191
121
|
// Do not force erase
|
|
192
122
|
void ReleaseCachedEntry(void* arg, void* h) {
|
|
193
123
|
Cache* cache = reinterpret_cast<Cache*>(arg);
|
|
194
124
|
Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
|
|
195
|
-
cache->Release(handle, false /*
|
|
125
|
+
cache->Release(handle, false /* erase_if_last_ref */);
|
|
196
126
|
}
|
|
197
127
|
|
|
198
|
-
// For hash based index, return
|
|
199
|
-
//
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
128
|
+
// For hash based index, return false if table_properties->prefix_extractor_name
|
|
129
|
+
// and prefix_extractor both exist and match, otherwise true.
|
|
130
|
+
inline bool PrefixExtractorChangedHelper(
|
|
131
|
+
const TableProperties* table_properties,
|
|
132
|
+
const SliceTransform* prefix_extractor) {
|
|
203
133
|
// BlockBasedTableOptions::kHashSearch requires prefix_extractor to be set.
|
|
204
134
|
// Turn off hash index if prefix_extractor is not set; if prefix_extractor
|
|
205
135
|
// is set but prefix_extractor_block is not set, also disable hash index
|
|
@@ -209,8 +139,7 @@ bool PrefixExtractorChanged(const TableProperties* table_properties,
|
|
|
209
139
|
}
|
|
210
140
|
|
|
211
141
|
// prefix_extractor and prefix_extractor_block are both non-empty
|
|
212
|
-
if (table_properties->prefix_extractor_name
|
|
213
|
-
prefix_extractor->Name()) != 0) {
|
|
142
|
+
if (table_properties->prefix_extractor_name != prefix_extractor->AsString()) {
|
|
214
143
|
return true;
|
|
215
144
|
} else {
|
|
216
145
|
return false;
|
|
@@ -228,7 +157,7 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
|
|
|
228
157
|
void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type,
|
|
229
158
|
GetContext* get_context,
|
|
230
159
|
size_t usage) const {
|
|
231
|
-
Statistics* const statistics = rep_->ioptions.
|
|
160
|
+
Statistics* const statistics = rep_->ioptions.stats;
|
|
232
161
|
|
|
233
162
|
PERF_COUNTER_ADD(block_cache_hit_count, 1);
|
|
234
163
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1,
|
|
@@ -286,7 +215,7 @@ void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type,
|
|
|
286
215
|
|
|
287
216
|
void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type,
|
|
288
217
|
GetContext* get_context) const {
|
|
289
|
-
Statistics* const statistics = rep_->ioptions.
|
|
218
|
+
Statistics* const statistics = rep_->ioptions.stats;
|
|
290
219
|
|
|
291
220
|
// TODO: introduce aggregate (not per-level) block cache miss count
|
|
292
221
|
PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1,
|
|
@@ -336,12 +265,9 @@ void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type,
|
|
|
336
265
|
}
|
|
337
266
|
}
|
|
338
267
|
|
|
339
|
-
void BlockBasedTable::UpdateCacheInsertionMetrics(
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
bool redundant) const {
|
|
343
|
-
Statistics* const statistics = rep_->ioptions.statistics;
|
|
344
|
-
|
|
268
|
+
void BlockBasedTable::UpdateCacheInsertionMetrics(
|
|
269
|
+
BlockType block_type, GetContext* get_context, size_t usage, bool redundant,
|
|
270
|
+
Statistics* const statistics) {
|
|
345
271
|
// TODO: introduce perf counters for block cache insertions
|
|
346
272
|
if (get_context) {
|
|
347
273
|
++get_context->get_context_stats_.num_cache_add;
|
|
@@ -430,9 +356,17 @@ void BlockBasedTable::UpdateCacheInsertionMetrics(BlockType block_type,
|
|
|
430
356
|
}
|
|
431
357
|
|
|
432
358
|
Cache::Handle* BlockBasedTable::GetEntryFromCache(
|
|
433
|
-
Cache* block_cache, const Slice& key,
|
|
434
|
-
GetContext* get_context
|
|
435
|
-
|
|
359
|
+
const CacheTier& cache_tier, Cache* block_cache, const Slice& key,
|
|
360
|
+
BlockType block_type, const bool wait, GetContext* get_context,
|
|
361
|
+
const Cache::CacheItemHelper* cache_helper,
|
|
362
|
+
const Cache::CreateCallback& create_cb, Cache::Priority priority) const {
|
|
363
|
+
Cache::Handle* cache_handle = nullptr;
|
|
364
|
+
if (cache_tier == CacheTier::kNonVolatileBlockTier) {
|
|
365
|
+
cache_handle = block_cache->Lookup(key, cache_helper, create_cb, priority,
|
|
366
|
+
wait, rep_->ioptions.statistics.get());
|
|
367
|
+
} else {
|
|
368
|
+
cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics.get());
|
|
369
|
+
}
|
|
436
370
|
|
|
437
371
|
if (cache_handle != nullptr) {
|
|
438
372
|
UpdateCacheHitMetrics(block_type, get_context,
|
|
@@ -444,28 +378,21 @@ Cache::Handle* BlockBasedTable::GetEntryFromCache(
|
|
|
444
378
|
return cache_handle;
|
|
445
379
|
}
|
|
446
380
|
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
&rep->persistent_cache_key_prefix[0],
|
|
461
|
-
&rep->persistent_cache_key_prefix_size);
|
|
462
|
-
}
|
|
463
|
-
if (rep->table_options.block_cache_compressed != nullptr) {
|
|
464
|
-
GenerateCachePrefix<Cache, FSRandomAccessFile>(
|
|
465
|
-
rep->table_options.block_cache_compressed.get(), rep->file->file(),
|
|
466
|
-
&rep->compressed_cache_key_prefix[0],
|
|
467
|
-
&rep->compressed_cache_key_prefix_size);
|
|
381
|
+
template <typename TBlocklike>
|
|
382
|
+
Status BlockBasedTable::InsertEntryToCache(
|
|
383
|
+
const CacheTier& cache_tier, Cache* block_cache, const Slice& key,
|
|
384
|
+
const Cache::CacheItemHelper* cache_helper,
|
|
385
|
+
std::unique_ptr<TBlocklike>& block_holder, size_t charge,
|
|
386
|
+
Cache::Handle** cache_handle, Cache::Priority priority) const {
|
|
387
|
+
Status s = Status::OK();
|
|
388
|
+
if (cache_tier == CacheTier::kNonVolatileBlockTier) {
|
|
389
|
+
s = block_cache->Insert(key, block_holder.get(), cache_helper, charge,
|
|
390
|
+
cache_handle, priority);
|
|
391
|
+
} else {
|
|
392
|
+
s = block_cache->Insert(key, block_holder.get(), charge,
|
|
393
|
+
cache_helper->del_cb, cache_handle, priority);
|
|
468
394
|
}
|
|
395
|
+
return s;
|
|
469
396
|
}
|
|
470
397
|
|
|
471
398
|
namespace {
|
|
@@ -562,31 +489,79 @@ Status GetGlobalSequenceNumber(const TableProperties& table_properties,
|
|
|
562
489
|
}
|
|
563
490
|
} // namespace
|
|
564
491
|
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
492
|
+
void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
|
|
493
|
+
const std::string& cur_db_session_id,
|
|
494
|
+
uint64_t cur_file_number,
|
|
495
|
+
uint64_t file_size,
|
|
496
|
+
OffsetableCacheKey* out_base_cache_key,
|
|
497
|
+
bool* out_is_stable) {
|
|
498
|
+
// Use a stable cache key if sufficient data is in table properties
|
|
499
|
+
std::string db_session_id;
|
|
500
|
+
uint64_t file_num;
|
|
501
|
+
std::string db_id;
|
|
502
|
+
if (properties && !properties->db_session_id.empty() &&
|
|
503
|
+
properties->orig_file_number > 0) {
|
|
504
|
+
// (Newer SST file case)
|
|
505
|
+
// We must have both properties to get a stable unique id because
|
|
506
|
+
// CreateColumnFamilyWithImport or IngestExternalFiles can change the
|
|
507
|
+
// file numbers on a file.
|
|
508
|
+
db_session_id = properties->db_session_id;
|
|
509
|
+
file_num = properties->orig_file_number;
|
|
510
|
+
// Less critical, populated in earlier release than above
|
|
511
|
+
db_id = properties->db_id;
|
|
512
|
+
if (out_is_stable) {
|
|
513
|
+
*out_is_stable = true;
|
|
514
|
+
}
|
|
515
|
+
} else {
|
|
516
|
+
// (Old SST file case)
|
|
517
|
+
// We use (unique) cache keys based on current identifiers. These are at
|
|
518
|
+
// least stable across table file close and re-open, but not across
|
|
519
|
+
// different DBs nor DB close and re-open.
|
|
520
|
+
db_session_id = cur_db_session_id;
|
|
521
|
+
file_num = cur_file_number;
|
|
522
|
+
// Plumbing through the DB ID to here would be annoying, and of limited
|
|
523
|
+
// value because of the case of VersionSet::Recover opening some table
|
|
524
|
+
// files and later setting the DB ID. So we just rely on uniqueness
|
|
525
|
+
// level provided by session ID.
|
|
526
|
+
db_id = "unknown";
|
|
527
|
+
if (out_is_stable) {
|
|
528
|
+
*out_is_stable = false;
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// Too many tests to update to get these working
|
|
533
|
+
// assert(file_num > 0);
|
|
534
|
+
// assert(!db_session_id.empty());
|
|
535
|
+
// assert(!db_id.empty());
|
|
536
|
+
|
|
537
|
+
// Minimum block size is 5 bytes; therefore we can trim off two lower bits
|
|
538
|
+
// from offsets. See GetCacheKey.
|
|
539
|
+
*out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num,
|
|
540
|
+
/*max_offset*/ file_size >> 2);
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
CacheKey BlockBasedTable::GetCacheKey(const OffsetableCacheKey& base_cache_key,
|
|
544
|
+
const BlockHandle& handle) {
|
|
545
|
+
// Minimum block size is 5 bytes; therefore we can trim off two lower bits
|
|
546
|
+
// from offset.
|
|
547
|
+
return base_cache_key.WithOffset(handle.offset() >> 2);
|
|
575
548
|
}
|
|
576
549
|
|
|
577
550
|
Status BlockBasedTable::Open(
|
|
578
|
-
const ReadOptions& read_options, const
|
|
551
|
+
const ReadOptions& read_options, const ImmutableOptions& ioptions,
|
|
579
552
|
const EnvOptions& env_options, const BlockBasedTableOptions& table_options,
|
|
580
553
|
const InternalKeyComparator& internal_comparator,
|
|
581
554
|
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
|
|
582
555
|
std::unique_ptr<TableReader>* table_reader,
|
|
583
|
-
|
|
556
|
+
std::shared_ptr<CacheReservationManager> table_reader_cache_res_mgr,
|
|
557
|
+
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
|
584
558
|
const bool prefetch_index_and_filter_in_cache, const bool skip_filters,
|
|
585
559
|
const int level, const bool immortal_table,
|
|
586
560
|
const SequenceNumber largest_seqno, const bool force_direct_prefetch,
|
|
587
561
|
TailPrefetchStats* tail_prefetch_stats,
|
|
588
562
|
BlockCacheTracer* const block_cache_tracer,
|
|
589
|
-
size_t max_file_size_for_l0_meta_pin
|
|
563
|
+
size_t max_file_size_for_l0_meta_pin, const std::string& cur_db_session_id,
|
|
564
|
+
uint64_t cur_file_num) {
|
|
590
565
|
table_reader->reset();
|
|
591
566
|
|
|
592
567
|
Status s;
|
|
@@ -617,7 +592,8 @@ Status BlockBasedTable::Open(
|
|
|
617
592
|
} else {
|
|
618
593
|
// Should not prefetch for mmap mode.
|
|
619
594
|
prefetch_buffer.reset(new FilePrefetchBuffer(
|
|
620
|
-
|
|
595
|
+
0 /* readahead_size */, 0 /* max_readahead_size */, false /* enable */,
|
|
596
|
+
true /* track_min_offset */));
|
|
621
597
|
}
|
|
622
598
|
|
|
623
599
|
// Read in the following order:
|
|
@@ -629,7 +605,7 @@ Status BlockBasedTable::Open(
|
|
|
629
605
|
// 6. [meta block: index]
|
|
630
606
|
// 7. [meta block: filter]
|
|
631
607
|
IOOptions opts;
|
|
632
|
-
s =
|
|
608
|
+
s = file->PrepareIOOptions(ro, opts);
|
|
633
609
|
if (s.ok()) {
|
|
634
610
|
s = ReadFooterFromFile(opts, file.get(), prefetch_buffer.get(), file_size,
|
|
635
611
|
&footer, kBlockBasedTableMagicNumber);
|
|
@@ -637,39 +613,34 @@ Status BlockBasedTable::Open(
|
|
|
637
613
|
if (!s.ok()) {
|
|
638
614
|
return s;
|
|
639
615
|
}
|
|
640
|
-
if (!
|
|
616
|
+
if (!IsSupportedFormatVersion(footer.format_version())) {
|
|
641
617
|
return Status::Corruption(
|
|
642
618
|
"Unknown Footer version. Maybe this file was created with newer "
|
|
643
619
|
"version of RocksDB?");
|
|
644
620
|
}
|
|
645
621
|
|
|
646
|
-
// We've successfully read the footer. We are ready to serve requests.
|
|
647
|
-
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
|
|
648
|
-
// raw pointer will be used to create HashIndexReader, whose reset may
|
|
649
|
-
// access a dangling pointer.
|
|
650
622
|
BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
|
|
651
623
|
Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
|
|
652
624
|
internal_comparator, skip_filters,
|
|
653
625
|
file_size, level, immortal_table);
|
|
654
626
|
rep->file = std::move(file);
|
|
655
627
|
rep->footer = footer;
|
|
656
|
-
|
|
628
|
+
// We've successfully read the footer. We are ready to serve requests.
|
|
629
|
+
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
|
|
630
|
+
// raw pointer will be used to create HashIndexReader, whose reset may
|
|
631
|
+
// access a dangling pointer.
|
|
657
632
|
// We need to wrap data with internal_prefix_transform to make sure it can
|
|
658
633
|
// handle prefix correctly.
|
|
634
|
+
// FIXME: is changed prefix_extractor handled anywhere for hash index?
|
|
659
635
|
if (prefix_extractor != nullptr) {
|
|
660
636
|
rep->internal_prefix_transform.reset(
|
|
661
|
-
new InternalKeySliceTransform(prefix_extractor));
|
|
637
|
+
new InternalKeySliceTransform(prefix_extractor.get()));
|
|
662
638
|
}
|
|
663
|
-
SetupCacheKeyPrefix(rep);
|
|
664
|
-
std::unique_ptr<BlockBasedTable> new_table(
|
|
665
|
-
new BlockBasedTable(rep, block_cache_tracer));
|
|
666
639
|
|
|
667
|
-
//
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
rep->persistent_cache_key_prefix_size),
|
|
672
|
-
rep->ioptions.statistics);
|
|
640
|
+
// For fully portable/stable cache keys, we need to read the properties
|
|
641
|
+
// block before setting up cache keys. TODO: consider setting up a bootstrap
|
|
642
|
+
// cache key for PersistentCache to use for metaindex and properties blocks.
|
|
643
|
+
rep->persistent_cache_options = PersistentCacheOptions();
|
|
673
644
|
|
|
674
645
|
// Meta-blocks are not dictionary compressed. Explicitly set the dictionary
|
|
675
646
|
// handle to null, otherwise it may be seen as uninitialized during the below
|
|
@@ -677,6 +648,8 @@ Status BlockBasedTable::Open(
|
|
|
677
648
|
rep->compression_dict_handle = BlockHandle::NullBlockHandle();
|
|
678
649
|
|
|
679
650
|
// Read metaindex
|
|
651
|
+
std::unique_ptr<BlockBasedTable> new_table(
|
|
652
|
+
new BlockBasedTable(rep, block_cache_tracer));
|
|
680
653
|
std::unique_ptr<Block> metaindex;
|
|
681
654
|
std::unique_ptr<InternalIterator> metaindex_iter;
|
|
682
655
|
s = new_table->ReadMetaIndexBlock(ro, prefetch_buffer.get(), &metaindex,
|
|
@@ -692,6 +665,39 @@ Status BlockBasedTable::Open(
|
|
|
692
665
|
if (!s.ok()) {
|
|
693
666
|
return s;
|
|
694
667
|
}
|
|
668
|
+
if (!PrefixExtractorChangedHelper(rep->table_properties.get(),
|
|
669
|
+
prefix_extractor.get())) {
|
|
670
|
+
// Establish fast path for unchanged prefix_extractor
|
|
671
|
+
rep->table_prefix_extractor = prefix_extractor;
|
|
672
|
+
} else {
|
|
673
|
+
// Current prefix_extractor doesn't match table
|
|
674
|
+
#ifndef ROCKSDB_LITE
|
|
675
|
+
if (rep->table_properties) {
|
|
676
|
+
//**TODO: If/When the DBOptions has a registry in it, the ConfigOptions
|
|
677
|
+
// will need to use it
|
|
678
|
+
ConfigOptions config_options;
|
|
679
|
+
Status st = SliceTransform::CreateFromString(
|
|
680
|
+
config_options, rep->table_properties->prefix_extractor_name,
|
|
681
|
+
&(rep->table_prefix_extractor));
|
|
682
|
+
if (!st.ok()) {
|
|
683
|
+
//**TODO: Should this error be returned or swallowed?
|
|
684
|
+
ROCKS_LOG_ERROR(rep->ioptions.logger,
|
|
685
|
+
"Failed to create prefix extractor[%s]: %s",
|
|
686
|
+
rep->table_properties->prefix_extractor_name.c_str(),
|
|
687
|
+
st.ToString().c_str());
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
#endif // ROCKSDB_LITE
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
// With properties loaded, we can set up portable/stable cache keys
|
|
694
|
+
SetupBaseCacheKey(rep->table_properties.get(), cur_db_session_id,
|
|
695
|
+
cur_file_num, file_size, &rep->base_cache_key);
|
|
696
|
+
|
|
697
|
+
rep->persistent_cache_options =
|
|
698
|
+
PersistentCacheOptions(rep->table_options.persistent_cache,
|
|
699
|
+
rep->base_cache_key, rep->ioptions.stats);
|
|
700
|
+
|
|
695
701
|
s = new_table->ReadRangeDelBlock(ro, prefetch_buffer.get(),
|
|
696
702
|
metaindex_iter.get(), internal_comparator,
|
|
697
703
|
&lookup_context);
|
|
@@ -711,10 +717,22 @@ Status BlockBasedTable::Open(
|
|
|
711
717
|
tail_prefetch_stats->RecordEffectiveSize(
|
|
712
718
|
static_cast<size_t>(file_size) - prefetch_buffer->min_offset_read());
|
|
713
719
|
}
|
|
720
|
+
}
|
|
714
721
|
|
|
715
|
-
|
|
722
|
+
if (s.ok() && table_reader_cache_res_mgr) {
|
|
723
|
+
std::size_t mem_usage = new_table->ApproximateMemoryUsage();
|
|
724
|
+
s = table_reader_cache_res_mgr->MakeCacheReservation(
|
|
725
|
+
mem_usage, &(rep->table_reader_cache_res_handle));
|
|
726
|
+
if (s.IsIncomplete()) {
|
|
727
|
+
s = Status::MemoryLimit(
|
|
728
|
+
"Can't allocate BlockBasedTableReader due to memory limit based on "
|
|
729
|
+
"cache capacity for memory allocation");
|
|
730
|
+
}
|
|
716
731
|
}
|
|
717
732
|
|
|
733
|
+
if (s.ok()) {
|
|
734
|
+
*table_reader = std::move(new_table);
|
|
735
|
+
}
|
|
718
736
|
return s;
|
|
719
737
|
}
|
|
720
738
|
|
|
@@ -753,53 +771,23 @@ Status BlockBasedTable::PrefetchTail(
|
|
|
753
771
|
// Try file system prefetch
|
|
754
772
|
if (!file->use_direct_io() && !force_direct_prefetch) {
|
|
755
773
|
if (!file->Prefetch(prefetch_off, prefetch_len).IsNotSupported()) {
|
|
756
|
-
prefetch_buffer->reset(
|
|
757
|
-
|
|
774
|
+
prefetch_buffer->reset(new FilePrefetchBuffer(
|
|
775
|
+
0 /* readahead_size */, 0 /* max_readahead_size */,
|
|
776
|
+
false /* enable */, true /* track_min_offset */));
|
|
758
777
|
return Status::OK();
|
|
759
778
|
}
|
|
760
779
|
}
|
|
761
780
|
|
|
762
781
|
// Use `FilePrefetchBuffer`
|
|
763
|
-
prefetch_buffer->reset(
|
|
782
|
+
prefetch_buffer->reset(
|
|
783
|
+
new FilePrefetchBuffer(0 /* readahead_size */, 0 /* max_readahead_size */,
|
|
784
|
+
true /* enable */, true /* track_min_offset */));
|
|
764
785
|
IOOptions opts;
|
|
765
|
-
Status s =
|
|
786
|
+
Status s = file->PrepareIOOptions(ro, opts);
|
|
766
787
|
if (s.ok()) {
|
|
767
|
-
s = (*prefetch_buffer)
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
}
|
|
771
|
-
|
|
772
|
-
Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
|
|
773
|
-
const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
|
|
774
|
-
const Slice& handle_value, TableProperties** table_properties) {
|
|
775
|
-
assert(table_properties != nullptr);
|
|
776
|
-
// If this is an external SST file ingested with write_global_seqno set to
|
|
777
|
-
// true, then we expect the checksum mismatch because checksum was written
|
|
778
|
-
// by SstFileWriter, but its global seqno in the properties block may have
|
|
779
|
-
// been changed during ingestion. In this case, we read the properties
|
|
780
|
-
// block, copy it to a memory buffer, change the global seqno to its
|
|
781
|
-
// original value, i.e. 0, and verify the checksum again.
|
|
782
|
-
BlockHandle props_block_handle;
|
|
783
|
-
CacheAllocationPtr tmp_buf;
|
|
784
|
-
Status s = ReadProperties(ro, handle_value, rep_->file.get(), prefetch_buffer,
|
|
785
|
-
rep_->footer, rep_->ioptions, table_properties,
|
|
786
|
-
false /* verify_checksum */, &props_block_handle,
|
|
787
|
-
&tmp_buf, false /* compression_type_missing */,
|
|
788
|
-
nullptr /* memory_allocator */);
|
|
789
|
-
if (s.ok() && tmp_buf) {
|
|
790
|
-
const auto seqno_pos_iter =
|
|
791
|
-
(*table_properties)
|
|
792
|
-
->properties_offsets.find(
|
|
793
|
-
ExternalSstFilePropertyNames::kGlobalSeqno);
|
|
794
|
-
size_t block_size = static_cast<size_t>(props_block_handle.size());
|
|
795
|
-
if (seqno_pos_iter != (*table_properties)->properties_offsets.end()) {
|
|
796
|
-
uint64_t global_seqno_offset = seqno_pos_iter->second;
|
|
797
|
-
EncodeFixed64(
|
|
798
|
-
tmp_buf.get() + global_seqno_offset - props_block_handle.offset(), 0);
|
|
799
|
-
}
|
|
800
|
-
s = ROCKSDB_NAMESPACE::VerifyBlockChecksum(
|
|
801
|
-
rep_->footer.checksum(), tmp_buf.get(), block_size,
|
|
802
|
-
rep_->file->file_name(), props_block_handle.offset());
|
|
788
|
+
s = (*prefetch_buffer)
|
|
789
|
+
->Prefetch(opts, file, prefetch_off, prefetch_len,
|
|
790
|
+
ro.rate_limiter_priority);
|
|
803
791
|
}
|
|
804
792
|
return s;
|
|
805
793
|
}
|
|
@@ -807,45 +795,32 @@ Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
|
|
|
807
795
|
Status BlockBasedTable::ReadPropertiesBlock(
|
|
808
796
|
const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
|
|
809
797
|
InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
|
|
810
|
-
bool found_properties_block = true;
|
|
811
798
|
Status s;
|
|
812
|
-
|
|
799
|
+
BlockHandle handle;
|
|
800
|
+
s = FindOptionalMetaBlock(meta_iter, kPropertiesBlockName, &handle);
|
|
813
801
|
|
|
814
802
|
if (!s.ok()) {
|
|
815
|
-
ROCKS_LOG_WARN(rep_->ioptions.
|
|
803
|
+
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
|
816
804
|
"Error when seeking to properties block from file: %s",
|
|
817
805
|
s.ToString().c_str());
|
|
818
|
-
} else if (
|
|
806
|
+
} else if (!handle.IsNull()) {
|
|
819
807
|
s = meta_iter->status();
|
|
820
|
-
TableProperties
|
|
808
|
+
std::unique_ptr<TableProperties> table_properties;
|
|
821
809
|
if (s.ok()) {
|
|
822
|
-
s =
|
|
823
|
-
ro,
|
|
824
|
-
rep_->
|
|
825
|
-
true /* verify_checksum */, nullptr /* ret_block_handle */,
|
|
826
|
-
nullptr /* ret_block_contents */,
|
|
827
|
-
false /* compression_type_missing */, nullptr /* memory_allocator */);
|
|
810
|
+
s = ReadTablePropertiesHelper(
|
|
811
|
+
ro, handle, rep_->file.get(), prefetch_buffer, rep_->footer,
|
|
812
|
+
rep_->ioptions, &table_properties, nullptr /* memory_allocator */);
|
|
828
813
|
}
|
|
829
814
|
IGNORE_STATUS_IF_ERROR(s);
|
|
830
815
|
|
|
831
|
-
if (s.IsCorruption()) {
|
|
832
|
-
s = TryReadPropertiesWithGlobalSeqno(
|
|
833
|
-
ro, prefetch_buffer, meta_iter->value(), &table_properties);
|
|
834
|
-
IGNORE_STATUS_IF_ERROR(s);
|
|
835
|
-
}
|
|
836
|
-
std::unique_ptr<TableProperties> props_guard;
|
|
837
|
-
if (table_properties != nullptr) {
|
|
838
|
-
props_guard.reset(table_properties);
|
|
839
|
-
}
|
|
840
|
-
|
|
841
816
|
if (!s.ok()) {
|
|
842
|
-
ROCKS_LOG_WARN(rep_->ioptions.
|
|
817
|
+
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
|
843
818
|
"Encountered error while reading data from properties "
|
|
844
819
|
"block %s",
|
|
845
820
|
s.ToString().c_str());
|
|
846
821
|
} else {
|
|
847
822
|
assert(table_properties != nullptr);
|
|
848
|
-
rep_->table_properties
|
|
823
|
+
rep_->table_properties = std::move(table_properties);
|
|
849
824
|
rep_->blocks_maybe_compressed =
|
|
850
825
|
rep_->table_properties->compression_name !=
|
|
851
826
|
CompressionTypeToString(kNoCompression);
|
|
@@ -856,26 +831,19 @@ Status BlockBasedTable::ReadPropertiesBlock(
|
|
|
856
831
|
CompressionTypeToString(kZSTDNotFinalCompression));
|
|
857
832
|
}
|
|
858
833
|
} else {
|
|
859
|
-
ROCKS_LOG_ERROR(rep_->ioptions.
|
|
834
|
+
ROCKS_LOG_ERROR(rep_->ioptions.logger,
|
|
860
835
|
"Cannot find Properties block from file.");
|
|
861
836
|
}
|
|
862
|
-
#ifndef ROCKSDB_LITE
|
|
863
|
-
if (rep_->table_properties) {
|
|
864
|
-
ParseSliceTransform(rep_->table_properties->prefix_extractor_name,
|
|
865
|
-
&(rep_->table_prefix_extractor));
|
|
866
|
-
}
|
|
867
|
-
#endif // ROCKSDB_LITE
|
|
868
837
|
|
|
869
838
|
// Read the table properties, if provided.
|
|
870
839
|
if (rep_->table_properties) {
|
|
871
840
|
rep_->whole_key_filtering &=
|
|
872
841
|
IsFeatureSupported(*(rep_->table_properties),
|
|
873
842
|
BlockBasedTablePropertyNames::kWholeKeyFiltering,
|
|
874
|
-
rep_->ioptions.
|
|
875
|
-
rep_->prefix_filtering &=
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
rep_->ioptions.info_log);
|
|
843
|
+
rep_->ioptions.logger);
|
|
844
|
+
rep_->prefix_filtering &= IsFeatureSupported(
|
|
845
|
+
*(rep_->table_properties),
|
|
846
|
+
BlockBasedTablePropertyNames::kPrefixFiltering, rep_->ioptions.logger);
|
|
879
847
|
|
|
880
848
|
rep_->index_key_includes_seq =
|
|
881
849
|
rep_->table_properties->index_key_is_user_key == 0;
|
|
@@ -898,7 +866,7 @@ Status BlockBasedTable::ReadPropertiesBlock(
|
|
|
898
866
|
s = GetGlobalSequenceNumber(*(rep_->table_properties), largest_seqno,
|
|
899
867
|
&(rep_->global_seqno));
|
|
900
868
|
if (!s.ok()) {
|
|
901
|
-
ROCKS_LOG_ERROR(rep_->ioptions.
|
|
869
|
+
ROCKS_LOG_ERROR(rep_->ioptions.logger, "%s", s.ToString().c_str());
|
|
902
870
|
}
|
|
903
871
|
}
|
|
904
872
|
return s;
|
|
@@ -910,15 +878,14 @@ Status BlockBasedTable::ReadRangeDelBlock(
|
|
|
910
878
|
const InternalKeyComparator& internal_comparator,
|
|
911
879
|
BlockCacheLookupContext* lookup_context) {
|
|
912
880
|
Status s;
|
|
913
|
-
bool found_range_del_block;
|
|
914
881
|
BlockHandle range_del_handle;
|
|
915
|
-
s =
|
|
882
|
+
s = FindOptionalMetaBlock(meta_iter, kRangeDelBlockName, &range_del_handle);
|
|
916
883
|
if (!s.ok()) {
|
|
917
884
|
ROCKS_LOG_WARN(
|
|
918
|
-
rep_->ioptions.
|
|
885
|
+
rep_->ioptions.logger,
|
|
919
886
|
"Error when seeking to range delete tombstones block from file: %s",
|
|
920
887
|
s.ToString().c_str());
|
|
921
|
-
} else if (
|
|
888
|
+
} else if (!range_del_handle.IsNull()) {
|
|
922
889
|
std::unique_ptr<InternalIterator> iter(NewDataBlockIterator<DataBlockIter>(
|
|
923
890
|
read_options, range_del_handle,
|
|
924
891
|
/*input_iter=*/nullptr, BlockType::kRangeDeletion,
|
|
@@ -927,7 +894,7 @@ Status BlockBasedTable::ReadRangeDelBlock(
|
|
|
927
894
|
s = iter->status();
|
|
928
895
|
if (!s.ok()) {
|
|
929
896
|
ROCKS_LOG_WARN(
|
|
930
|
-
rep_->ioptions.
|
|
897
|
+
rep_->ioptions.logger,
|
|
931
898
|
"Encountered error while reading data from range del block %s",
|
|
932
899
|
s.ToString().c_str());
|
|
933
900
|
IGNORE_STATUS_IF_ERROR(s);
|
|
@@ -946,33 +913,59 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
946
913
|
const BlockBasedTableOptions& table_options, const int level,
|
|
947
914
|
size_t file_size, size_t max_file_size_for_l0_meta_pin,
|
|
948
915
|
BlockCacheLookupContext* lookup_context) {
|
|
949
|
-
Status s;
|
|
950
|
-
|
|
951
916
|
// Find filter handle and filter type
|
|
952
917
|
if (rep_->filter_policy) {
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
918
|
+
auto name = rep_->filter_policy->CompatibilityName();
|
|
919
|
+
bool builtin_compatible =
|
|
920
|
+
strcmp(name, BuiltinFilterPolicy::kCompatibilityName()) == 0;
|
|
921
|
+
|
|
922
|
+
for (const auto& [filter_type, prefix] :
|
|
923
|
+
{std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix),
|
|
924
|
+
std::make_pair(Rep::FilterType::kPartitionedFilter,
|
|
925
|
+
kPartitionedFilterBlockPrefix),
|
|
926
|
+
std::make_pair(Rep::FilterType::kBlockFilter, kFilterBlockPrefix)}) {
|
|
927
|
+
if (builtin_compatible) {
|
|
928
|
+
// This code is only here to deal with a hiccup in early 7.0.x where
|
|
929
|
+
// there was an unintentional name change in the SST files metadata.
|
|
930
|
+
// It should be OK to remove this in the future (late 2022) and just
|
|
931
|
+
// have the 'else' code.
|
|
932
|
+
// NOTE: the test:: names below are likely not needed but included
|
|
933
|
+
// out of caution
|
|
934
|
+
static const std::unordered_set<std::string> kBuiltinNameAndAliases = {
|
|
935
|
+
BuiltinFilterPolicy::kCompatibilityName(),
|
|
936
|
+
test::LegacyBloomFilterPolicy::kClassName(),
|
|
937
|
+
test::FastLocalBloomFilterPolicy::kClassName(),
|
|
938
|
+
test::Standard128RibbonFilterPolicy::kClassName(),
|
|
939
|
+
DeprecatedBlockBasedBloomFilterPolicy::kClassName(),
|
|
940
|
+
BloomFilterPolicy::kClassName(),
|
|
941
|
+
RibbonFilterPolicy::kClassName(),
|
|
942
|
+
};
|
|
943
|
+
|
|
944
|
+
// For efficiency, do a prefix seek and see if the first match is
|
|
945
|
+
// good.
|
|
946
|
+
meta_iter->Seek(prefix);
|
|
947
|
+
if (meta_iter->status().ok() && meta_iter->Valid()) {
|
|
948
|
+
Slice key = meta_iter->key();
|
|
949
|
+
if (key.starts_with(prefix)) {
|
|
950
|
+
key.remove_prefix(prefix.size());
|
|
951
|
+
if (kBuiltinNameAndAliases.find(key.ToString()) !=
|
|
952
|
+
kBuiltinNameAndAliases.end()) {
|
|
953
|
+
Slice v = meta_iter->value();
|
|
954
|
+
Status s = rep_->filter_handle.DecodeFrom(&v);
|
|
955
|
+
if (s.ok()) {
|
|
956
|
+
rep_->filter_type = filter_type;
|
|
957
|
+
break;
|
|
958
|
+
}
|
|
959
|
+
}
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
} else {
|
|
963
|
+
std::string filter_block_key = prefix + name;
|
|
964
|
+
if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle)
|
|
965
|
+
.ok()) {
|
|
966
|
+
rep_->filter_type = filter_type;
|
|
966
967
|
break;
|
|
967
|
-
|
|
968
|
-
assert(0);
|
|
969
|
-
}
|
|
970
|
-
std::string filter_block_key = prefix;
|
|
971
|
-
filter_block_key.append(rep_->filter_policy->Name());
|
|
972
|
-
if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle)
|
|
973
|
-
.ok()) {
|
|
974
|
-
rep_->filter_type = filter_type;
|
|
975
|
-
break;
|
|
968
|
+
}
|
|
976
969
|
}
|
|
977
970
|
}
|
|
978
971
|
}
|
|
@@ -981,9 +974,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
981
974
|
rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);
|
|
982
975
|
|
|
983
976
|
// Find compression dictionary handle
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
&rep_->compression_dict_handle);
|
|
977
|
+
Status s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName,
|
|
978
|
+
&rep_->compression_dict_handle);
|
|
987
979
|
if (!s.ok()) {
|
|
988
980
|
return s;
|
|
989
981
|
}
|
|
@@ -1038,6 +1030,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
1038
1030
|
? pin_top_level_index
|
|
1039
1031
|
: pin_unpartitioned;
|
|
1040
1032
|
// prefetch the first level of index
|
|
1033
|
+
// WART: this might be redundant (unnecessary cache hit) if !pin_index,
|
|
1034
|
+
// depending on prepopulate_block_cache option
|
|
1041
1035
|
const bool prefetch_index = prefetch_all || pin_index;
|
|
1042
1036
|
|
|
1043
1037
|
std::unique_ptr<IndexReader> index_reader;
|
|
@@ -1066,6 +1060,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
1066
1060
|
? pin_top_level_index
|
|
1067
1061
|
: pin_unpartitioned;
|
|
1068
1062
|
// prefetch the first level of filter
|
|
1063
|
+
// WART: this might be redundant (unnecessary cache hit) if !pin_filter,
|
|
1064
|
+
// depending on prepopulate_block_cache option
|
|
1069
1065
|
const bool prefetch_filter = prefetch_all || pin_filter;
|
|
1070
1066
|
|
|
1071
1067
|
if (rep_->filter_policy) {
|
|
@@ -1126,6 +1122,11 @@ std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
|
|
|
1126
1122
|
|
|
1127
1123
|
size_t BlockBasedTable::ApproximateMemoryUsage() const {
|
|
1128
1124
|
size_t usage = 0;
|
|
1125
|
+
if (rep_) {
|
|
1126
|
+
usage += rep_->ApproximateMemoryUsage();
|
|
1127
|
+
} else {
|
|
1128
|
+
return usage;
|
|
1129
|
+
}
|
|
1129
1130
|
if (rep_->filter) {
|
|
1130
1131
|
usage += rep_->filter->ApproximateMemoryUsage();
|
|
1131
1132
|
}
|
|
@@ -1135,6 +1136,9 @@ size_t BlockBasedTable::ApproximateMemoryUsage() const {
|
|
|
1135
1136
|
if (rep_->uncompression_dict_reader) {
|
|
1136
1137
|
usage += rep_->uncompression_dict_reader->ApproximateMemoryUsage();
|
|
1137
1138
|
}
|
|
1139
|
+
if (rep_->table_properties) {
|
|
1140
|
+
usage += rep_->table_properties->ApproximateMemoryUsage();
|
|
1141
|
+
}
|
|
1138
1142
|
return usage;
|
|
1139
1143
|
}
|
|
1140
1144
|
|
|
@@ -1158,7 +1162,7 @@ Status BlockBasedTable::ReadMetaIndexBlock(
|
|
|
1158
1162
|
nullptr /* filter_policy */);
|
|
1159
1163
|
|
|
1160
1164
|
if (!s.ok()) {
|
|
1161
|
-
ROCKS_LOG_ERROR(rep_->ioptions.
|
|
1165
|
+
ROCKS_LOG_ERROR(rep_->ioptions.logger,
|
|
1162
1166
|
"Encountered error while reading data from properties"
|
|
1163
1167
|
" block %s",
|
|
1164
1168
|
s.ToString().c_str());
|
|
@@ -1167,33 +1171,48 @@ Status BlockBasedTable::ReadMetaIndexBlock(
|
|
|
1167
1171
|
|
|
1168
1172
|
*metaindex_block = std::move(metaindex);
|
|
1169
1173
|
// meta block uses bytewise comparator.
|
|
1170
|
-
iter->reset(metaindex_block->get()->
|
|
1171
|
-
BytewiseComparator(), kDisableGlobalSequenceNumber));
|
|
1174
|
+
iter->reset(metaindex_block->get()->NewMetaIterator());
|
|
1172
1175
|
return Status::OK();
|
|
1173
1176
|
}
|
|
1174
1177
|
|
|
1175
1178
|
template <typename TBlocklike>
|
|
1176
1179
|
Status BlockBasedTable::GetDataBlockFromCache(
|
|
1177
|
-
const Slice&
|
|
1178
|
-
Cache* block_cache, Cache* block_cache_compressed,
|
|
1180
|
+
const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed,
|
|
1179
1181
|
const ReadOptions& read_options, CachableEntry<TBlocklike>* block,
|
|
1180
1182
|
const UncompressionDict& uncompression_dict, BlockType block_type,
|
|
1181
|
-
GetContext* get_context) const {
|
|
1183
|
+
const bool wait, GetContext* get_context) const {
|
|
1182
1184
|
const size_t read_amp_bytes_per_bit =
|
|
1183
1185
|
block_type == BlockType::kData
|
|
1184
1186
|
? rep_->table_options.read_amp_bytes_per_bit
|
|
1185
1187
|
: 0;
|
|
1186
1188
|
assert(block);
|
|
1187
1189
|
assert(block->IsEmpty());
|
|
1190
|
+
const Cache::Priority priority =
|
|
1191
|
+
rep_->table_options.cache_index_and_filter_blocks_with_high_priority &&
|
|
1192
|
+
(block_type == BlockType::kFilter ||
|
|
1193
|
+
block_type == BlockType::kCompressionDictionary ||
|
|
1194
|
+
block_type == BlockType::kIndex)
|
|
1195
|
+
? Cache::Priority::HIGH
|
|
1196
|
+
: Cache::Priority::LOW;
|
|
1188
1197
|
|
|
1189
1198
|
Status s;
|
|
1190
1199
|
BlockContents* compressed_block = nullptr;
|
|
1191
1200
|
Cache::Handle* block_cache_compressed_handle = nullptr;
|
|
1201
|
+
Statistics* statistics = rep_->ioptions.statistics.get();
|
|
1202
|
+
bool using_zstd = rep_->blocks_definitely_zstd_compressed;
|
|
1203
|
+
const FilterPolicy* filter_policy = rep_->filter_policy;
|
|
1204
|
+
Cache::CreateCallback create_cb = GetCreateCallback<TBlocklike>(
|
|
1205
|
+
read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
|
|
1192
1206
|
|
|
1193
1207
|
// Lookup uncompressed cache first
|
|
1194
1208
|
if (block_cache != nullptr) {
|
|
1195
|
-
|
|
1196
|
-
|
|
1209
|
+
assert(!cache_key.empty());
|
|
1210
|
+
Cache::Handle* cache_handle = nullptr;
|
|
1211
|
+
cache_handle = GetEntryFromCache(
|
|
1212
|
+
rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
|
|
1213
|
+
block_type, wait, get_context,
|
|
1214
|
+
BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type), create_cb,
|
|
1215
|
+
priority);
|
|
1197
1216
|
if (cache_handle != nullptr) {
|
|
1198
1217
|
block->SetCachedValue(
|
|
1199
1218
|
reinterpret_cast<TBlocklike*>(block_cache->Value(cache_handle)),
|
|
@@ -1209,11 +1228,20 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|
|
1209
1228
|
return s;
|
|
1210
1229
|
}
|
|
1211
1230
|
|
|
1212
|
-
assert(!
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1231
|
+
assert(!cache_key.empty());
|
|
1232
|
+
BlockContents contents;
|
|
1233
|
+
if (rep_->ioptions.lowest_used_cache_tier ==
|
|
1234
|
+
CacheTier::kNonVolatileBlockTier) {
|
|
1235
|
+
Cache::CreateCallback create_cb_special = GetCreateCallback<BlockContents>(
|
|
1236
|
+
read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
|
|
1237
|
+
block_cache_compressed_handle = block_cache_compressed->Lookup(
|
|
1238
|
+
cache_key,
|
|
1239
|
+
BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
|
|
1240
|
+
create_cb_special, priority, true);
|
|
1241
|
+
} else {
|
|
1242
|
+
block_cache_compressed_handle =
|
|
1243
|
+
block_cache_compressed->Lookup(cache_key, statistics);
|
|
1244
|
+
}
|
|
1217
1245
|
|
|
1218
1246
|
// if we found in the compressed cache, then uncompress and insert into
|
|
1219
1247
|
// uncompressed cache
|
|
@@ -1226,11 +1254,10 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|
|
1226
1254
|
RecordTick(statistics, BLOCK_CACHE_COMPRESSED_HIT);
|
|
1227
1255
|
compressed_block = reinterpret_cast<BlockContents*>(
|
|
1228
1256
|
block_cache_compressed->Value(block_cache_compressed_handle));
|
|
1229
|
-
CompressionType compression_type = compressed_block
|
|
1257
|
+
CompressionType compression_type = GetBlockCompressionType(*compressed_block);
|
|
1230
1258
|
assert(compression_type != kNoCompression);
|
|
1231
1259
|
|
|
1232
1260
|
// Retrieve the uncompressed contents into a new buffer
|
|
1233
|
-
BlockContents contents;
|
|
1234
1261
|
UncompressionContext context(compression_type);
|
|
1235
1262
|
UncompressionInfo info(context, uncompression_dict, compression_type);
|
|
1236
1263
|
s = UncompressBlockContents(
|
|
@@ -1238,7 +1265,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|
|
1238
1265
|
&contents, rep_->table_options.format_version, rep_->ioptions,
|
|
1239
1266
|
GetMemoryAllocator(rep_->table_options));
|
|
1240
1267
|
|
|
1241
|
-
// Insert uncompressed block into block cache
|
|
1268
|
+
// Insert uncompressed block into block cache, the priority is based on the
|
|
1269
|
+
// data block type.
|
|
1242
1270
|
if (s.ok()) {
|
|
1243
1271
|
std::unique_ptr<TBlocklike> block_holder(
|
|
1244
1272
|
BlocklikeTraits<TBlocklike>::Create(
|
|
@@ -1250,15 +1278,17 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|
|
1250
1278
|
read_options.fill_cache) {
|
|
1251
1279
|
size_t charge = block_holder->ApproximateMemoryUsage();
|
|
1252
1280
|
Cache::Handle* cache_handle = nullptr;
|
|
1253
|
-
s =
|
|
1254
|
-
|
|
1281
|
+
s = InsertEntryToCache(
|
|
1282
|
+
rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
|
|
1283
|
+
BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type),
|
|
1284
|
+
block_holder, charge, &cache_handle, priority);
|
|
1255
1285
|
if (s.ok()) {
|
|
1256
1286
|
assert(cache_handle != nullptr);
|
|
1257
1287
|
block->SetCachedValue(block_holder.release(), block_cache,
|
|
1258
1288
|
cache_handle);
|
|
1259
1289
|
|
|
1260
1290
|
UpdateCacheInsertionMetrics(block_type, get_context, charge,
|
|
1261
|
-
s.IsOkOverwritten());
|
|
1291
|
+
s.IsOkOverwritten(), rep_->ioptions.stats);
|
|
1262
1292
|
} else {
|
|
1263
1293
|
RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
|
|
1264
1294
|
}
|
|
@@ -1274,14 +1304,13 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|
|
1274
1304
|
|
|
1275
1305
|
template <typename TBlocklike>
|
|
1276
1306
|
Status BlockBasedTable::PutDataBlockToCache(
|
|
1277
|
-
const Slice&
|
|
1278
|
-
Cache* block_cache, Cache* block_cache_compressed,
|
|
1307
|
+
const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed,
|
|
1279
1308
|
CachableEntry<TBlocklike>* cached_block, BlockContents* raw_block_contents,
|
|
1280
1309
|
CompressionType raw_block_comp_type,
|
|
1281
1310
|
const UncompressionDict& uncompression_dict,
|
|
1282
1311
|
MemoryAllocator* memory_allocator, BlockType block_type,
|
|
1283
1312
|
GetContext* get_context) const {
|
|
1284
|
-
const
|
|
1313
|
+
const ImmutableOptions& ioptions = rep_->ioptions;
|
|
1285
1314
|
const uint32_t format_version = rep_->table_options.format_version;
|
|
1286
1315
|
const size_t read_amp_bytes_per_bit =
|
|
1287
1316
|
block_type == BlockType::kData
|
|
@@ -1298,7 +1327,7 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|
|
1298
1327
|
assert(cached_block->IsEmpty());
|
|
1299
1328
|
|
|
1300
1329
|
Status s;
|
|
1301
|
-
Statistics* statistics = ioptions.
|
|
1330
|
+
Statistics* statistics = ioptions.stats;
|
|
1302
1331
|
|
|
1303
1332
|
std::unique_ptr<TBlocklike> block_holder;
|
|
1304
1333
|
if (raw_block_comp_type != kNoCompression) {
|
|
@@ -1330,24 +1359,28 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|
|
1330
1359
|
if (block_cache_compressed != nullptr &&
|
|
1331
1360
|
raw_block_comp_type != kNoCompression && raw_block_contents != nullptr &&
|
|
1332
1361
|
raw_block_contents->own_bytes()) {
|
|
1333
|
-
#ifndef NDEBUG
|
|
1334
1362
|
assert(raw_block_contents->is_raw_block);
|
|
1335
|
-
|
|
1363
|
+
assert(!cache_key.empty());
|
|
1336
1364
|
|
|
1337
1365
|
// We cannot directly put raw_block_contents because this could point to
|
|
1338
1366
|
// an object in the stack.
|
|
1339
|
-
BlockContents
|
|
1340
|
-
new BlockContents(std::move(*raw_block_contents));
|
|
1341
|
-
s =
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1367
|
+
std::unique_ptr<BlockContents> block_cont_for_comp_cache(
|
|
1368
|
+
new BlockContents(std::move(*raw_block_contents)));
|
|
1369
|
+
s = InsertEntryToCache(
|
|
1370
|
+
rep_->ioptions.lowest_used_cache_tier, block_cache_compressed,
|
|
1371
|
+
cache_key,
|
|
1372
|
+
BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
|
|
1373
|
+
block_cont_for_comp_cache,
|
|
1374
|
+
block_cont_for_comp_cache->ApproximateMemoryUsage(), nullptr,
|
|
1375
|
+
Cache::Priority::LOW);
|
|
1376
|
+
|
|
1377
|
+
BlockContents* block_cont_raw_ptr = block_cont_for_comp_cache.release();
|
|
1345
1378
|
if (s.ok()) {
|
|
1346
1379
|
// Avoid the following code to delete this cached block.
|
|
1347
1380
|
RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD);
|
|
1348
1381
|
} else {
|
|
1349
1382
|
RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
|
|
1350
|
-
delete
|
|
1383
|
+
delete block_cont_raw_ptr;
|
|
1351
1384
|
}
|
|
1352
1385
|
}
|
|
1353
1386
|
|
|
@@ -1355,16 +1388,17 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|
|
1355
1388
|
if (block_cache != nullptr && block_holder->own_bytes()) {
|
|
1356
1389
|
size_t charge = block_holder->ApproximateMemoryUsage();
|
|
1357
1390
|
Cache::Handle* cache_handle = nullptr;
|
|
1358
|
-
s =
|
|
1359
|
-
|
|
1360
|
-
|
|
1391
|
+
s = InsertEntryToCache(
|
|
1392
|
+
rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
|
|
1393
|
+
BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type),
|
|
1394
|
+
block_holder, charge, &cache_handle, priority);
|
|
1361
1395
|
if (s.ok()) {
|
|
1362
1396
|
assert(cache_handle != nullptr);
|
|
1363
1397
|
cached_block->SetCachedValue(block_holder.release(), block_cache,
|
|
1364
1398
|
cache_handle);
|
|
1365
1399
|
|
|
1366
1400
|
UpdateCacheInsertionMetrics(block_type, get_context, charge,
|
|
1367
|
-
s.IsOkOverwritten());
|
|
1401
|
+
s.IsOkOverwritten(), rep_->ioptions.stats);
|
|
1368
1402
|
} else {
|
|
1369
1403
|
RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
|
|
1370
1404
|
}
|
|
@@ -1429,8 +1463,7 @@ DataBlockIter* BlockBasedTable::InitBlockIterator<DataBlockIter>(
|
|
|
1429
1463
|
DataBlockIter* input_iter, bool block_contents_pinned) {
|
|
1430
1464
|
return block->NewDataIterator(rep->internal_comparator.user_comparator(),
|
|
1431
1465
|
rep->get_global_seqno(block_type), input_iter,
|
|
1432
|
-
rep->ioptions.
|
|
1433
|
-
block_contents_pinned);
|
|
1466
|
+
rep->ioptions.stats, block_contents_pinned);
|
|
1434
1467
|
}
|
|
1435
1468
|
|
|
1436
1469
|
template <>
|
|
@@ -1439,7 +1472,7 @@ IndexBlockIter* BlockBasedTable::InitBlockIterator<IndexBlockIter>(
|
|
|
1439
1472
|
IndexBlockIter* input_iter, bool block_contents_pinned) {
|
|
1440
1473
|
return block->NewIndexIterator(
|
|
1441
1474
|
rep->internal_comparator.user_comparator(),
|
|
1442
|
-
rep->get_global_seqno(block_type), input_iter, rep->ioptions.
|
|
1475
|
+
rep->get_global_seqno(block_type), input_iter, rep->ioptions.stats,
|
|
1443
1476
|
/* total_order_seek */ true, rep->index_has_first_key,
|
|
1444
1477
|
rep->index_key_includes_seq, rep->index_value_is_full,
|
|
1445
1478
|
block_contents_pinned);
|
|
@@ -1454,6 +1487,7 @@ template <typename TBlocklike>
|
|
|
1454
1487
|
Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
1455
1488
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
|
1456
1489
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
1490
|
+
const bool wait, const bool for_compaction,
|
|
1457
1491
|
CachableEntry<TBlocklike>* block_entry, BlockType block_type,
|
|
1458
1492
|
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
1459
1493
|
BlockContents* contents) const {
|
|
@@ -1467,39 +1501,40 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
|
1467
1501
|
//
|
|
1468
1502
|
// If either block cache is enabled, we'll try to read from it.
|
|
1469
1503
|
Status s;
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
Slice key /* key to the block cache */;
|
|
1473
|
-
Slice ckey /* key to the compressed block cache */;
|
|
1504
|
+
CacheKey key_data;
|
|
1505
|
+
Slice key;
|
|
1474
1506
|
bool is_cache_hit = false;
|
|
1475
1507
|
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
|
1476
1508
|
// create key for block cache
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
handle, cache_key);
|
|
1480
|
-
}
|
|
1481
|
-
|
|
1482
|
-
if (block_cache_compressed != nullptr) {
|
|
1483
|
-
ckey = GetCacheKey(rep_->compressed_cache_key_prefix,
|
|
1484
|
-
rep_->compressed_cache_key_prefix_size, handle,
|
|
1485
|
-
compressed_cache_key);
|
|
1486
|
-
}
|
|
1509
|
+
key_data = GetCacheKey(rep_->base_cache_key, handle);
|
|
1510
|
+
key = key_data.AsSlice();
|
|
1487
1511
|
|
|
1488
1512
|
if (!contents) {
|
|
1489
|
-
s = GetDataBlockFromCache(key,
|
|
1490
|
-
|
|
1491
|
-
get_context);
|
|
1492
|
-
|
|
1513
|
+
s = GetDataBlockFromCache(key, block_cache, block_cache_compressed, ro,
|
|
1514
|
+
block_entry, uncompression_dict, block_type,
|
|
1515
|
+
wait, get_context);
|
|
1516
|
+
// Value could still be null at this point, so check the cache handle
|
|
1517
|
+
// and update the read pattern for prefetching
|
|
1518
|
+
if (block_entry->GetValue() || block_entry->GetCacheHandle()) {
|
|
1493
1519
|
// TODO(haoyu): Differentiate cache hit on uncompressed block cache and
|
|
1494
1520
|
// compressed block cache.
|
|
1495
1521
|
is_cache_hit = true;
|
|
1522
|
+
if (prefetch_buffer) {
|
|
1523
|
+
// Update the block details so that PrefetchBuffer can use the read
|
|
1524
|
+
// pattern to determine if reads are sequential or not for
|
|
1525
|
+
// prefetching. It should also take in account blocks read from cache.
|
|
1526
|
+
prefetch_buffer->UpdateReadPattern(
|
|
1527
|
+
handle.offset(), BlockSizeWithTrailer(handle),
|
|
1528
|
+
ro.adaptive_readahead /*decrease_readahead_size*/);
|
|
1529
|
+
}
|
|
1496
1530
|
}
|
|
1497
1531
|
}
|
|
1498
1532
|
|
|
1499
1533
|
// Can't find the block from the cache. If I/O is allowed, read from the
|
|
1500
1534
|
// file.
|
|
1501
|
-
if (block_entry->GetValue() == nullptr &&
|
|
1502
|
-
|
|
1535
|
+
if (block_entry->GetValue() == nullptr &&
|
|
1536
|
+
block_entry->GetCacheHandle() == nullptr && !no_io && ro.fill_cache) {
|
|
1537
|
+
Statistics* statistics = rep_->ioptions.stats;
|
|
1503
1538
|
const bool maybe_compressed =
|
|
1504
1539
|
block_type != BlockType::kFilter &&
|
|
1505
1540
|
block_type != BlockType::kCompressionDictionary &&
|
|
@@ -1508,7 +1543,9 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
|
1508
1543
|
CompressionType raw_block_comp_type;
|
|
1509
1544
|
BlockContents raw_block_contents;
|
|
1510
1545
|
if (!contents) {
|
|
1511
|
-
|
|
1546
|
+
Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS
|
|
1547
|
+
: READ_BLOCK_GET_MICROS;
|
|
1548
|
+
StopWatch sw(rep_->ioptions.clock, statistics, histogram);
|
|
1512
1549
|
BlockFetcher block_fetcher(
|
|
1513
1550
|
rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle,
|
|
1514
1551
|
&raw_block_contents, rep_->ioptions, do_uncompress,
|
|
@@ -1535,15 +1572,15 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
|
1535
1572
|
}
|
|
1536
1573
|
}
|
|
1537
1574
|
} else {
|
|
1538
|
-
raw_block_comp_type = contents
|
|
1575
|
+
raw_block_comp_type = GetBlockCompressionType(*contents);
|
|
1539
1576
|
}
|
|
1540
1577
|
|
|
1541
1578
|
if (s.ok()) {
|
|
1542
1579
|
// If filling cache is allowed and a cache is configured, try to put the
|
|
1543
1580
|
// block to the cache.
|
|
1544
1581
|
s = PutDataBlockToCache(
|
|
1545
|
-
key,
|
|
1546
|
-
|
|
1582
|
+
key, block_cache, block_cache_compressed, block_entry, contents,
|
|
1583
|
+
raw_block_comp_type, uncompression_dict,
|
|
1547
1584
|
GetMemoryAllocator(rep_->table_options), block_type, get_context);
|
|
1548
1585
|
}
|
|
1549
1586
|
}
|
|
@@ -1597,7 +1634,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
|
1597
1634
|
// Avoid making copy of block_key and cf_name when constructing the access
|
|
1598
1635
|
// record.
|
|
1599
1636
|
BlockCacheTraceRecord access_record(
|
|
1600
|
-
rep_->ioptions.
|
|
1637
|
+
rep_->ioptions.clock->NowMicros(),
|
|
1601
1638
|
/*block_key=*/"", trace_block_type,
|
|
1602
1639
|
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
|
1603
1640
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
|
@@ -1642,7 +1679,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1642
1679
|
char* scratch, const UncompressionDict& uncompression_dict) const {
|
|
1643
1680
|
RandomAccessFileReader* file = rep_->file.get();
|
|
1644
1681
|
const Footer& footer = rep_->footer;
|
|
1645
|
-
const
|
|
1682
|
+
const ImmutableOptions& ioptions = rep_->ioptions;
|
|
1646
1683
|
size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit;
|
|
1647
1684
|
MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options);
|
|
1648
1685
|
|
|
@@ -1661,7 +1698,8 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1661
1698
|
RetrieveBlock(nullptr, options, handle, uncompression_dict,
|
|
1662
1699
|
&(*results)[idx_in_batch], BlockType::kData,
|
|
1663
1700
|
mget_iter->get_context, &lookup_data_block_context,
|
|
1664
|
-
/* for_compaction */ false, /* use_cache */ true
|
|
1701
|
+
/* for_compaction */ false, /* use_cache */ true,
|
|
1702
|
+
/* wait_for_cache */ true);
|
|
1665
1703
|
}
|
|
1666
1704
|
return;
|
|
1667
1705
|
}
|
|
@@ -1696,7 +1734,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1696
1734
|
if (use_shared_buffer && !file->use_direct_io() &&
|
|
1697
1735
|
prev_end == handle.offset()) {
|
|
1698
1736
|
req_offset_for_block.emplace_back(prev_len);
|
|
1699
|
-
prev_len +=
|
|
1737
|
+
prev_len += BlockSizeWithTrailer(handle);
|
|
1700
1738
|
} else {
|
|
1701
1739
|
// No compression or current block and previous one is not adjacent:
|
|
1702
1740
|
// Step 1, create a new request for previous blocks
|
|
@@ -1717,10 +1755,13 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1717
1755
|
|
|
1718
1756
|
// Step 2, remeber the previous block info
|
|
1719
1757
|
prev_offset = handle.offset();
|
|
1720
|
-
prev_len =
|
|
1758
|
+
prev_len = BlockSizeWithTrailer(handle);
|
|
1721
1759
|
req_offset_for_block.emplace_back(0);
|
|
1722
1760
|
}
|
|
1723
1761
|
req_idx_for_block.emplace_back(read_reqs.size());
|
|
1762
|
+
|
|
1763
|
+
PERF_COUNTER_ADD(block_read_count, 1);
|
|
1764
|
+
PERF_COUNTER_ADD(block_read_byte, BlockSizeWithTrailer(handle));
|
|
1724
1765
|
}
|
|
1725
1766
|
// Handle the last block and process the pending last request
|
|
1726
1767
|
if (prev_len != 0) {
|
|
@@ -1740,15 +1781,17 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1740
1781
|
AlignedBuf direct_io_buf;
|
|
1741
1782
|
{
|
|
1742
1783
|
IOOptions opts;
|
|
1743
|
-
IOStatus s =
|
|
1744
|
-
if (s.
|
|
1784
|
+
IOStatus s = file->PrepareIOOptions(options, opts);
|
|
1785
|
+
if (s.ok()) {
|
|
1786
|
+
s = file->MultiRead(opts, &read_reqs[0], read_reqs.size(), &direct_io_buf,
|
|
1787
|
+
options.rate_limiter_priority);
|
|
1788
|
+
}
|
|
1789
|
+
if (!s.ok()) {
|
|
1790
|
+
// Discard all the results in this batch if there is any time out
|
|
1791
|
+
// or overall MultiRead error
|
|
1745
1792
|
for (FSReadRequest& req : read_reqs) {
|
|
1746
1793
|
req.status = s;
|
|
1747
1794
|
}
|
|
1748
|
-
} else {
|
|
1749
|
-
// How to handle this status code?
|
|
1750
|
-
file->MultiRead(opts, &read_reqs[0], read_reqs.size(), &direct_io_buf)
|
|
1751
|
-
.PermitUncheckedError();
|
|
1752
1795
|
}
|
|
1753
1796
|
}
|
|
1754
1797
|
|
|
@@ -1775,7 +1818,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1775
1818
|
Status s = req.status;
|
|
1776
1819
|
if (s.ok()) {
|
|
1777
1820
|
if ((req.result.size() != req.len) ||
|
|
1778
|
-
(req_offset +
|
|
1821
|
+
(req_offset + BlockSizeWithTrailer(handle) > req.result.size())) {
|
|
1779
1822
|
s = Status::Corruption(
|
|
1780
1823
|
"truncated block read from " + rep_->file->file_name() +
|
|
1781
1824
|
" offset " + ToString(handle.offset()) + ", expected " +
|
|
@@ -1789,7 +1832,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1789
1832
|
// We allocated a buffer for this block. Give ownership of it to
|
|
1790
1833
|
// BlockContents so it can free the memory
|
|
1791
1834
|
assert(req.result.data() == req.scratch);
|
|
1792
|
-
assert(req.result.size() ==
|
|
1835
|
+
assert(req.result.size() == BlockSizeWithTrailer(handle));
|
|
1793
1836
|
assert(req_offset == 0);
|
|
1794
1837
|
std::unique_ptr<char[]> raw_block(req.scratch);
|
|
1795
1838
|
raw_block_contents = BlockContents(std::move(raw_block), handle.size());
|
|
@@ -1812,9 +1855,9 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1812
1855
|
// begin address of each read request, we need to add the offset
|
|
1813
1856
|
// in each read request. Checksum is stored in the block trailer,
|
|
1814
1857
|
// beyond the payload size.
|
|
1815
|
-
s =
|
|
1816
|
-
|
|
1817
|
-
|
|
1858
|
+
s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset,
|
|
1859
|
+
handle.size(), rep_->file->file_name(),
|
|
1860
|
+
handle.offset());
|
|
1818
1861
|
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
|
|
1819
1862
|
}
|
|
1820
1863
|
} else if (!use_shared_buffer) {
|
|
@@ -1835,11 +1878,12 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1835
1878
|
// In all other cases, the raw block is either uncompressed into a heap
|
|
1836
1879
|
// buffer or there is no cache at all.
|
|
1837
1880
|
CompressionType compression_type =
|
|
1838
|
-
raw_block_contents
|
|
1881
|
+
GetBlockCompressionType(raw_block_contents);
|
|
1839
1882
|
if (use_shared_buffer && (compression_type == kNoCompression ||
|
|
1840
1883
|
(compression_type != kNoCompression &&
|
|
1841
1884
|
rep_->table_options.block_cache_compressed))) {
|
|
1842
|
-
Slice raw =
|
|
1885
|
+
Slice raw =
|
|
1886
|
+
Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle));
|
|
1843
1887
|
raw_block_contents = BlockContents(
|
|
1844
1888
|
CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), raw),
|
|
1845
1889
|
handle.size());
|
|
@@ -1858,9 +1902,10 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1858
1902
|
// necessary. Since we're passing the raw block contents, it will
|
|
1859
1903
|
// avoid looking up the block cache
|
|
1860
1904
|
s = MaybeReadBlockAndLoadToCache(
|
|
1861
|
-
nullptr, options, handle, uncompression_dict,
|
|
1862
|
-
BlockType::kData,
|
|
1863
|
-
&lookup_data_block_context,
|
|
1905
|
+
nullptr, options, handle, uncompression_dict, /*wait=*/true,
|
|
1906
|
+
/*for_compaction=*/false, block_entry, BlockType::kData,
|
|
1907
|
+
mget_iter->get_context, &lookup_data_block_context,
|
|
1908
|
+
&raw_block_contents);
|
|
1864
1909
|
|
|
1865
1910
|
// block_entry value could be null if no block cache is present, i.e
|
|
1866
1911
|
// BlockBasedTableOptions::no_block_cache is true and no compressed
|
|
@@ -1873,14 +1918,14 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1873
1918
|
}
|
|
1874
1919
|
|
|
1875
1920
|
CompressionType compression_type =
|
|
1876
|
-
raw_block_contents
|
|
1921
|
+
GetBlockCompressionType(raw_block_contents);
|
|
1877
1922
|
BlockContents contents;
|
|
1878
1923
|
if (compression_type != kNoCompression) {
|
|
1879
1924
|
UncompressionContext context(compression_type);
|
|
1880
1925
|
UncompressionInfo info(context, uncompression_dict, compression_type);
|
|
1881
|
-
s = UncompressBlockContents(
|
|
1882
|
-
|
|
1883
|
-
|
|
1926
|
+
s = UncompressBlockContents(
|
|
1927
|
+
info, req.result.data() + req_offset, handle.size(), &contents,
|
|
1928
|
+
footer.format_version(), rep_->ioptions, memory_allocator);
|
|
1884
1929
|
} else {
|
|
1885
1930
|
// There are two cases here:
|
|
1886
1931
|
// 1) caller uses the shared buffer (scratch or direct io buffer);
|
|
@@ -1893,7 +1938,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|
|
1893
1938
|
}
|
|
1894
1939
|
if (s.ok()) {
|
|
1895
1940
|
(*results)[idx_in_batch].SetOwnedValue(new Block(
|
|
1896
|
-
std::move(contents), read_amp_bytes_per_bit, ioptions.
|
|
1941
|
+
std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
|
|
1897
1942
|
}
|
|
1898
1943
|
}
|
|
1899
1944
|
(*statuses)[idx_in_batch] = s;
|
|
@@ -1906,22 +1951,23 @@ Status BlockBasedTable::RetrieveBlock(
|
|
|
1906
1951
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
1907
1952
|
CachableEntry<TBlocklike>* block_entry, BlockType block_type,
|
|
1908
1953
|
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
1909
|
-
bool for_compaction, bool use_cache) const {
|
|
1954
|
+
bool for_compaction, bool use_cache, bool wait_for_cache) const {
|
|
1910
1955
|
assert(block_entry);
|
|
1911
1956
|
assert(block_entry->IsEmpty());
|
|
1912
1957
|
|
|
1913
1958
|
Status s;
|
|
1914
1959
|
if (use_cache) {
|
|
1915
|
-
s = MaybeReadBlockAndLoadToCache(
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1960
|
+
s = MaybeReadBlockAndLoadToCache(
|
|
1961
|
+
prefetch_buffer, ro, handle, uncompression_dict, wait_for_cache,
|
|
1962
|
+
for_compaction, block_entry, block_type, get_context, lookup_context,
|
|
1963
|
+
/*contents=*/nullptr);
|
|
1919
1964
|
|
|
1920
1965
|
if (!s.ok()) {
|
|
1921
1966
|
return s;
|
|
1922
1967
|
}
|
|
1923
1968
|
|
|
1924
|
-
if (block_entry->GetValue() != nullptr
|
|
1969
|
+
if (block_entry->GetValue() != nullptr ||
|
|
1970
|
+
block_entry->GetCacheHandle() != nullptr) {
|
|
1925
1971
|
assert(s.ok());
|
|
1926
1972
|
return s;
|
|
1927
1973
|
}
|
|
@@ -1942,8 +1988,9 @@ Status BlockBasedTable::RetrieveBlock(
|
|
|
1942
1988
|
std::unique_ptr<TBlocklike> block;
|
|
1943
1989
|
|
|
1944
1990
|
{
|
|
1945
|
-
|
|
1946
|
-
|
|
1991
|
+
Histograms histogram =
|
|
1992
|
+
for_compaction ? READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS;
|
|
1993
|
+
StopWatch sw(rep_->ioptions.clock, rep_->ioptions.stats, histogram);
|
|
1947
1994
|
s = ReadBlockFromFile(
|
|
1948
1995
|
rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block,
|
|
1949
1996
|
rep_->ioptions, do_uncompress, maybe_compressed, block_type,
|
|
@@ -1989,32 +2036,32 @@ template Status BlockBasedTable::RetrieveBlock<BlockContents>(
|
|
|
1989
2036
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
1990
2037
|
CachableEntry<BlockContents>* block_entry, BlockType block_type,
|
|
1991
2038
|
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
1992
|
-
bool for_compaction, bool use_cache) const;
|
|
2039
|
+
bool for_compaction, bool use_cache, bool wait_for_cache) const;
|
|
1993
2040
|
|
|
1994
2041
|
template Status BlockBasedTable::RetrieveBlock<ParsedFullFilterBlock>(
|
|
1995
2042
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
|
1996
2043
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
1997
2044
|
CachableEntry<ParsedFullFilterBlock>* block_entry, BlockType block_type,
|
|
1998
2045
|
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
1999
|
-
bool for_compaction, bool use_cache) const;
|
|
2046
|
+
bool for_compaction, bool use_cache, bool wait_for_cache) const;
|
|
2000
2047
|
|
|
2001
2048
|
template Status BlockBasedTable::RetrieveBlock<Block>(
|
|
2002
2049
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
|
2003
2050
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
2004
2051
|
CachableEntry<Block>* block_entry, BlockType block_type,
|
|
2005
2052
|
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
2006
|
-
bool for_compaction, bool use_cache) const;
|
|
2053
|
+
bool for_compaction, bool use_cache, bool wait_for_cache) const;
|
|
2007
2054
|
|
|
2008
2055
|
template Status BlockBasedTable::RetrieveBlock<UncompressionDict>(
|
|
2009
2056
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
|
2010
2057
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
2011
2058
|
CachableEntry<UncompressionDict>* block_entry, BlockType block_type,
|
|
2012
2059
|
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
2013
|
-
bool for_compaction, bool use_cache) const;
|
|
2060
|
+
bool for_compaction, bool use_cache, bool wait_for_cache) const;
|
|
2014
2061
|
|
|
2015
2062
|
BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState(
|
|
2016
2063
|
const BlockBasedTable* table,
|
|
2017
|
-
|
|
2064
|
+
UnorderedMap<uint64_t, CachableEntry<Block>>* block_map)
|
|
2018
2065
|
: table_(table), block_map_(block_map) {}
|
|
2019
2066
|
|
|
2020
2067
|
InternalIteratorBase<IndexValue>*
|
|
@@ -2022,24 +2069,23 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
|
|
|
2022
2069
|
const BlockHandle& handle) {
|
|
2023
2070
|
// Return a block iterator on the index partition
|
|
2024
2071
|
auto block = block_map_->find(handle.offset());
|
|
2025
|
-
//
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
return new IndexBlockIter();
|
|
2072
|
+
// block_map_ must be exhaustive
|
|
2073
|
+
if (block == block_map_->end()) {
|
|
2074
|
+
assert(false);
|
|
2075
|
+
// Signal problem to caller
|
|
2076
|
+
return nullptr;
|
|
2077
|
+
}
|
|
2078
|
+
const Rep* rep = table_->get_rep();
|
|
2079
|
+
assert(rep);
|
|
2080
|
+
|
|
2081
|
+
Statistics* kNullStats = nullptr;
|
|
2082
|
+
// We don't return pinned data from index blocks, so no need
|
|
2083
|
+
// to set `block_contents_pinned`.
|
|
2084
|
+
return block->second.GetValue()->NewIndexIterator(
|
|
2085
|
+
rep->internal_comparator.user_comparator(),
|
|
2086
|
+
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
|
|
2087
|
+
rep->index_has_first_key, rep->index_key_includes_seq,
|
|
2088
|
+
rep->index_value_is_full);
|
|
2043
2089
|
}
|
|
2044
2090
|
|
|
2045
2091
|
// This will be broken if the user specifies an unusual implementation
|
|
@@ -2156,7 +2202,7 @@ bool BlockBasedTable::PrefixMayMatch(
|
|
|
2156
2202
|
}
|
|
2157
2203
|
|
|
2158
2204
|
if (filter_checked) {
|
|
2159
|
-
Statistics* statistics = rep_->ioptions.
|
|
2205
|
+
Statistics* statistics = rep_->ioptions.stats;
|
|
2160
2206
|
RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
|
|
2161
2207
|
if (!may_match) {
|
|
2162
2208
|
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
|
|
@@ -2166,6 +2212,17 @@ bool BlockBasedTable::PrefixMayMatch(
|
|
|
2166
2212
|
return may_match;
|
|
2167
2213
|
}
|
|
2168
2214
|
|
|
2215
|
+
bool BlockBasedTable::PrefixExtractorChanged(
|
|
2216
|
+
const SliceTransform* prefix_extractor) const {
|
|
2217
|
+
if (prefix_extractor == nullptr) {
|
|
2218
|
+
return true;
|
|
2219
|
+
} else if (prefix_extractor == rep_->table_prefix_extractor.get()) {
|
|
2220
|
+
return false;
|
|
2221
|
+
} else {
|
|
2222
|
+
return PrefixExtractorChangedHelper(rep_->table_properties.get(),
|
|
2223
|
+
prefix_extractor);
|
|
2224
|
+
}
|
|
2225
|
+
}
|
|
2169
2226
|
|
|
2170
2227
|
InternalIterator* BlockBasedTable::NewIterator(
|
|
2171
2228
|
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
|
|
@@ -2173,8 +2230,7 @@ InternalIterator* BlockBasedTable::NewIterator(
|
|
|
2173
2230
|
size_t compaction_readahead_size, bool allow_unprepared_value) {
|
|
2174
2231
|
BlockCacheLookupContext lookup_context{caller};
|
|
2175
2232
|
bool need_upper_bound_check =
|
|
2176
|
-
read_options.auto_prefix_mode ||
|
|
2177
|
-
PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
|
|
2233
|
+
read_options.auto_prefix_mode || PrefixExtractorChanged(prefix_extractor);
|
|
2178
2234
|
std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(NewIndexIterator(
|
|
2179
2235
|
read_options,
|
|
2180
2236
|
need_upper_bound_check &&
|
|
@@ -2212,8 +2268,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
|
|
|
2212
2268
|
}
|
|
2213
2269
|
|
|
2214
2270
|
bool BlockBasedTable::FullFilterKeyMayMatch(
|
|
2215
|
-
const
|
|
2216
|
-
const Slice& internal_key, const bool no_io,
|
|
2271
|
+
FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
|
|
2217
2272
|
const SliceTransform* prefix_extractor, GetContext* get_context,
|
|
2218
2273
|
BlockCacheLookupContext* lookup_context) const {
|
|
2219
2274
|
if (filter == nullptr || filter->IsBlockBased()) {
|
|
@@ -2228,26 +2283,25 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
|
|
|
2228
2283
|
may_match =
|
|
2229
2284
|
filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid,
|
|
2230
2285
|
no_io, const_ikey_ptr, get_context, lookup_context);
|
|
2231
|
-
} else if (!
|
|
2232
|
-
rep_->table_properties->prefix_extractor_name.compare(
|
|
2233
|
-
prefix_extractor->Name()) == 0 &&
|
|
2286
|
+
} else if (!PrefixExtractorChanged(prefix_extractor) &&
|
|
2234
2287
|
prefix_extractor->InDomain(user_key_without_ts) &&
|
|
2235
2288
|
!filter->PrefixMayMatch(
|
|
2236
2289
|
prefix_extractor->Transform(user_key_without_ts),
|
|
2237
2290
|
prefix_extractor, kNotValid, no_io, const_ikey_ptr,
|
|
2238
2291
|
get_context, lookup_context)) {
|
|
2292
|
+
// FIXME ^^^: there should be no reason for Get() to depend on current
|
|
2293
|
+
// prefix_extractor at all. It should always use table_prefix_extractor.
|
|
2239
2294
|
may_match = false;
|
|
2240
2295
|
}
|
|
2241
2296
|
if (may_match) {
|
|
2242
|
-
RecordTick(rep_->ioptions.
|
|
2297
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE);
|
|
2243
2298
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level);
|
|
2244
2299
|
}
|
|
2245
2300
|
return may_match;
|
|
2246
2301
|
}
|
|
2247
2302
|
|
|
2248
2303
|
void BlockBasedTable::FullFilterKeysMayMatch(
|
|
2249
|
-
|
|
2250
|
-
MultiGetRange* range, const bool no_io,
|
|
2304
|
+
FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
|
|
2251
2305
|
const SliceTransform* prefix_extractor,
|
|
2252
2306
|
BlockCacheLookupContext* lookup_context) const {
|
|
2253
2307
|
if (filter == nullptr || filter->IsBlockBased()) {
|
|
@@ -2260,28 +2314,26 @@ void BlockBasedTable::FullFilterKeysMayMatch(
|
|
|
2260
2314
|
lookup_context);
|
|
2261
2315
|
uint64_t after_keys = range->KeysLeft();
|
|
2262
2316
|
if (after_keys) {
|
|
2263
|
-
RecordTick(rep_->ioptions.
|
|
2264
|
-
after_keys);
|
|
2317
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
|
|
2265
2318
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys,
|
|
2266
2319
|
rep_->level);
|
|
2267
2320
|
}
|
|
2268
2321
|
uint64_t filtered_keys = before_keys - after_keys;
|
|
2269
2322
|
if (filtered_keys) {
|
|
2270
|
-
RecordTick(rep_->ioptions.
|
|
2323
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL, filtered_keys);
|
|
2271
2324
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys,
|
|
2272
2325
|
rep_->level);
|
|
2273
2326
|
}
|
|
2274
|
-
} else if (!
|
|
2275
|
-
|
|
2276
|
-
|
|
2327
|
+
} else if (!PrefixExtractorChanged(prefix_extractor)) {
|
|
2328
|
+
// FIXME ^^^: there should be no reason for MultiGet() to depend on current
|
|
2329
|
+
// prefix_extractor at all. It should always use table_prefix_extractor.
|
|
2277
2330
|
filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
|
|
2278
2331
|
lookup_context);
|
|
2279
|
-
RecordTick(rep_->ioptions.
|
|
2280
|
-
before_keys);
|
|
2332
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
|
|
2281
2333
|
uint64_t after_keys = range->KeysLeft();
|
|
2282
2334
|
uint64_t filtered_keys = before_keys - after_keys;
|
|
2283
2335
|
if (filtered_keys) {
|
|
2284
|
-
RecordTick(rep_->ioptions.
|
|
2336
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_USEFUL,
|
|
2285
2337
|
filtered_keys);
|
|
2286
2338
|
}
|
|
2287
2339
|
}
|
|
@@ -2312,12 +2364,11 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2312
2364
|
read_options.snapshot != nullptr;
|
|
2313
2365
|
}
|
|
2314
2366
|
TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
|
|
2315
|
-
const bool may_match =
|
|
2316
|
-
|
|
2317
|
-
get_context, &lookup_context);
|
|
2367
|
+
const bool may_match = FullFilterKeyMayMatch(
|
|
2368
|
+
filter, key, no_io, prefix_extractor, get_context, &lookup_context);
|
|
2318
2369
|
TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
|
|
2319
2370
|
if (!may_match) {
|
|
2320
|
-
RecordTick(rep_->ioptions.
|
|
2371
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
|
|
2321
2372
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
|
|
2322
2373
|
} else {
|
|
2323
2374
|
IndexBlockIter iiter_on_stack;
|
|
@@ -2325,8 +2376,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2325
2376
|
// BlockPrefixIndex. Only do this check when index_type is kHashSearch.
|
|
2326
2377
|
bool need_upper_bound_check = false;
|
|
2327
2378
|
if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
|
|
2328
|
-
need_upper_bound_check = PrefixExtractorChanged(
|
|
2329
|
-
rep_->table_properties.get(), prefix_extractor);
|
|
2379
|
+
need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
|
|
2330
2380
|
}
|
|
2331
2381
|
auto iiter =
|
|
2332
2382
|
NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
|
|
@@ -2354,15 +2404,16 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2354
2404
|
// Not found
|
|
2355
2405
|
// TODO: think about interaction with Merge. If a user key cannot
|
|
2356
2406
|
// cross one data block, we should be fine.
|
|
2357
|
-
RecordTick(rep_->ioptions.
|
|
2407
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
|
|
2358
2408
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
|
|
2359
2409
|
break;
|
|
2360
2410
|
}
|
|
2361
2411
|
|
|
2362
2412
|
if (!v.first_internal_key.empty() && !skip_filters &&
|
|
2363
2413
|
UserComparatorWrapper(rep_->internal_comparator.user_comparator())
|
|
2364
|
-
.
|
|
2365
|
-
|
|
2414
|
+
.CompareWithoutTimestamp(
|
|
2415
|
+
ExtractUserKey(key),
|
|
2416
|
+
ExtractUserKey(v.first_internal_key)) < 0) {
|
|
2366
2417
|
// The requested key falls between highest key in previous block and
|
|
2367
2418
|
// lowest key in current block.
|
|
2368
2419
|
break;
|
|
@@ -2385,6 +2436,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2385
2436
|
// Update Saver.state to Found because we are only looking for
|
|
2386
2437
|
// whether we can guarantee the key is not there when "no_io" is set
|
|
2387
2438
|
get_context->MarkKeyMayExist();
|
|
2439
|
+
s = biter.status();
|
|
2388
2440
|
break;
|
|
2389
2441
|
}
|
|
2390
2442
|
if (!biter.status().ok()) {
|
|
@@ -2435,7 +2487,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2435
2487
|
referenced_key = key;
|
|
2436
2488
|
}
|
|
2437
2489
|
BlockCacheTraceRecord access_record(
|
|
2438
|
-
rep_->ioptions.
|
|
2490
|
+
rep_->ioptions.clock->NowMicros(),
|
|
2439
2491
|
/*block_key=*/"", lookup_data_block_context.block_type,
|
|
2440
2492
|
lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
|
|
2441
2493
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
|
@@ -2461,7 +2513,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2461
2513
|
}
|
|
2462
2514
|
}
|
|
2463
2515
|
if (matched && filter != nullptr && !filter->IsBlockBased()) {
|
|
2464
|
-
RecordTick(rep_->ioptions.
|
|
2516
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
|
|
2465
2517
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
|
|
2466
2518
|
rep_->level);
|
|
2467
2519
|
}
|
|
@@ -2499,8 +2551,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2499
2551
|
BlockCacheLookupContext lookup_context{
|
|
2500
2552
|
TableReaderCaller::kUserMultiGet, tracing_mget_id,
|
|
2501
2553
|
/*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
|
|
2502
|
-
FullFilterKeysMayMatch(
|
|
2503
|
-
|
|
2554
|
+
FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
|
|
2555
|
+
&lookup_context);
|
|
2504
2556
|
|
|
2505
2557
|
if (!sst_file_range.empty()) {
|
|
2506
2558
|
IndexBlockIter iiter_on_stack;
|
|
@@ -2508,8 +2560,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2508
2560
|
// BlockPrefixIndex. Only do this check when index_type is kHashSearch.
|
|
2509
2561
|
bool need_upper_bound_check = false;
|
|
2510
2562
|
if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
|
|
2511
|
-
need_upper_bound_check = PrefixExtractorChanged(
|
|
2512
|
-
rep_->table_properties.get(), prefix_extractor);
|
|
2563
|
+
need_upper_bound_check = PrefixExtractorChanged(prefix_extractor);
|
|
2513
2564
|
}
|
|
2514
2565
|
auto iiter =
|
|
2515
2566
|
NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
|
|
@@ -2528,6 +2579,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2528
2579
|
{
|
|
2529
2580
|
MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(),
|
|
2530
2581
|
sst_file_range.end());
|
|
2582
|
+
std::vector<Cache::Handle*> cache_handles;
|
|
2583
|
+
bool wait_for_cache_results = false;
|
|
2531
2584
|
|
|
2532
2585
|
CachableEntry<UncompressionDict> uncompression_dict;
|
|
2533
2586
|
Status uncompression_dict_status;
|
|
@@ -2549,8 +2602,9 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2549
2602
|
if (!iiter->Valid() ||
|
|
2550
2603
|
(!v.first_internal_key.empty() && !skip_filters &&
|
|
2551
2604
|
UserComparatorWrapper(rep_->internal_comparator.user_comparator())
|
|
2552
|
-
.
|
|
2553
|
-
|
|
2605
|
+
.CompareWithoutTimestamp(
|
|
2606
|
+
ExtractUserKey(key),
|
|
2607
|
+
ExtractUserKey(v.first_internal_key)) < 0)) {
|
|
2554
2608
|
// The requested key falls between highest key in previous block and
|
|
2555
2609
|
// lowest key in current block.
|
|
2556
2610
|
if (!iiter->status().IsNotFound()) {
|
|
@@ -2565,6 +2619,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2565
2619
|
uncompression_dict_status =
|
|
2566
2620
|
rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
|
|
2567
2621
|
nullptr /* prefetch_buffer */, no_io,
|
|
2622
|
+
read_options.verify_checksums,
|
|
2568
2623
|
sst_file_range.begin()->get_context, &lookup_context,
|
|
2569
2624
|
&uncompression_dict);
|
|
2570
2625
|
uncompression_dict_inited = true;
|
|
@@ -2599,17 +2654,58 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2599
2654
|
Status s = RetrieveBlock(
|
|
2600
2655
|
nullptr, ro, handle, dict, &(results.back()), BlockType::kData,
|
|
2601
2656
|
miter->get_context, &lookup_data_block_context,
|
|
2602
|
-
/* for_compaction */ false, /* use_cache */ true
|
|
2657
|
+
/* for_compaction */ false, /* use_cache */ true,
|
|
2658
|
+
/* wait_for_cache */ false);
|
|
2603
2659
|
if (s.IsIncomplete()) {
|
|
2604
2660
|
s = Status::OK();
|
|
2605
2661
|
}
|
|
2606
2662
|
if (s.ok() && !results.back().IsEmpty()) {
|
|
2607
|
-
//
|
|
2608
|
-
//
|
|
2609
|
-
|
|
2663
|
+
// Since we have a valid handle, check the value. If its nullptr,
|
|
2664
|
+
// it means the cache is waiting for the final result and we're
|
|
2665
|
+
// supposed to call WaitAll() to wait for the result.
|
|
2666
|
+
if (results.back().GetValue() != nullptr) {
|
|
2667
|
+
// Found it in the cache. Add NULL handle to indicate there is
|
|
2668
|
+
// nothing to read from disk.
|
|
2669
|
+
if (results.back().GetCacheHandle()) {
|
|
2670
|
+
results.back().UpdateCachedValue();
|
|
2671
|
+
}
|
|
2672
|
+
block_handles.emplace_back(BlockHandle::NullBlockHandle());
|
|
2673
|
+
} else {
|
|
2674
|
+
// We have to wait for the cache lookup to finish in the
|
|
2675
|
+
// background, and then we may have to read the block from disk
|
|
2676
|
+
// anyway
|
|
2677
|
+
assert(results.back().GetCacheHandle());
|
|
2678
|
+
wait_for_cache_results = true;
|
|
2679
|
+
block_handles.emplace_back(handle);
|
|
2680
|
+
cache_handles.emplace_back(results.back().GetCacheHandle());
|
|
2681
|
+
}
|
|
2610
2682
|
} else {
|
|
2611
2683
|
block_handles.emplace_back(handle);
|
|
2612
|
-
total_len +=
|
|
2684
|
+
total_len += BlockSizeWithTrailer(handle);
|
|
2685
|
+
}
|
|
2686
|
+
}
|
|
2687
|
+
|
|
2688
|
+
if (wait_for_cache_results) {
|
|
2689
|
+
Cache* block_cache = rep_->table_options.block_cache.get();
|
|
2690
|
+
block_cache->WaitAll(cache_handles);
|
|
2691
|
+
for (size_t i = 0; i < block_handles.size(); ++i) {
|
|
2692
|
+
// If this block was a success or failure or not needed because
|
|
2693
|
+
// the corresponding key is in the same block as a prior key, skip
|
|
2694
|
+
if (block_handles[i] == BlockHandle::NullBlockHandle() ||
|
|
2695
|
+
results[i].IsEmpty()) {
|
|
2696
|
+
continue;
|
|
2697
|
+
}
|
|
2698
|
+
results[i].UpdateCachedValue();
|
|
2699
|
+
void* val = results[i].GetValue();
|
|
2700
|
+
if (!val) {
|
|
2701
|
+
// The async cache lookup failed - could be due to an error
|
|
2702
|
+
// or a false positive. We need to read the data block from
|
|
2703
|
+
// the SST file
|
|
2704
|
+
results[i].Reset();
|
|
2705
|
+
total_len += BlockSizeWithTrailer(block_handles[i]);
|
|
2706
|
+
} else {
|
|
2707
|
+
block_handles[i] = BlockHandle::NullBlockHandle();
|
|
2708
|
+
}
|
|
2613
2709
|
}
|
|
2614
2710
|
}
|
|
2615
2711
|
|
|
@@ -2688,8 +2784,9 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2688
2784
|
IndexValue v = iiter->value();
|
|
2689
2785
|
if (!v.first_internal_key.empty() && !skip_filters &&
|
|
2690
2786
|
UserComparatorWrapper(rep_->internal_comparator.user_comparator())
|
|
2691
|
-
.
|
|
2692
|
-
|
|
2787
|
+
.CompareWithoutTimestamp(
|
|
2788
|
+
ExtractUserKey(key),
|
|
2789
|
+
ExtractUserKey(v.first_internal_key)) < 0) {
|
|
2693
2790
|
// The requested key falls between highest key in previous block and
|
|
2694
2791
|
// lowest key in current block.
|
|
2695
2792
|
break;
|
|
@@ -2771,7 +2868,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2771
2868
|
referenced_key = key;
|
|
2772
2869
|
}
|
|
2773
2870
|
BlockCacheTraceRecord access_record(
|
|
2774
|
-
rep_->ioptions.
|
|
2871
|
+
rep_->ioptions.clock->NowMicros(),
|
|
2775
2872
|
/*block_key=*/"", lookup_data_block_context.block_type,
|
|
2776
2873
|
lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
|
|
2777
2874
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
|
@@ -2803,7 +2900,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|
|
2803
2900
|
} while (iiter->Valid());
|
|
2804
2901
|
|
|
2805
2902
|
if (matched && filter != nullptr && !filter->IsBlockBased()) {
|
|
2806
|
-
RecordTick(rep_->ioptions.
|
|
2903
|
+
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
|
|
2807
2904
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
|
|
2808
2905
|
rep_->level);
|
|
2809
2906
|
}
|
|
@@ -2924,11 +3021,11 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
|
|
|
2924
3021
|
// increasing of the buffer size.
|
|
2925
3022
|
size_t readahead_size = (read_options.readahead_size != 0)
|
|
2926
3023
|
? read_options.readahead_size
|
|
2927
|
-
:
|
|
3024
|
+
: rep_->table_options.max_auto_readahead_size;
|
|
2928
3025
|
// FilePrefetchBuffer doesn't work in mmap mode and readahead is not
|
|
2929
3026
|
// needed there.
|
|
2930
3027
|
FilePrefetchBuffer prefetch_buffer(
|
|
2931
|
-
|
|
3028
|
+
readahead_size /* readahead_size */,
|
|
2932
3029
|
readahead_size /* max_readahead_size */,
|
|
2933
3030
|
!rep_->ioptions.allow_mmap_reads /* enable */);
|
|
2934
3031
|
|
|
@@ -2940,7 +3037,7 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
|
|
|
2940
3037
|
BlockHandle handle = index_iter->value().handle;
|
|
2941
3038
|
BlockContents contents;
|
|
2942
3039
|
BlockFetcher block_fetcher(
|
|
2943
|
-
rep_->file.get(), &prefetch_buffer, rep_->footer,
|
|
3040
|
+
rep_->file.get(), &prefetch_buffer, rep_->footer, read_options, handle,
|
|
2944
3041
|
&contents, rep_->ioptions, false /* decompress */,
|
|
2945
3042
|
false /*maybe_compressed*/, BlockType::kData,
|
|
2946
3043
|
UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options);
|
|
@@ -2966,15 +3063,15 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
|
|
|
2966
3063
|
return BlockType::kFilter;
|
|
2967
3064
|
}
|
|
2968
3065
|
|
|
2969
|
-
if (meta_block_name ==
|
|
3066
|
+
if (meta_block_name == kPropertiesBlockName) {
|
|
2970
3067
|
return BlockType::kProperties;
|
|
2971
3068
|
}
|
|
2972
3069
|
|
|
2973
|
-
if (meta_block_name ==
|
|
3070
|
+
if (meta_block_name == kCompressionDictBlockName) {
|
|
2974
3071
|
return BlockType::kCompressionDictionary;
|
|
2975
3072
|
}
|
|
2976
3073
|
|
|
2977
|
-
if (meta_block_name ==
|
|
3074
|
+
if (meta_block_name == kRangeDelBlockName) {
|
|
2978
3075
|
return BlockType::kRangeDeletion;
|
|
2979
3076
|
}
|
|
2980
3077
|
|
|
@@ -3003,20 +3100,22 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
|
|
|
3003
3100
|
s = handle.DecodeFrom(&input);
|
|
3004
3101
|
BlockContents contents;
|
|
3005
3102
|
const Slice meta_block_name = index_iter->key();
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3013
|
-
|
|
3014
|
-
|
|
3015
|
-
|
|
3016
|
-
|
|
3017
|
-
|
|
3018
|
-
|
|
3019
|
-
|
|
3103
|
+
if (meta_block_name == kPropertiesBlockName) {
|
|
3104
|
+
// Unfortunate special handling for properties block checksum w/
|
|
3105
|
+
// global seqno
|
|
3106
|
+
std::unique_ptr<TableProperties> table_properties;
|
|
3107
|
+
s = ReadTablePropertiesHelper(ReadOptions(), handle, rep_->file.get(),
|
|
3108
|
+
nullptr /* prefetch_buffer */, rep_->footer,
|
|
3109
|
+
rep_->ioptions, &table_properties,
|
|
3110
|
+
nullptr /* memory_allocator */);
|
|
3111
|
+
} else {
|
|
3112
|
+
s = BlockFetcher(
|
|
3113
|
+
rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer,
|
|
3114
|
+
ReadOptions(), handle, &contents, rep_->ioptions,
|
|
3115
|
+
false /* decompress */, false /*maybe_compressed*/,
|
|
3116
|
+
GetBlockTypeForMetaBlockByName(meta_block_name),
|
|
3117
|
+
UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options)
|
|
3118
|
+
.ReadBlockContents();
|
|
3020
3119
|
}
|
|
3021
3120
|
if (!s.ok()) {
|
|
3022
3121
|
break;
|
|
@@ -3033,12 +3132,9 @@ bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const {
|
|
|
3033
3132
|
return false;
|
|
3034
3133
|
}
|
|
3035
3134
|
|
|
3036
|
-
|
|
3037
|
-
Slice cache_key =
|
|
3038
|
-
GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, handle,
|
|
3039
|
-
cache_key_storage);
|
|
3135
|
+
CacheKey key = GetCacheKey(rep_->base_cache_key, handle);
|
|
3040
3136
|
|
|
3041
|
-
Cache::Handle* const cache_handle = cache->Lookup(
|
|
3137
|
+
Cache::Handle* const cache_handle = cache->Lookup(key.AsSlice());
|
|
3042
3138
|
if (cache_handle == nullptr) {
|
|
3043
3139
|
return false;
|
|
3044
3140
|
}
|
|
@@ -3067,15 +3163,9 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
|
|
|
3067
3163
|
// 5. index_type
|
|
3068
3164
|
Status BlockBasedTable::CreateIndexReader(
|
|
3069
3165
|
const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
|
|
3070
|
-
InternalIterator*
|
|
3071
|
-
|
|
3166
|
+
InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin,
|
|
3167
|
+
BlockCacheLookupContext* lookup_context,
|
|
3072
3168
|
std::unique_ptr<IndexReader>* index_reader) {
|
|
3073
|
-
// kHashSearch requires non-empty prefix_extractor but bypass checking
|
|
3074
|
-
// prefix_extractor here since we have no access to MutableCFOptions.
|
|
3075
|
-
// Add need_upper_bound_check flag in BlockBasedTable::NewIndexIterator.
|
|
3076
|
-
// If prefix_extractor does not match prefix_extractor_name from table
|
|
3077
|
-
// properties, turn off Hash Index by setting total_order_seek to true
|
|
3078
|
-
|
|
3079
3169
|
switch (rep_->index_type) {
|
|
3080
3170
|
case BlockBasedTableOptions::kTwoLevelIndexSearch: {
|
|
3081
3171
|
return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache,
|
|
@@ -3092,25 +3182,13 @@ Status BlockBasedTable::CreateIndexReader(
|
|
|
3092
3182
|
case BlockBasedTableOptions::kHashSearch: {
|
|
3093
3183
|
std::unique_ptr<Block> metaindex_guard;
|
|
3094
3184
|
std::unique_ptr<InternalIterator> metaindex_iter_guard;
|
|
3095
|
-
auto meta_index_iter = preloaded_meta_index_iter;
|
|
3096
3185
|
bool should_fallback = false;
|
|
3186
|
+
// FIXME: is changed prefix_extractor handled anywhere for hash index?
|
|
3097
3187
|
if (rep_->internal_prefix_transform.get() == nullptr) {
|
|
3098
|
-
ROCKS_LOG_WARN(rep_->ioptions.
|
|
3188
|
+
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
|
3099
3189
|
"No prefix extractor passed in. Fall back to binary"
|
|
3100
3190
|
" search index.");
|
|
3101
3191
|
should_fallback = true;
|
|
3102
|
-
} else if (meta_index_iter == nullptr) {
|
|
3103
|
-
auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard,
|
|
3104
|
-
&metaindex_iter_guard);
|
|
3105
|
-
if (!s.ok()) {
|
|
3106
|
-
// we simply fall back to binary search in case there is any
|
|
3107
|
-
// problem with prefix hash index loading.
|
|
3108
|
-
ROCKS_LOG_WARN(rep_->ioptions.info_log,
|
|
3109
|
-
"Unable to read the metaindex block."
|
|
3110
|
-
" Fall back to binary search index.");
|
|
3111
|
-
should_fallback = true;
|
|
3112
|
-
}
|
|
3113
|
-
meta_index_iter = metaindex_iter_guard.get();
|
|
3114
3192
|
}
|
|
3115
3193
|
|
|
3116
3194
|
if (should_fallback) {
|
|
@@ -3118,9 +3196,9 @@ Status BlockBasedTable::CreateIndexReader(
|
|
|
3118
3196
|
use_cache, prefetch, pin,
|
|
3119
3197
|
lookup_context, index_reader);
|
|
3120
3198
|
} else {
|
|
3121
|
-
return HashIndexReader::Create(this, ro, prefetch_buffer,
|
|
3122
|
-
|
|
3123
|
-
|
|
3199
|
+
return HashIndexReader::Create(this, ro, prefetch_buffer, meta_iter,
|
|
3200
|
+
use_cache, prefetch, pin, lookup_context,
|
|
3201
|
+
index_reader);
|
|
3124
3202
|
}
|
|
3125
3203
|
}
|
|
3126
3204
|
default: {
|
|
@@ -3134,6 +3212,7 @@ Status BlockBasedTable::CreateIndexReader(
|
|
|
3134
3212
|
uint64_t BlockBasedTable::ApproximateDataOffsetOf(
|
|
3135
3213
|
const InternalIteratorBase<IndexValue>& index_iter,
|
|
3136
3214
|
uint64_t data_size) const {
|
|
3215
|
+
assert(index_iter.status().ok());
|
|
3137
3216
|
if (index_iter.Valid()) {
|
|
3138
3217
|
BlockHandle handle = index_iter.value().handle;
|
|
3139
3218
|
return handle.offset();
|
|
@@ -3176,8 +3255,16 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
|
|
|
3176
3255
|
}
|
|
3177
3256
|
|
|
3178
3257
|
index_iter->Seek(key);
|
|
3258
|
+
uint64_t offset;
|
|
3259
|
+
if (index_iter->status().ok()) {
|
|
3260
|
+
offset = ApproximateDataOffsetOf(*index_iter, data_size);
|
|
3261
|
+
} else {
|
|
3262
|
+
// Split in half to avoid skewing one way or another,
|
|
3263
|
+
// since we don't know whether we're operating on lower bound or
|
|
3264
|
+
// upper bound.
|
|
3265
|
+
return rep_->file_size / 2;
|
|
3266
|
+
}
|
|
3179
3267
|
|
|
3180
|
-
uint64_t offset = ApproximateDataOffsetOf(*index_iter, data_size);
|
|
3181
3268
|
// Pro-rate file metadata (incl filters) size-proportionally across data
|
|
3182
3269
|
// blocks.
|
|
3183
3270
|
double size_ratio =
|
|
@@ -3193,7 +3280,9 @@ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
|
|
|
3193
3280
|
uint64_t data_size = GetApproximateDataSize();
|
|
3194
3281
|
if (UNLIKELY(data_size == 0)) {
|
|
3195
3282
|
// Hmm. Assume whole file is involved, since we have lower and upper
|
|
3196
|
-
// bound.
|
|
3283
|
+
// bound. This likely skews the estimate if we consider that this function
|
|
3284
|
+
// is typically called with `[start, end]` fully contained in the file's
|
|
3285
|
+
// key-range.
|
|
3197
3286
|
return rep_->file_size;
|
|
3198
3287
|
}
|
|
3199
3288
|
|
|
@@ -3211,9 +3300,24 @@ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
|
|
|
3211
3300
|
}
|
|
3212
3301
|
|
|
3213
3302
|
index_iter->Seek(start);
|
|
3214
|
-
uint64_t start_offset
|
|
3303
|
+
uint64_t start_offset;
|
|
3304
|
+
if (index_iter->status().ok()) {
|
|
3305
|
+
start_offset = ApproximateDataOffsetOf(*index_iter, data_size);
|
|
3306
|
+
} else {
|
|
3307
|
+
// Assume file is involved from the start. This likely skews the estimate
|
|
3308
|
+
// but is consistent with the above error handling.
|
|
3309
|
+
start_offset = 0;
|
|
3310
|
+
}
|
|
3311
|
+
|
|
3215
3312
|
index_iter->Seek(end);
|
|
3216
|
-
uint64_t end_offset
|
|
3313
|
+
uint64_t end_offset;
|
|
3314
|
+
if (index_iter->status().ok()) {
|
|
3315
|
+
end_offset = ApproximateDataOffsetOf(*index_iter, data_size);
|
|
3316
|
+
} else {
|
|
3317
|
+
// Assume file is involved until the end. This likely skews the estimate
|
|
3318
|
+
// but is consistent with the above error handling.
|
|
3319
|
+
end_offset = data_size;
|
|
3320
|
+
}
|
|
3217
3321
|
|
|
3218
3322
|
assert(end_offset >= start_offset);
|
|
3219
3323
|
// Pro-rate file metadata (incl filters) size-proportionally across data
|
|
@@ -3226,7 +3330,8 @@ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
|
|
|
3226
3330
|
|
|
3227
3331
|
bool BlockBasedTable::TEST_FilterBlockInCache() const {
|
|
3228
3332
|
assert(rep_ != nullptr);
|
|
3229
|
-
return
|
|
3333
|
+
return rep_->filter_type != Rep::FilterType::kNoFilter &&
|
|
3334
|
+
TEST_BlockInCache(rep_->filter_handle);
|
|
3230
3335
|
}
|
|
3231
3336
|
|
|
3232
3337
|
bool BlockBasedTable::TEST_IndexBlockInCache() const {
|
|
@@ -3313,17 +3418,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
|
|
|
3313
3418
|
if (!s.ok()) {
|
|
3314
3419
|
return s;
|
|
3315
3420
|
}
|
|
3316
|
-
if (metaindex_iter->key() ==
|
|
3421
|
+
if (metaindex_iter->key() == kPropertiesBlockName) {
|
|
3317
3422
|
out_stream << " Properties block handle: "
|
|
3318
3423
|
<< metaindex_iter->value().ToString(true) << "\n";
|
|
3319
|
-
} else if (metaindex_iter->key() ==
|
|
3424
|
+
} else if (metaindex_iter->key() == kCompressionDictBlockName) {
|
|
3320
3425
|
out_stream << " Compression dictionary block handle: "
|
|
3321
3426
|
<< metaindex_iter->value().ToString(true) << "\n";
|
|
3322
3427
|
} else if (strstr(metaindex_iter->key().ToString().c_str(),
|
|
3323
3428
|
"filter.rocksdb.") != nullptr) {
|
|
3324
3429
|
out_stream << " Filter block handle: "
|
|
3325
3430
|
<< metaindex_iter->value().ToString(true) << "\n";
|
|
3326
|
-
} else if (metaindex_iter->key() ==
|
|
3431
|
+
} else if (metaindex_iter->key() == kRangeDelBlockName) {
|
|
3327
3432
|
out_stream << " Range deletion block handle: "
|
|
3328
3433
|
<< metaindex_iter->value().ToString(true) << "\n";
|
|
3329
3434
|
}
|
|
@@ -3360,6 +3465,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
|
|
|
3360
3465
|
CachableEntry<UncompressionDict> uncompression_dict;
|
|
3361
3466
|
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
|
|
3362
3467
|
nullptr /* prefetch_buffer */, false /* no_io */,
|
|
3468
|
+
false, /* verify_checksums */
|
|
3363
3469
|
nullptr /* get_context */, nullptr /* lookup_context */,
|
|
3364
3470
|
&uncompression_dict);
|
|
3365
3471
|
if (!s.ok()) {
|