@nxtedition/rocksdb 10.1.5 → 10.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +19 -11
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -5
- package/deps/rocksdb/rocksdb/Makefile +38 -15
- package/deps/rocksdb/rocksdb/TARGETS +10 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +4 -2
- package/deps/rocksdb/rocksdb/db/builder.cc +2 -2
- package/deps/rocksdb/rocksdb/db/builder.h +1 -1
- package/deps/rocksdb/rocksdb/db/c.cc +205 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +189 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +28 -0
- package/deps/rocksdb/rocksdb/db/column_family.h +17 -0
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +234 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +11 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +22 -25
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +112 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +72 -21
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +77 -0
- package/deps/rocksdb/rocksdb/db/convenience.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +269 -112
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +107 -43
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +93 -24
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +157 -68
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +56 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -105
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +39 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +21 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +107 -63
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +43 -2
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -6
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +92 -2
- package/deps/rocksdb/rocksdb/db/error_handler.cc +34 -39
- package/deps/rocksdb/rocksdb/db/error_handler.h +3 -4
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +6 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +71 -15
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +383 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +88 -72
- package/deps/rocksdb/rocksdb/db/flush_job.cc +30 -3
- package/deps/rocksdb/rocksdb/db/flush_job.h +14 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +60 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +20 -1
- package/deps/rocksdb/rocksdb/db/log_writer.cc +24 -0
- package/deps/rocksdb/rocksdb/db/log_writer.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/memtable.h +10 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +4 -4
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +10 -3
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +8 -10
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +30 -0
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +9 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +17 -2
- package/deps/rocksdb/rocksdb/db/table_cache.h +9 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +9 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -3
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_edit.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -6
- package/deps/rocksdb/rocksdb/db/version_set.cc +54 -31
- package/deps/rocksdb/rocksdb/db/version_set.h +14 -7
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +37 -29
- package/deps/rocksdb/rocksdb/db/wal_manager.h +6 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +54 -23
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +46 -5
- package/deps/rocksdb/rocksdb/db/write_thread.cc +53 -5
- package/deps/rocksdb/rocksdb/db/write_thread.h +36 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +57 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +11 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +10 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +25 -88
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.cc +93 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.h +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +43 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +109 -21
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +666 -205
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +55 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -16
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +782 -494
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +21 -0
- package/deps/rocksdb/rocksdb/env/env.cc +6 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +0 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +34 -19
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +29 -32
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +41 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +63 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +16 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +0 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +76 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +12 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +31 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_write_callback.h +29 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +17 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +9 -5
- package/deps/rocksdb/rocksdb/options/options.cc +3 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +1 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +3 -2
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +15 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +31 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +10 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +11 -15
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -21
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +9 -11
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +14 -9
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +82 -41
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +13 -14
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +18 -22
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +9 -10
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +3 -2
- package/deps/rocksdb/rocksdb/table/format.cc +1 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +18 -13
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +5 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +8 -7
- package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +1 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +19 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +434 -110
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -1
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +3 -0
- package/deps/rocksdb/rocksdb/util/aligned_storage.h +24 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/random.cc +2 -1
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +1 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +33 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +7 -0
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +33 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +5 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +10 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +13 -13
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +104 -48
- package/deps/rocksdb/rocksdb/utilities/debug.cc +16 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +647 -235
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -157
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector_test.cc +139 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +105 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +64 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +43 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +154 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +158 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +16 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +9 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +43 -7
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/chained-batch-test.js.tap +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/get-test.js.tap +0 -0
- package/.tap/test-results/test/abstract-level-test.js.tap +0 -1077
- package/.tap/test-results/test/batch-test.js.tap +0 -12
- package/.tap/test-results/test/chained-batch-gc-test.js.tap +0 -11
- package/.tap/test-results/test/cleanup-hanging-iterators-test.js.tap +0 -135
- package/.tap/test-results/test/clear-gc-test.js.tap +0 -13
- package/.tap/test-results/test/column-test.js.tap +0 -55
- package/.tap/test-results/test/common.js.tap +0 -0
- package/.tap/test-results/test/compression-test.js.tap +0 -30
- package/.tap/test-results/test/db-identity.js.tap +0 -12
- package/.tap/test-results/test/electron.js.tap +0 -0
- package/.tap/test-results/test/env-cleanup-hook-test.js.tap +0 -40
- package/.tap/test-results/test/env-cleanup-hook.js.tap +0 -0
- package/.tap/test-results/test/gc.js.tap +0 -0
- package/.tap/test-results/test/getproperty-test.js.tap +0 -29
- package/.tap/test-results/test/iterator-gc-test.js.tap +0 -15
- package/.tap/test-results/test/iterator-hwm-test.js.tap +0 -131
- package/.tap/test-results/test/iterator-recursion-test.js.tap +0 -12
- package/.tap/test-results/test/iterator-starvation-test.js.tap +0 -73
- package/.tap/test-results/test/iterator-test.js.tap +0 -6
- package/.tap/test-results/test/leak-tester-batch.js.tap +0 -0
- package/.tap/test-results/test/leak-tester-iterator.js.tap +0 -0
- package/.tap/test-results/test/leak-tester.js.tap +0 -0
- package/.tap/test-results/test/lock-test.js.tap +0 -18
- package/.tap/test-results/test/lock.js.tap +0 -0
- package/.tap/test-results/test/make.js.tap +0 -0
- package/.tap/test-results/test/max-rev-merge.js.tap +0 -0
- package/.tap/test-results/test/merge-operator-test.js.tap +0 -12
- package/.tap/test-results/test/mkdir-test.js.tap +0 -15
- package/.tap/test-results/test/segfault-test.js.tap +0 -76
- package/.tap/test-results/test/stack-blower.js.tap +0 -0
- package/deps/rocksdb/rocksdb/README.md +0 -29
- package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
- package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
|
@@ -173,7 +173,8 @@ struct MutableCFOptions {
|
|
|
173
173
|
compression_per_level(options.compression_per_level),
|
|
174
174
|
memtable_max_range_deletions(options.memtable_max_range_deletions),
|
|
175
175
|
bottommost_file_compaction_delay(
|
|
176
|
-
options.bottommost_file_compaction_delay)
|
|
176
|
+
options.bottommost_file_compaction_delay),
|
|
177
|
+
uncache_aggressiveness(options.uncache_aggressiveness) {
|
|
177
178
|
RefreshDerivedOptions(options.num_levels, options.compaction_style);
|
|
178
179
|
}
|
|
179
180
|
|
|
@@ -223,7 +224,9 @@ struct MutableCFOptions {
|
|
|
223
224
|
memtable_protection_bytes_per_key(0),
|
|
224
225
|
block_protection_bytes_per_key(0),
|
|
225
226
|
sample_for_compression(0),
|
|
226
|
-
memtable_max_range_deletions(0)
|
|
227
|
+
memtable_max_range_deletions(0),
|
|
228
|
+
bottommost_file_compaction_delay(0),
|
|
229
|
+
uncache_aggressiveness(0) {}
|
|
227
230
|
|
|
228
231
|
explicit MutableCFOptions(const Options& options);
|
|
229
232
|
|
|
@@ -319,6 +322,7 @@ struct MutableCFOptions {
|
|
|
319
322
|
std::vector<CompressionType> compression_per_level;
|
|
320
323
|
uint32_t memtable_max_range_deletions;
|
|
321
324
|
uint32_t bottommost_file_compaction_delay;
|
|
325
|
+
uint32_t uncache_aggressiveness;
|
|
322
326
|
|
|
323
327
|
// Derived options
|
|
324
328
|
// Per-level target file size.
|
|
@@ -388,6 +388,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
|
|
|
388
388
|
{offsetof(struct ImmutableDBOptions, wal_compression),
|
|
389
389
|
OptionType::kCompressionType, OptionVerificationType::kNormal,
|
|
390
390
|
OptionTypeFlags::kNone}},
|
|
391
|
+
{"background_close_inactive_wals",
|
|
392
|
+
{offsetof(struct ImmutableDBOptions, background_close_inactive_wals),
|
|
393
|
+
OptionType::kBoolean, OptionVerificationType::kNormal,
|
|
394
|
+
OptionTypeFlags::kNone}},
|
|
391
395
|
{"seq_per_batch",
|
|
392
396
|
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
|
|
393
397
|
OptionTypeFlags::kNone}},
|
|
@@ -755,6 +759,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
|
|
|
755
759
|
two_write_queues(options.two_write_queues),
|
|
756
760
|
manual_wal_flush(options.manual_wal_flush),
|
|
757
761
|
wal_compression(options.wal_compression),
|
|
762
|
+
background_close_inactive_wals(options.background_close_inactive_wals),
|
|
758
763
|
atomic_flush(options.atomic_flush),
|
|
759
764
|
avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io),
|
|
760
765
|
persist_stats_to_disk(options.persist_stats_to_disk),
|
|
@@ -921,6 +926,9 @@ void ImmutableDBOptions::Dump(Logger* log) const {
|
|
|
921
926
|
manual_wal_flush);
|
|
922
927
|
ROCKS_LOG_HEADER(log, " Options.wal_compression: %d",
|
|
923
928
|
wal_compression);
|
|
929
|
+
ROCKS_LOG_HEADER(log,
|
|
930
|
+
" Options.background_close_inactive_wals: %d",
|
|
931
|
+
background_close_inactive_wals);
|
|
924
932
|
ROCKS_LOG_HEADER(log, " Options.atomic_flush: %d", atomic_flush);
|
|
925
933
|
ROCKS_LOG_HEADER(log,
|
|
926
934
|
" Options.avoid_unnecessary_blocking_io: %d",
|
|
@@ -84,6 +84,7 @@ struct ImmutableDBOptions {
|
|
|
84
84
|
bool two_write_queues;
|
|
85
85
|
bool manual_wal_flush;
|
|
86
86
|
CompressionType wal_compression;
|
|
87
|
+
bool background_close_inactive_wals;
|
|
87
88
|
bool atomic_flush;
|
|
88
89
|
bool avoid_unnecessary_blocking_io;
|
|
89
90
|
bool persist_stats_to_disk;
|
|
@@ -97,17 +98,20 @@ struct ImmutableDBOptions {
|
|
|
97
98
|
std::string db_host_id;
|
|
98
99
|
FileTypeSet checksum_handoff_file_types;
|
|
99
100
|
CacheTier lowest_used_cache_tier;
|
|
100
|
-
// Convenience/Helper objects that are not part of the base DBOptions
|
|
101
|
-
std::shared_ptr<FileSystem> fs;
|
|
102
|
-
SystemClock* clock;
|
|
103
|
-
Statistics* stats;
|
|
104
|
-
Logger* logger;
|
|
105
101
|
std::shared_ptr<CompactionService> compaction_service;
|
|
106
102
|
bool enforce_single_del_contracts;
|
|
107
103
|
uint64_t follower_refresh_catchup_period_ms;
|
|
108
104
|
uint64_t follower_catchup_retry_count;
|
|
109
105
|
uint64_t follower_catchup_retry_wait_ms;
|
|
110
106
|
|
|
107
|
+
// Beginning convenience/helper objects that are not part of the base
|
|
108
|
+
// DBOptions
|
|
109
|
+
std::shared_ptr<FileSystem> fs;
|
|
110
|
+
SystemClock* clock;
|
|
111
|
+
Statistics* stats;
|
|
112
|
+
Logger* logger;
|
|
113
|
+
// End of convenience/helper objects.
|
|
114
|
+
|
|
111
115
|
bool IsWalDirSameAsDBPath() const;
|
|
112
116
|
bool IsWalDirSameAsDBPath(const std::string& path) const;
|
|
113
117
|
const std::string& GetWalDir() const;
|
|
@@ -360,6 +360,9 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
|
|
|
360
360
|
ROCKS_LOG_HEADER(log,
|
|
361
361
|
"Options.compaction_options_universal.stop_style: %s",
|
|
362
362
|
str_compaction_stop_style.c_str());
|
|
363
|
+
ROCKS_LOG_HEADER(log,
|
|
364
|
+
"Options.compaction_options_universal.max_read_amp: %d",
|
|
365
|
+
compaction_options_universal.max_read_amp);
|
|
363
366
|
ROCKS_LOG_HEADER(
|
|
364
367
|
log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64,
|
|
365
368
|
compaction_options_fifo.max_table_files_size);
|
|
@@ -274,6 +274,7 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
|
|
|
274
274
|
cf_opts->last_level_temperature = moptions.last_level_temperature;
|
|
275
275
|
cf_opts->default_write_temperature = moptions.default_write_temperature;
|
|
276
276
|
cf_opts->memtable_max_range_deletions = moptions.memtable_max_range_deletions;
|
|
277
|
+
cf_opts->uncache_aggressiveness = moptions.uncache_aggressiveness;
|
|
277
278
|
}
|
|
278
279
|
|
|
279
280
|
void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
|
|
@@ -353,6 +353,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
|
|
|
353
353
|
"two_write_queues=false;"
|
|
354
354
|
"manual_wal_flush=false;"
|
|
355
355
|
"wal_compression=kZSTD;"
|
|
356
|
+
"background_close_inactive_wals=true;"
|
|
356
357
|
"seq_per_batch=false;"
|
|
357
358
|
"atomic_flush=false;"
|
|
358
359
|
"avoid_unnecessary_blocking_io=false;"
|
|
@@ -565,7 +566,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
|
|
565
566
|
"persist_user_defined_timestamps=true;"
|
|
566
567
|
"block_protection_bytes_per_key=1;"
|
|
567
568
|
"memtable_max_range_deletions=999999;"
|
|
568
|
-
"bottommost_file_compaction_delay=7200;"
|
|
569
|
+
"bottommost_file_compaction_delay=7200;"
|
|
570
|
+
"uncache_aggressiveness=1234;",
|
|
569
571
|
new_options));
|
|
570
572
|
|
|
571
573
|
ASSERT_NE(new_options->blob_cache.get(), nullptr);
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
#pragma once
|
|
7
7
|
|
|
8
8
|
#if defined(__clang__) && defined(__GLIBC__)
|
|
9
|
-
// glibc's `posix_memalign()` declaration specifies `
|
|
9
|
+
// glibc's `posix_memalign()` declaration specifies `noexcept` while clang's
|
|
10
10
|
// declaration does not. There is a hack in clang to make its re-declaration
|
|
11
11
|
// compatible with glibc's if they are declared consecutively. That hack breaks
|
|
12
12
|
// if yet another `posix_memalign()` declaration comes between glibc's and
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
// declarations both come before "jemalloc.h"'s `posix_memalign()` declaration.
|
|
15
15
|
//
|
|
16
16
|
// This problem could also be avoided if "jemalloc.h"'s `posix_memalign()`
|
|
17
|
-
// declaration did not specify `
|
|
17
|
+
// declaration did not specify `noexcept` when built with clang.
|
|
18
18
|
#include <mm_malloc.h>
|
|
19
19
|
#endif
|
|
20
20
|
|
|
@@ -101,8 +101,9 @@ bool CondVar::TimedWait(uint64_t abs_time_us) {
|
|
|
101
101
|
std::unique_lock<std::mutex> lk(mu_->getLock(), std::adopt_lock);
|
|
102
102
|
|
|
103
103
|
// Work around https://github.com/microsoft/STL/issues/369
|
|
104
|
-
|
|
105
|
-
|
|
104
|
+
// std::condition_variable_any::wait_for had a fix, but
|
|
105
|
+
// std::condition_variable still doesn't have a fix in STL yet
|
|
106
|
+
#if defined(_MSC_VER)
|
|
106
107
|
if (relTimeUs == std::chrono::microseconds::zero()) {
|
|
107
108
|
lk.unlock();
|
|
108
109
|
lk.lock();
|
|
@@ -304,6 +304,7 @@ LIB_SOURCES = \
|
|
|
304
304
|
utilities/persistent_cache/volatile_tier_impl.cc \
|
|
305
305
|
utilities/simulator_cache/cache_simulator.cc \
|
|
306
306
|
utilities/simulator_cache/sim_cache.cc \
|
|
307
|
+
utilities/table_properties_collectors/compact_for_tiering_collector.cc \
|
|
307
308
|
utilities/table_properties_collectors/compact_on_deletion_collector.cc \
|
|
308
309
|
utilities/trace/file_trace_reader_writer.cc \
|
|
309
310
|
utilities/trace/replayer_impl.cc \
|
|
@@ -380,6 +381,7 @@ STRESS_LIB_SOURCES = \
|
|
|
380
381
|
db_stress_tool/cf_consistency_stress.cc \
|
|
381
382
|
db_stress_tool/db_stress_common.cc \
|
|
382
383
|
db_stress_tool/db_stress_driver.cc \
|
|
384
|
+
db_stress_tool/db_stress_filters.cc \
|
|
383
385
|
db_stress_tool/db_stress_gflags.cc \
|
|
384
386
|
db_stress_tool/db_stress_listener.cc \
|
|
385
387
|
db_stress_tool/db_stress_shared_state.cc \
|
|
@@ -405,6 +407,7 @@ TEST_LIB_SOURCES = \
|
|
|
405
407
|
FOLLY_SOURCES = \
|
|
406
408
|
$(FOLLY_DIR)/folly/container/detail/F14Table.cpp \
|
|
407
409
|
$(FOLLY_DIR)/folly/detail/Futex.cpp \
|
|
410
|
+
$(FOLLY_DIR)/folly/lang/Exception.cpp \
|
|
408
411
|
$(FOLLY_DIR)/folly/lang/SafeAssert.cpp \
|
|
409
412
|
$(FOLLY_DIR)/folly/lang/ToAscii.cpp \
|
|
410
413
|
$(FOLLY_DIR)/folly/ScopeGuard.cpp \
|
|
@@ -628,6 +631,7 @@ TEST_MAIN_SOURCES = \
|
|
|
628
631
|
utilities/persistent_cache/persistent_cache_test.cc \
|
|
629
632
|
utilities/simulator_cache/cache_simulator_test.cc \
|
|
630
633
|
utilities/simulator_cache/sim_cache_test.cc \
|
|
634
|
+
utilities/table_properties_collectors/compact_for_tiering_collector_test.cc \
|
|
631
635
|
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \
|
|
632
636
|
utilities/transactions/optimistic_transaction_test.cc \
|
|
633
637
|
utilities/transactions/lock/range/range_locking_test.cc \
|
|
@@ -44,9 +44,8 @@ InternalIteratorBase<IndexValue>* BinarySearchIndexReader::NewIterator(
|
|
|
44
44
|
IndexBlockIter* iter, GetContext* get_context,
|
|
45
45
|
BlockCacheLookupContext* lookup_context) {
|
|
46
46
|
const BlockBasedTable::Rep* rep = table()->get_rep();
|
|
47
|
-
const bool no_io = (read_options.read_tier == kBlockCacheTier);
|
|
48
47
|
CachableEntry<Block> index_block;
|
|
49
|
-
const Status s = GetOrReadIndexBlock(
|
|
48
|
+
const Status s = GetOrReadIndexBlock(get_context, lookup_context,
|
|
50
49
|
&index_block, read_options);
|
|
51
50
|
if (!s.ok()) {
|
|
52
51
|
if (iter != nullptr) {
|
|
@@ -582,8 +582,10 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
582
582
|
assert(factory);
|
|
583
583
|
|
|
584
584
|
std::unique_ptr<InternalTblPropColl> collector{
|
|
585
|
-
factory->CreateInternalTblPropColl(
|
|
586
|
-
|
|
585
|
+
factory->CreateInternalTblPropColl(
|
|
586
|
+
tbo.column_family_id, tbo.level_at_creation,
|
|
587
|
+
tbo.ioptions.num_levels,
|
|
588
|
+
tbo.last_level_inclusive_max_seqno_threshold)};
|
|
587
589
|
if (collector) {
|
|
588
590
|
table_properties_collectors.emplace_back(std::move(collector));
|
|
589
591
|
}
|
|
@@ -823,6 +823,12 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
|
|
|
823
823
|
read_options_, block_handle,
|
|
824
824
|
&(block_handle_info.cachable_entry_).As<Block_kData>());
|
|
825
825
|
if (!s.ok()) {
|
|
826
|
+
#ifndef NDEBUG
|
|
827
|
+
// To allow fault injection verification to pass since non-okay status in
|
|
828
|
+
// `BlockCacheLookupForReadAheadSize()` won't fail the read but only lead to
|
|
829
|
+
// less or no readahead
|
|
830
|
+
IGNORE_STATUS_IF_ERROR(s);
|
|
831
|
+
#endif
|
|
826
832
|
break;
|
|
827
833
|
}
|
|
828
834
|
|
|
@@ -852,6 +858,15 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
|
|
|
852
858
|
is_index_at_curr_block_ = false;
|
|
853
859
|
}
|
|
854
860
|
|
|
861
|
+
#ifndef NDEBUG
|
|
862
|
+
// To allow fault injection verification to pass since non-okay status in
|
|
863
|
+
// `BlockCacheLookupForReadAheadSize()` won't fail the read but only lead to less
|
|
864
|
+
// or no readahead
|
|
865
|
+
if (!index_iter_->status().ok()) {
|
|
866
|
+
IGNORE_STATUS_IF_ERROR(index_iter_->status());
|
|
867
|
+
}
|
|
868
|
+
#endif
|
|
869
|
+
|
|
855
870
|
if (found_first_miss_block) {
|
|
856
871
|
// Iterate cache hit block handles from the end till a Miss is there, to
|
|
857
872
|
// truncate and update the end offset till that Miss.
|
|
@@ -135,7 +135,46 @@ extern const uint64_t kBlockBasedTableMagicNumber;
|
|
|
135
135
|
extern const std::string kHashIndexPrefixesBlock;
|
|
136
136
|
extern const std::string kHashIndexPrefixesMetadataBlock;
|
|
137
137
|
|
|
138
|
-
BlockBasedTable::~BlockBasedTable() {
|
|
138
|
+
BlockBasedTable::~BlockBasedTable() {
|
|
139
|
+
auto ua = rep_->uncache_aggressiveness.LoadRelaxed();
|
|
140
|
+
if (ua > 0 && rep_->table_options.block_cache) {
|
|
141
|
+
if (rep_->filter) {
|
|
142
|
+
rep_->filter->EraseFromCacheBeforeDestruction(ua);
|
|
143
|
+
}
|
|
144
|
+
if (rep_->index_reader) {
|
|
145
|
+
{
|
|
146
|
+
// TODO: Also uncache data blocks known after any gaps in partitioned
|
|
147
|
+
// index. Right now the iterator errors out as soon as there's an
|
|
148
|
+
// index partition not in cache.
|
|
149
|
+
IndexBlockIter iiter_on_stack;
|
|
150
|
+
ReadOptions ropts;
|
|
151
|
+
ropts.read_tier = kBlockCacheTier; // No I/O
|
|
152
|
+
auto iiter = NewIndexIterator(
|
|
153
|
+
ropts, /*disable_prefix_seek=*/false, &iiter_on_stack,
|
|
154
|
+
/*get_context=*/nullptr, /*lookup_context=*/nullptr);
|
|
155
|
+
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
|
|
156
|
+
if (iiter != &iiter_on_stack) {
|
|
157
|
+
iiter_unique_ptr.reset(iiter);
|
|
158
|
+
}
|
|
159
|
+
// Un-cache the data blocks the index iterator will tell us about
|
|
160
|
+
// without I/O. (NOTE: It's extremely unlikely that a data block
|
|
161
|
+
// will be in block cache without the index block pointing to it
|
|
162
|
+
// also in block cache.)
|
|
163
|
+
UncacheAggressivenessAdvisor advisor(ua);
|
|
164
|
+
for (iiter->SeekToFirst(); iiter->Valid() && advisor.ShouldContinue();
|
|
165
|
+
iiter->Next()) {
|
|
166
|
+
bool erased = EraseFromCache(iiter->value().handle);
|
|
167
|
+
advisor.Report(erased);
|
|
168
|
+
}
|
|
169
|
+
iiter->status().PermitUncheckedError();
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Un-cache the index block(s)
|
|
173
|
+
rep_->index_reader->EraseFromCacheBeforeDestruction(ua);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
delete rep_;
|
|
177
|
+
}
|
|
139
178
|
|
|
140
179
|
namespace {
|
|
141
180
|
// Read the block identified by "handle" from "file".
|
|
@@ -439,6 +478,7 @@ bool IsFeatureSupported(const TableProperties& table_properties,
|
|
|
439
478
|
}
|
|
440
479
|
|
|
441
480
|
// Caller has to ensure seqno is not nullptr.
|
|
481
|
+
// Set *seqno to the global sequence number for reading this file.
|
|
442
482
|
Status GetGlobalSequenceNumber(const TableProperties& table_properties,
|
|
443
483
|
SequenceNumber largest_seqno,
|
|
444
484
|
SequenceNumber* seqno) {
|
|
@@ -461,12 +501,17 @@ Status GetGlobalSequenceNumber(const TableProperties& table_properties,
|
|
|
461
501
|
}
|
|
462
502
|
|
|
463
503
|
uint32_t version = DecodeFixed32(version_pos->second.c_str());
|
|
464
|
-
if (version
|
|
465
|
-
|
|
466
|
-
|
|
504
|
+
if (version != 2) {
|
|
505
|
+
std::array<char, 200> msg_buf;
|
|
506
|
+
if (version != 1) {
|
|
507
|
+
snprintf(msg_buf.data(), msg_buf.max_size(),
|
|
508
|
+
"An external sst file has corrupted version %u.", version);
|
|
509
|
+
return Status::Corruption(msg_buf.data());
|
|
510
|
+
}
|
|
511
|
+
if (seqno_pos != props.end()) {
|
|
467
512
|
// This is a v1 external sst file, global_seqno is not supported.
|
|
468
513
|
snprintf(msg_buf.data(), msg_buf.max_size(),
|
|
469
|
-
"An external sst file with version %u
|
|
514
|
+
"An external sst file with version %u has global seqno "
|
|
470
515
|
"property with value %s",
|
|
471
516
|
version, seqno_pos->second.c_str());
|
|
472
517
|
return Status::Corruption(msg_buf.data());
|
|
@@ -594,6 +639,8 @@ Status BlockBasedTable::Open(
|
|
|
594
639
|
|
|
595
640
|
// From read_options, retain deadline, io_timeout, rate_limiter_priority, and
|
|
596
641
|
// verify_checksums. In future, we may retain more options.
|
|
642
|
+
// TODO: audit more ReadOptions and do this in a way that brings attention
|
|
643
|
+
// on new ReadOptions?
|
|
597
644
|
ReadOptions ro;
|
|
598
645
|
ro.deadline = read_options.deadline;
|
|
599
646
|
ro.io_timeout = read_options.io_timeout;
|
|
@@ -844,6 +891,10 @@ Status BlockBasedTable::PrefetchTail(
|
|
|
844
891
|
if (tail_size != 0) {
|
|
845
892
|
tail_prefetch_size = tail_size;
|
|
846
893
|
} else {
|
|
894
|
+
// Fallback for SST files, for which tail size is not recorded in the
|
|
895
|
+
// manifest. Eventually, this fallback might be removed, so it's
|
|
896
|
+
// better to make sure that such SST files get compacted.
|
|
897
|
+
// See https://github.com/facebook/rocksdb/issues/12664
|
|
847
898
|
if (tail_prefetch_stats != nullptr) {
|
|
848
899
|
// Multiple threads may get a 0 (no history) when running in parallel,
|
|
849
900
|
// but it will get cleared after the first of them finishes.
|
|
@@ -858,14 +909,15 @@ Status BlockBasedTable::PrefetchTail(
|
|
|
858
909
|
// properties, at which point we don't yet know the index type.
|
|
859
910
|
tail_prefetch_size = prefetch_all || preload_all ? 512 * 1024 : 4 * 1024;
|
|
860
911
|
|
|
861
|
-
ROCKS_LOG_WARN(logger,
|
|
862
|
-
"Tail prefetch size %zu is calculated based on heuristics",
|
|
863
|
-
tail_prefetch_size);
|
|
864
|
-
} else {
|
|
865
912
|
ROCKS_LOG_WARN(
|
|
866
913
|
logger,
|
|
867
|
-
"Tail prefetch size %zu is calculated based on TailPrefetchStats",
|
|
868
|
-
tail_prefetch_size);
|
|
914
|
+
"[%s] Tail prefetch size %zu is calculated based on heuristics.",
|
|
915
|
+
file->file_name().c_str(), tail_prefetch_size);
|
|
916
|
+
} else {
|
|
917
|
+
ROCKS_LOG_WARN(logger,
|
|
918
|
+
"[%s] Tail prefetch size %zu is calculated based on "
|
|
919
|
+
"TailPrefetchStats.",
|
|
920
|
+
file->file_name().c_str(), tail_prefetch_size);
|
|
869
921
|
}
|
|
870
922
|
}
|
|
871
923
|
size_t prefetch_off;
|
|
@@ -1521,9 +1573,8 @@ Status BlockBasedTable::LookupAndPinBlocksInCache(
|
|
|
1521
1573
|
Status s;
|
|
1522
1574
|
CachableEntry<UncompressionDict> uncompression_dict;
|
|
1523
1575
|
if (rep_->uncompression_dict_reader) {
|
|
1524
|
-
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
|
1525
1576
|
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
|
|
1526
|
-
/* prefetch_buffer= */ nullptr, ro,
|
|
1577
|
+
/* prefetch_buffer= */ nullptr, ro,
|
|
1527
1578
|
/* get_context= */ nullptr, /* lookup_context= */ nullptr,
|
|
1528
1579
|
&uncompression_dict);
|
|
1529
1580
|
if (!s.ok()) {
|
|
@@ -1978,14 +2029,11 @@ bool BlockBasedTable::PrefixRangeMayMatch(
|
|
|
1978
2029
|
FilterBlockReader* const filter = rep_->filter.get();
|
|
1979
2030
|
*filter_checked = false;
|
|
1980
2031
|
if (filter != nullptr) {
|
|
1981
|
-
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
|
1982
|
-
|
|
1983
2032
|
const Slice* const const_ikey_ptr = &internal_key;
|
|
1984
2033
|
may_match = filter->RangeMayExist(
|
|
1985
2034
|
read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor,
|
|
1986
2035
|
rep_->internal_comparator.user_comparator(), const_ikey_ptr,
|
|
1987
|
-
filter_checked, need_upper_bound_check,
|
|
1988
|
-
read_options);
|
|
2036
|
+
filter_checked, need_upper_bound_check, lookup_context, read_options);
|
|
1989
2037
|
}
|
|
1990
2038
|
|
|
1991
2039
|
return may_match;
|
|
@@ -2065,7 +2113,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
|
|
|
2065
2113
|
}
|
|
2066
2114
|
|
|
2067
2115
|
bool BlockBasedTable::FullFilterKeyMayMatch(
|
|
2068
|
-
FilterBlockReader* filter, const Slice& internal_key,
|
|
2116
|
+
FilterBlockReader* filter, const Slice& internal_key,
|
|
2069
2117
|
const SliceTransform* prefix_extractor, GetContext* get_context,
|
|
2070
2118
|
BlockCacheLookupContext* lookup_context,
|
|
2071
2119
|
const ReadOptions& read_options) const {
|
|
@@ -2078,7 +2126,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
|
|
|
2078
2126
|
size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size();
|
|
2079
2127
|
Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz);
|
|
2080
2128
|
if (rep_->whole_key_filtering) {
|
|
2081
|
-
may_match = filter->KeyMayMatch(user_key_without_ts,
|
|
2129
|
+
may_match = filter->KeyMayMatch(user_key_without_ts, const_ikey_ptr,
|
|
2082
2130
|
get_context, lookup_context, read_options);
|
|
2083
2131
|
if (may_match) {
|
|
2084
2132
|
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE);
|
|
@@ -2092,7 +2140,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
|
|
|
2092
2140
|
// FIXME ^^^: there should be no reason for Get() to depend on current
|
|
2093
2141
|
// prefix_extractor at all. It should always use table_prefix_extractor.
|
|
2094
2142
|
may_match = filter->PrefixMayMatch(
|
|
2095
|
-
prefix_extractor->Transform(user_key_without_ts),
|
|
2143
|
+
prefix_extractor->Transform(user_key_without_ts), const_ikey_ptr,
|
|
2096
2144
|
get_context, lookup_context, read_options);
|
|
2097
2145
|
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED);
|
|
2098
2146
|
if (may_match) {
|
|
@@ -2108,7 +2156,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
|
|
|
2108
2156
|
}
|
|
2109
2157
|
|
|
2110
2158
|
void BlockBasedTable::FullFilterKeysMayMatch(
|
|
2111
|
-
FilterBlockReader* filter, MultiGetRange* range,
|
|
2159
|
+
FilterBlockReader* filter, MultiGetRange* range,
|
|
2112
2160
|
const SliceTransform* prefix_extractor,
|
|
2113
2161
|
BlockCacheLookupContext* lookup_context,
|
|
2114
2162
|
const ReadOptions& read_options) const {
|
|
@@ -2118,7 +2166,7 @@ void BlockBasedTable::FullFilterKeysMayMatch(
|
|
|
2118
2166
|
uint64_t before_keys = range->KeysLeft();
|
|
2119
2167
|
assert(before_keys > 0); // Caller should ensure
|
|
2120
2168
|
if (rep_->whole_key_filtering) {
|
|
2121
|
-
filter->KeysMayMatch(range,
|
|
2169
|
+
filter->KeysMayMatch(range, lookup_context, read_options);
|
|
2122
2170
|
uint64_t after_keys = range->KeysLeft();
|
|
2123
2171
|
if (after_keys) {
|
|
2124
2172
|
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
|
|
@@ -2134,7 +2182,7 @@ void BlockBasedTable::FullFilterKeysMayMatch(
|
|
|
2134
2182
|
} else if (!PrefixExtractorChanged(prefix_extractor)) {
|
|
2135
2183
|
// FIXME ^^^: there should be no reason for MultiGet() to depend on current
|
|
2136
2184
|
// prefix_extractor at all. It should always use table_prefix_extractor.
|
|
2137
|
-
filter->PrefixesMayMatch(range, prefix_extractor,
|
|
2185
|
+
filter->PrefixesMayMatch(range, prefix_extractor, lookup_context,
|
|
2138
2186
|
read_options);
|
|
2139
2187
|
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
|
|
2140
2188
|
uint64_t after_keys = range->KeysLeft();
|
|
@@ -2240,7 +2288,6 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2240
2288
|
assert(key.size() >= 8); // key must be internal key
|
|
2241
2289
|
assert(get_context != nullptr);
|
|
2242
2290
|
Status s;
|
|
2243
|
-
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
|
2244
2291
|
|
|
2245
2292
|
FilterBlockReader* const filter =
|
|
2246
2293
|
!skip_filters ? rep_->filter.get() : nullptr;
|
|
@@ -2259,7 +2306,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2259
2306
|
}
|
|
2260
2307
|
TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
|
|
2261
2308
|
const bool may_match =
|
|
2262
|
-
FullFilterKeyMayMatch(filter, key,
|
|
2309
|
+
FullFilterKeyMayMatch(filter, key, prefix_extractor, get_context,
|
|
2263
2310
|
&lookup_context, read_options);
|
|
2264
2311
|
TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
|
|
2265
2312
|
if (may_match) {
|
|
@@ -2309,7 +2356,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
|
2309
2356
|
/*for_compaction=*/false, /*async_read=*/false, tmp_status,
|
|
2310
2357
|
/*use_block_cache_for_lookup=*/true);
|
|
2311
2358
|
|
|
2312
|
-
if (
|
|
2359
|
+
if (read_options.read_tier == kBlockCacheTier &&
|
|
2360
|
+
biter.status().IsIncomplete()) {
|
|
2313
2361
|
// couldn't get block from block_cache
|
|
2314
2362
|
// Update Saver.state to Found because we are only looking for
|
|
2315
2363
|
// whether we can guarantee the key is not there when "no_io" is set
|
|
@@ -2421,7 +2469,6 @@ Status BlockBasedTable::MultiGetFilter(const ReadOptions& read_options,
|
|
|
2421
2469
|
|
|
2422
2470
|
// First check the full filter
|
|
2423
2471
|
// If full filter not useful, Then go into each block
|
|
2424
|
-
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
|
2425
2472
|
uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
|
|
2426
2473
|
if (mget_range->begin()->get_context) {
|
|
2427
2474
|
tracing_mget_id = mget_range->begin()->get_context->get_tracing_get_id();
|
|
@@ -2429,8 +2476,8 @@ Status BlockBasedTable::MultiGetFilter(const ReadOptions& read_options,
|
|
|
2429
2476
|
BlockCacheLookupContext lookup_context{
|
|
2430
2477
|
TableReaderCaller::kUserMultiGet, tracing_mget_id,
|
|
2431
2478
|
/*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
|
|
2432
|
-
FullFilterKeysMayMatch(filter, mget_range,
|
|
2433
|
-
|
|
2479
|
+
FullFilterKeysMayMatch(filter, mget_range, prefix_extractor, &lookup_context,
|
|
2480
|
+
read_options);
|
|
2434
2481
|
|
|
2435
2482
|
return Status::OK();
|
|
2436
2483
|
}
|
|
@@ -2663,6 +2710,24 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
|
|
|
2663
2710
|
return s;
|
|
2664
2711
|
}
|
|
2665
2712
|
|
|
2713
|
+
bool BlockBasedTable::EraseFromCache(const BlockHandle& handle) const {
|
|
2714
|
+
assert(rep_ != nullptr);
|
|
2715
|
+
|
|
2716
|
+
Cache* const cache = rep_->table_options.block_cache.get();
|
|
2717
|
+
if (cache == nullptr) {
|
|
2718
|
+
return false;
|
|
2719
|
+
}
|
|
2720
|
+
|
|
2721
|
+
CacheKey key = GetCacheKey(rep_->base_cache_key, handle);
|
|
2722
|
+
|
|
2723
|
+
Cache::Handle* const cache_handle = cache->Lookup(key.AsSlice());
|
|
2724
|
+
if (cache_handle == nullptr) {
|
|
2725
|
+
return false;
|
|
2726
|
+
}
|
|
2727
|
+
|
|
2728
|
+
return cache->Release(cache_handle, /*erase_if_last_ref=*/true);
|
|
2729
|
+
}
|
|
2730
|
+
|
|
2666
2731
|
bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const {
|
|
2667
2732
|
assert(rep_ != nullptr);
|
|
2668
2733
|
|
|
@@ -2796,11 +2861,8 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const ReadOptions& read_options,
|
|
|
2796
2861
|
|
|
2797
2862
|
BlockCacheLookupContext context(caller);
|
|
2798
2863
|
IndexBlockIter iiter_on_stack;
|
|
2799
|
-
ReadOptions ro;
|
|
2800
|
-
ro.total_order_seek = true;
|
|
2801
|
-
ro.io_activity = read_options.io_activity;
|
|
2802
2864
|
auto index_iter =
|
|
2803
|
-
NewIndexIterator(
|
|
2865
|
+
NewIndexIterator(read_options, /*disable_prefix_seek=*/true,
|
|
2804
2866
|
/*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
|
|
2805
2867
|
/*lookup_context=*/&context);
|
|
2806
2868
|
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
|
|
@@ -2843,11 +2905,8 @@ uint64_t BlockBasedTable::ApproximateSize(const ReadOptions& read_options,
|
|
|
2843
2905
|
|
|
2844
2906
|
BlockCacheLookupContext context(caller);
|
|
2845
2907
|
IndexBlockIter iiter_on_stack;
|
|
2846
|
-
ReadOptions ro;
|
|
2847
|
-
ro.total_order_seek = true;
|
|
2848
|
-
ro.io_activity = read_options.io_activity;
|
|
2849
2908
|
auto index_iter =
|
|
2850
|
-
NewIndexIterator(
|
|
2909
|
+
NewIndexIterator(read_options, /*disable_prefix_seek=*/true,
|
|
2851
2910
|
/*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
|
|
2852
2911
|
/*lookup_context=*/&context);
|
|
2853
2912
|
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
|
|
@@ -3023,10 +3082,8 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
|
|
|
3023
3082
|
if (rep_->uncompression_dict_reader) {
|
|
3024
3083
|
CachableEntry<UncompressionDict> uncompression_dict;
|
|
3025
3084
|
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
|
|
3026
|
-
nullptr /* prefetch_buffer */, ro,
|
|
3027
|
-
|
|
3028
|
-
nullptr /* get_context */, nullptr /* lookup_context */,
|
|
3029
|
-
&uncompression_dict);
|
|
3085
|
+
nullptr /* prefetch_buffer */, ro, nullptr /* get_context */,
|
|
3086
|
+
nullptr /* lookup_context */, &uncompression_dict);
|
|
3030
3087
|
if (!s.ok()) {
|
|
3031
3088
|
return s;
|
|
3032
3089
|
}
|
|
@@ -3232,4 +3289,8 @@ void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value,
|
|
|
3232
3289
|
out_stream << " ------\n";
|
|
3233
3290
|
}
|
|
3234
3291
|
|
|
3292
|
+
void BlockBasedTable::MarkObsolete(uint32_t uncache_aggressiveness) {
|
|
3293
|
+
rep_->uncache_aggressiveness.StoreRelaxed(uncache_aggressiveness);
|
|
3294
|
+
}
|
|
3295
|
+
|
|
3235
3296
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
#include "table/table_reader.h"
|
|
34
34
|
#include "table/two_level_iterator.h"
|
|
35
35
|
#include "trace_replay/block_cache_tracer.h"
|
|
36
|
+
#include "util/atomic.h"
|
|
36
37
|
#include "util/coro_utils.h"
|
|
37
38
|
#include "util/hash_containers.h"
|
|
38
39
|
|
|
@@ -183,6 +184,8 @@ class BlockBasedTable : public TableReader {
|
|
|
183
184
|
Status ApproximateKeyAnchors(const ReadOptions& read_options,
|
|
184
185
|
std::vector<Anchor>& anchors) override;
|
|
185
186
|
|
|
187
|
+
bool EraseFromCache(const BlockHandle& handle) const;
|
|
188
|
+
|
|
186
189
|
bool TEST_BlockInCache(const BlockHandle& handle) const;
|
|
187
190
|
|
|
188
191
|
// Returns true if the block for the specified key is in cache.
|
|
@@ -208,6 +211,8 @@ class BlockBasedTable : public TableReader {
|
|
|
208
211
|
Status VerifyChecksum(const ReadOptions& readOptions,
|
|
209
212
|
TableReaderCaller caller) override;
|
|
210
213
|
|
|
214
|
+
void MarkObsolete(uint32_t uncache_aggressiveness) override;
|
|
215
|
+
|
|
211
216
|
~BlockBasedTable();
|
|
212
217
|
|
|
213
218
|
bool TEST_FilterBlockInCache() const;
|
|
@@ -241,6 +246,8 @@ class BlockBasedTable : public TableReader {
|
|
|
241
246
|
FilePrefetchBuffer* /* tail_prefetch_buffer */) {
|
|
242
247
|
return Status::OK();
|
|
243
248
|
}
|
|
249
|
+
virtual void EraseFromCacheBeforeDestruction(
|
|
250
|
+
uint32_t /*uncache_aggressiveness*/) {}
|
|
244
251
|
};
|
|
245
252
|
|
|
246
253
|
class IndexReaderCommon;
|
|
@@ -462,14 +469,12 @@ class BlockBasedTable : public TableReader {
|
|
|
462
469
|
std::unique_ptr<IndexReader>* index_reader);
|
|
463
470
|
|
|
464
471
|
bool FullFilterKeyMayMatch(FilterBlockReader* filter, const Slice& user_key,
|
|
465
|
-
const bool no_io,
|
|
466
472
|
const SliceTransform* prefix_extractor,
|
|
467
473
|
GetContext* get_context,
|
|
468
474
|
BlockCacheLookupContext* lookup_context,
|
|
469
475
|
const ReadOptions& read_options) const;
|
|
470
476
|
|
|
471
477
|
void FullFilterKeysMayMatch(FilterBlockReader* filter, MultiGetRange* range,
|
|
472
|
-
const bool no_io,
|
|
473
478
|
const SliceTransform* prefix_extractor,
|
|
474
479
|
BlockCacheLookupContext* lookup_context,
|
|
475
480
|
const ReadOptions& read_options) const;
|
|
@@ -619,11 +624,7 @@ struct BlockBasedTable::Rep {
|
|
|
619
624
|
|
|
620
625
|
std::shared_ptr<FragmentedRangeTombstoneList> fragmented_range_dels;
|
|
621
626
|
|
|
622
|
-
//
|
|
623
|
-
// If true, data blocks in this file are definitely ZSTD compressed. If false
|
|
624
|
-
// they might not be. When false we skip creating a ZSTD digested
|
|
625
|
-
// uncompression dictionary. Even if we get a false negative, things should
|
|
626
|
-
// still work, just not as quickly.
|
|
627
|
+
// Context for block cache CreateCallback
|
|
627
628
|
BlockCreateContext create_context;
|
|
628
629
|
|
|
629
630
|
// If global_seqno is used, all Keys in this file will have the same
|
|
@@ -672,6 +673,13 @@ struct BlockBasedTable::Rep {
|
|
|
672
673
|
// `end_key` for range deletion entries.
|
|
673
674
|
const bool user_defined_timestamps_persisted;
|
|
674
675
|
|
|
676
|
+
// Set to >0 when the file is known to be obsolete and should have its block
|
|
677
|
+
// cache entries evicted on close. NOTE: when the file becomes obsolete,
|
|
678
|
+
// there could be multiple table cache references that all mark this file as
|
|
679
|
+
// obsolete. An atomic resolves the race quite reasonably. Even in the rare
|
|
680
|
+
// case of such a race, they will most likely be storing the same value.
|
|
681
|
+
RelaxedAtomic<uint32_t> uncache_aggressiveness{0};
|
|
682
|
+
|
|
675
683
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
|
676
684
|
table_reader_cache_res_handle = nullptr;
|
|
677
685
|
|