@nxtedition/rocksdb 7.1.14 → 7.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +1 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
- package/deps/rocksdb/rocksdb/Makefile +91 -11
- package/deps/rocksdb/rocksdb/TARGETS +8 -4
- package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
- package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
- package/deps/rocksdb/rocksdb/db/c.cc +29 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
- package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
- package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
- package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
- package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
- package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
- package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
- package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
- package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
- package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
- package/deps/rocksdb/rocksdb/file/filename.h +4 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options.cc +2 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +26 -29
- package/deps/rocksdb/rocksdb/table/format.h +44 -26
- package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
- package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
- package/deps/rocksdb/rocksdb/util/timer.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
- package/deps/rocksdb/rocksdb.gyp +0 -1
- package/index.js +6 -10
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
|
@@ -511,6 +511,7 @@ class FilePickerMultiGet {
|
|
|
511
511
|
MultiGetRange& GetRange() { return range_; }
|
|
512
512
|
|
|
513
513
|
void ReplaceRange(const MultiGetRange& other) {
|
|
514
|
+
assert(hit_file_ == nullptr);
|
|
514
515
|
range_ = other;
|
|
515
516
|
current_level_range_ = other;
|
|
516
517
|
}
|
|
@@ -940,17 +941,18 @@ namespace {
|
|
|
940
941
|
class LevelIterator final : public InternalIterator {
|
|
941
942
|
public:
|
|
942
943
|
// @param read_options Must outlive this iterator.
|
|
943
|
-
LevelIterator(
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
944
|
+
LevelIterator(
|
|
945
|
+
TableCache* table_cache, const ReadOptions& read_options,
|
|
946
|
+
const FileOptions& file_options, const InternalKeyComparator& icomparator,
|
|
947
|
+
const LevelFilesBrief* flevel,
|
|
948
|
+
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
|
949
|
+
bool should_sample, HistogramImpl* file_read_hist,
|
|
950
|
+
TableReaderCaller caller, bool skip_filters, int level,
|
|
951
|
+
RangeDelAggregator* range_del_agg,
|
|
952
|
+
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
|
|
953
|
+
nullptr,
|
|
954
|
+
bool allow_unprepared_value = false,
|
|
955
|
+
TruncatedRangeDelIterator**** range_tombstone_iter_ptr_ = nullptr)
|
|
954
956
|
: table_cache_(table_cache),
|
|
955
957
|
read_options_(read_options),
|
|
956
958
|
file_options_(file_options),
|
|
@@ -968,13 +970,23 @@ class LevelIterator final : public InternalIterator {
|
|
|
968
970
|
range_del_agg_(range_del_agg),
|
|
969
971
|
pinned_iters_mgr_(nullptr),
|
|
970
972
|
compaction_boundaries_(compaction_boundaries),
|
|
971
|
-
is_next_read_sequential_(false)
|
|
973
|
+
is_next_read_sequential_(false),
|
|
974
|
+
range_tombstone_iter_(nullptr),
|
|
975
|
+
to_return_sentinel_(false) {
|
|
972
976
|
// Empty level is not supported.
|
|
973
977
|
assert(flevel_ != nullptr && flevel_->num_files > 0);
|
|
978
|
+
if (range_tombstone_iter_ptr_) {
|
|
979
|
+
*range_tombstone_iter_ptr_ = &range_tombstone_iter_;
|
|
980
|
+
}
|
|
974
981
|
}
|
|
975
982
|
|
|
976
983
|
~LevelIterator() override { delete file_iter_.Set(nullptr); }
|
|
977
984
|
|
|
985
|
+
// Seek to the first file with a key >= target.
|
|
986
|
+
// If range_tombstone_iter_ is not nullptr, then we pretend that file
|
|
987
|
+
// boundaries are fake keys (sentinel keys). These keys are used to keep range
|
|
988
|
+
// tombstones alive even when all point keys in an SST file are exhausted.
|
|
989
|
+
// These sentinel keys will be skipped in merging iterator.
|
|
978
990
|
void Seek(const Slice& target) override;
|
|
979
991
|
void SeekForPrev(const Slice& target) override;
|
|
980
992
|
void SeekToFirst() override;
|
|
@@ -983,14 +995,29 @@ class LevelIterator final : public InternalIterator {
|
|
|
983
995
|
bool NextAndGetResult(IterateResult* result) override;
|
|
984
996
|
void Prev() override;
|
|
985
997
|
|
|
986
|
-
|
|
998
|
+
// In addition to valid and invalid state (!file_iter_.Valid() and
|
|
999
|
+
// status.ok()), a third state of the iterator is when !file_iter_.Valid() and
|
|
1000
|
+
// to_return_sentinel_. This means we are at the end of a file, and a sentinel
|
|
1001
|
+
// key (the file boundary that we pretend is a key) is to be returned next.
|
|
1002
|
+
// file_iter_.Valid() and to_return_sentinel_ should not both be true.
|
|
1003
|
+
bool Valid() const override {
|
|
1004
|
+
assert(!(file_iter_.Valid() && to_return_sentinel_));
|
|
1005
|
+
return file_iter_.Valid() || to_return_sentinel_;
|
|
1006
|
+
}
|
|
987
1007
|
Slice key() const override {
|
|
988
1008
|
assert(Valid());
|
|
1009
|
+
if (to_return_sentinel_) {
|
|
1010
|
+
// Sentinel should be returned after file_iter_ reaches the end of the
|
|
1011
|
+
// file
|
|
1012
|
+
assert(!file_iter_.Valid());
|
|
1013
|
+
return sentinel_;
|
|
1014
|
+
}
|
|
989
1015
|
return file_iter_.key();
|
|
990
1016
|
}
|
|
991
1017
|
|
|
992
1018
|
Slice value() const override {
|
|
993
1019
|
assert(Valid());
|
|
1020
|
+
assert(!to_return_sentinel_);
|
|
994
1021
|
return file_iter_.value();
|
|
995
1022
|
}
|
|
996
1023
|
|
|
@@ -1032,6 +1059,8 @@ class LevelIterator final : public InternalIterator {
|
|
|
1032
1059
|
file_iter_.iter() && file_iter_.IsValuePinned();
|
|
1033
1060
|
}
|
|
1034
1061
|
|
|
1062
|
+
bool IsDeleteRangeSentinelKey() const override { return to_return_sentinel_; }
|
|
1063
|
+
|
|
1035
1064
|
private:
|
|
1036
1065
|
// Return true if at least one invalid file is seen and skipped.
|
|
1037
1066
|
bool SkipEmptyFileForward();
|
|
@@ -1044,6 +1073,11 @@ class LevelIterator final : public InternalIterator {
|
|
|
1044
1073
|
return flevel_->files[file_index].smallest_key;
|
|
1045
1074
|
}
|
|
1046
1075
|
|
|
1076
|
+
const Slice& file_largest_key(size_t file_index) {
|
|
1077
|
+
assert(file_index < flevel_->num_files);
|
|
1078
|
+
return flevel_->files[file_index].largest_key;
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1047
1081
|
bool KeyReachedUpperBound(const Slice& internal_key) {
|
|
1048
1082
|
return read_options_.iterate_upper_bound != nullptr &&
|
|
1049
1083
|
user_comparator_.CompareWithoutTimestamp(
|
|
@@ -1051,6 +1085,16 @@ class LevelIterator final : public InternalIterator {
|
|
|
1051
1085
|
*read_options_.iterate_upper_bound, /*b_has_ts=*/false) >= 0;
|
|
1052
1086
|
}
|
|
1053
1087
|
|
|
1088
|
+
void ClearRangeTombstoneIter() {
|
|
1089
|
+
if (range_tombstone_iter_ && *range_tombstone_iter_) {
|
|
1090
|
+
delete *range_tombstone_iter_;
|
|
1091
|
+
*range_tombstone_iter_ = nullptr;
|
|
1092
|
+
}
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
// Move file_iter_ to the file at file_index_.
|
|
1096
|
+
// range_tombstone_iter_ is updated with a range tombstone iterator
|
|
1097
|
+
// into the new file. Old range tombstone iterator is cleared.
|
|
1054
1098
|
InternalIterator* NewFileIterator() {
|
|
1055
1099
|
assert(file_index_ < flevel_->num_files);
|
|
1056
1100
|
auto file_meta = flevel_->files[file_index_];
|
|
@@ -1065,13 +1109,14 @@ class LevelIterator final : public InternalIterator {
|
|
|
1065
1109
|
largest_compaction_key = (*compaction_boundaries_)[file_index_].largest;
|
|
1066
1110
|
}
|
|
1067
1111
|
CheckMayBeOutOfLowerBound();
|
|
1112
|
+
ClearRangeTombstoneIter();
|
|
1068
1113
|
return table_cache_->NewIterator(
|
|
1069
1114
|
read_options_, file_options_, icomparator_, *file_meta.file_metadata,
|
|
1070
1115
|
range_del_agg_, prefix_extractor_,
|
|
1071
1116
|
nullptr /* don't need reference to table */, file_read_hist_, caller_,
|
|
1072
1117
|
/*arena=*/nullptr, skip_filters_, level_,
|
|
1073
1118
|
/*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key,
|
|
1074
|
-
largest_compaction_key, allow_unprepared_value_);
|
|
1119
|
+
largest_compaction_key, allow_unprepared_value_, range_tombstone_iter_);
|
|
1075
1120
|
}
|
|
1076
1121
|
|
|
1077
1122
|
// Check if current file being fully within iterate_lower_bound.
|
|
@@ -1117,9 +1162,51 @@ class LevelIterator final : public InternalIterator {
|
|
|
1117
1162
|
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries_;
|
|
1118
1163
|
|
|
1119
1164
|
bool is_next_read_sequential_;
|
|
1165
|
+
|
|
1166
|
+
// This is set when this level iterator is used under a merging iterator
|
|
1167
|
+
// that processes range tombstones. range_tombstone_iter_ points to where the
|
|
1168
|
+
// merging iterator stores the range tombstones iterator for this level. When
|
|
1169
|
+
// this level iterator moves to a new SST file, it updates the range
|
|
1170
|
+
// tombstones accordingly through this pointer. So the merging iterator always
|
|
1171
|
+
// has access to the current SST file's range tombstones.
|
|
1172
|
+
//
|
|
1173
|
+
// The level iterator treats file boundary as fake keys (sentinel keys) to
|
|
1174
|
+
// keep range tombstones alive if needed and make upper level, i.e. merging
|
|
1175
|
+
// iterator, aware of file changes (when level iterator moves to a new SST
|
|
1176
|
+
// file, there is some bookkeeping work that needs to be done at merging
|
|
1177
|
+
// iterator end).
|
|
1178
|
+
//
|
|
1179
|
+
// *range_tombstone_iter_ points to range tombstones of the current SST file
|
|
1180
|
+
TruncatedRangeDelIterator** range_tombstone_iter_;
|
|
1181
|
+
|
|
1182
|
+
// Whether next/prev key is a sentinel key.
|
|
1183
|
+
bool to_return_sentinel_ = false;
|
|
1184
|
+
// The sentinel key to be returned
|
|
1185
|
+
Slice sentinel_;
|
|
1186
|
+
// Sets the flag and the sentinel key if the sentinel should be returned next.
|
|
1187
|
+
// The condition for returning sentinel is reaching the end of current
|
|
1188
|
+
// file_iter_: !Valid() && status().ok().
|
|
1189
|
+
void TrySetDeleteRangeSentinel(const Slice& boundary_key);
|
|
1190
|
+
void ClearSentinel() { to_return_sentinel_ = false; }
|
|
1191
|
+
|
|
1192
|
+
// Set in Seek() when a prefix seek reaches end of the current file,
|
|
1193
|
+
// and the next file has a different prefix. SkipEmptyFileForward()
|
|
1194
|
+
// will not move to next file when this flag is set.
|
|
1195
|
+
bool prefix_exhausted_ = false;
|
|
1120
1196
|
};
|
|
1121
1197
|
|
|
1198
|
+
void LevelIterator::TrySetDeleteRangeSentinel(const Slice& boundary_key) {
|
|
1199
|
+
assert(range_tombstone_iter_);
|
|
1200
|
+
if (file_iter_.iter() != nullptr && !file_iter_.Valid() &&
|
|
1201
|
+
file_iter_.status().ok()) {
|
|
1202
|
+
to_return_sentinel_ = true;
|
|
1203
|
+
sentinel_ = boundary_key;
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1122
1207
|
void LevelIterator::Seek(const Slice& target) {
|
|
1208
|
+
prefix_exhausted_ = false;
|
|
1209
|
+
ClearSentinel();
|
|
1123
1210
|
// Check whether the seek key fall under the same file
|
|
1124
1211
|
bool need_to_reseek = true;
|
|
1125
1212
|
if (file_iter_.iter() != nullptr && file_index_ < flevel_->num_files) {
|
|
@@ -1148,44 +1235,82 @@ void LevelIterator::Seek(const Slice& target) {
|
|
|
1148
1235
|
if (file_iter_.status() == Status::TryAgain()) {
|
|
1149
1236
|
return;
|
|
1150
1237
|
}
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
(
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1238
|
+
if (!file_iter_.Valid() && file_iter_.status().ok() &&
|
|
1239
|
+
prefix_extractor_ != nullptr && !read_options_.total_order_seek &&
|
|
1240
|
+
!read_options_.auto_prefix_mode &&
|
|
1241
|
+
file_index_ < flevel_->num_files - 1) {
|
|
1242
|
+
size_t ts_sz = user_comparator_.timestamp_size();
|
|
1243
|
+
Slice target_user_key_without_ts =
|
|
1244
|
+
ExtractUserKeyAndStripTimestamp(target, ts_sz);
|
|
1245
|
+
Slice next_file_first_user_key_without_ts =
|
|
1246
|
+
ExtractUserKeyAndStripTimestamp(file_smallest_key(file_index_ + 1),
|
|
1247
|
+
ts_sz);
|
|
1248
|
+
if (prefix_extractor_->InDomain(target_user_key_without_ts) &&
|
|
1249
|
+
(!prefix_extractor_->InDomain(next_file_first_user_key_without_ts) ||
|
|
1250
|
+
user_comparator_.CompareWithoutTimestamp(
|
|
1251
|
+
prefix_extractor_->Transform(target_user_key_without_ts), false,
|
|
1252
|
+
prefix_extractor_->Transform(
|
|
1253
|
+
next_file_first_user_key_without_ts),
|
|
1254
|
+
false) != 0)) {
|
|
1255
|
+
// SkipEmptyFileForward() will not advance to next file when this flag
|
|
1256
|
+
// is set for reason detailed below.
|
|
1257
|
+
//
|
|
1258
|
+
// The file we initially positioned to has no keys under the target
|
|
1259
|
+
// prefix, and the next file's smallest key has a different prefix than
|
|
1260
|
+
// target. When doing prefix iterator seek, when keys for one prefix
|
|
1261
|
+
// have been exhausted, it can jump to any key that is larger. Here we
|
|
1262
|
+
// are enforcing a stricter contract than that, in order to make it
|
|
1263
|
+
// easier for higher layers (merging and DB iterator) to reason about the
|
|
1264
|
+
// correctness:
|
|
1265
|
+
// 1. Within the prefix, the result should be accurate.
|
|
1266
|
+
// 2. If keys for the prefix are exhausted, it is either positioned to
|
|
1267
|
+
// the next key after the prefix, or make the iterator invalid.
|
|
1268
|
+
// A side benefit will be that it invalidates the iterator earlier so
|
|
1269
|
+
// that the upper level merging iterator can merge fewer child
|
|
1270
|
+
// iterators.
|
|
1271
|
+
//
|
|
1272
|
+
// The flag is cleared in Seek*() calls. There is no need to clear the
|
|
1273
|
+
// flag in Prev() since Prev() will not be called when the flag is set
|
|
1274
|
+
// for reasons explained below. If range_tombstone_iter_ is nullptr,
|
|
1275
|
+
// then there is no file boundary sentinel key. Since
|
|
1276
|
+
// !file_iter_.Valid() from the if condition above, this level iterator
|
|
1277
|
+
// is !Valid(), so Prev() will not be called. If range_tombstone_iter_
|
|
1278
|
+
// is not nullptr, there are two cases depending on if this level
|
|
1279
|
+
// iterator reaches top of the heap in merging iterator (the upper
|
|
1280
|
+
// layer).
|
|
1281
|
+
// If so, merging iterator will see the sentinel key, call
|
|
1282
|
+
// NextAndGetResult() and the call to NextAndGetResult() will skip the
|
|
1283
|
+
// sentinel key and make this level iterator invalid. If not, then it
|
|
1284
|
+
// could be because the upper layer is done before any method of this
|
|
1285
|
+
// level iterator is called or another Seek*() call is invoked. Either
|
|
1286
|
+
// way, Prev() is never called before Seek*().
|
|
1287
|
+
// The flag should not be cleared at the beginning of
|
|
1288
|
+
// Next/NextAndGetResult() since it is used in SkipEmptyFileForward()
|
|
1289
|
+
// called in Next/NextAndGetResult().
|
|
1290
|
+
prefix_exhausted_ = true;
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
if (range_tombstone_iter_) {
|
|
1295
|
+
TrySetDeleteRangeSentinel(file_largest_key(file_index_));
|
|
1182
1296
|
}
|
|
1183
1297
|
}
|
|
1298
|
+
SkipEmptyFileForward();
|
|
1184
1299
|
CheckMayBeOutOfLowerBound();
|
|
1185
1300
|
}
|
|
1186
1301
|
|
|
1187
1302
|
void LevelIterator::SeekForPrev(const Slice& target) {
|
|
1303
|
+
prefix_exhausted_ = false;
|
|
1304
|
+
ClearSentinel();
|
|
1188
1305
|
size_t new_file_index = FindFile(icomparator_, *flevel_, target);
|
|
1306
|
+
// Seek beyond this level's smallest key
|
|
1307
|
+
if (new_file_index == 0 &&
|
|
1308
|
+
icomparator_.Compare(target, file_smallest_key(0)) < 0) {
|
|
1309
|
+
SetFileIterator(nullptr);
|
|
1310
|
+
ClearRangeTombstoneIter();
|
|
1311
|
+
CheckMayBeOutOfLowerBound();
|
|
1312
|
+
return;
|
|
1313
|
+
}
|
|
1189
1314
|
if (new_file_index >= flevel_->num_files) {
|
|
1190
1315
|
new_file_index = flevel_->num_files - 1;
|
|
1191
1316
|
}
|
|
@@ -1193,24 +1318,47 @@ void LevelIterator::SeekForPrev(const Slice& target) {
|
|
|
1193
1318
|
InitFileIterator(new_file_index);
|
|
1194
1319
|
if (file_iter_.iter() != nullptr) {
|
|
1195
1320
|
file_iter_.SeekForPrev(target);
|
|
1321
|
+
if (range_tombstone_iter_ &&
|
|
1322
|
+
icomparator_.Compare(target, file_smallest_key(file_index_)) >= 0) {
|
|
1323
|
+
// In SeekForPrev() case, it is possible that the target is less than
|
|
1324
|
+
// file's lower boundary since largest key is used to determine file index
|
|
1325
|
+
// (FindFile()). When target is less than file's lower boundary, sentinel
|
|
1326
|
+
// key should not be set so that SeekForPrev() does not result in a key
|
|
1327
|
+
// larger than target. This is correct in that there is no need to keep
|
|
1328
|
+
// the range tombstones in this file alive as they only cover keys
|
|
1329
|
+
// starting from the file's lower boundary, which is after `target`.
|
|
1330
|
+
TrySetDeleteRangeSentinel(file_smallest_key(file_index_));
|
|
1331
|
+
}
|
|
1196
1332
|
SkipEmptyFileBackward();
|
|
1197
1333
|
}
|
|
1198
1334
|
CheckMayBeOutOfLowerBound();
|
|
1199
1335
|
}
|
|
1200
1336
|
|
|
1201
1337
|
void LevelIterator::SeekToFirst() {
|
|
1338
|
+
prefix_exhausted_ = false;
|
|
1339
|
+
ClearSentinel();
|
|
1202
1340
|
InitFileIterator(0);
|
|
1203
1341
|
if (file_iter_.iter() != nullptr) {
|
|
1204
1342
|
file_iter_.SeekToFirst();
|
|
1343
|
+
if (range_tombstone_iter_) {
|
|
1344
|
+
// We do this in SeekToFirst() and SeekToLast() since
|
|
1345
|
+
// we could have an empty file with only range tombstones.
|
|
1346
|
+
TrySetDeleteRangeSentinel(file_largest_key(file_index_));
|
|
1347
|
+
}
|
|
1205
1348
|
}
|
|
1206
1349
|
SkipEmptyFileForward();
|
|
1207
1350
|
CheckMayBeOutOfLowerBound();
|
|
1208
1351
|
}
|
|
1209
1352
|
|
|
1210
1353
|
void LevelIterator::SeekToLast() {
|
|
1354
|
+
prefix_exhausted_ = false;
|
|
1355
|
+
ClearSentinel();
|
|
1211
1356
|
InitFileIterator(flevel_->num_files - 1);
|
|
1212
1357
|
if (file_iter_.iter() != nullptr) {
|
|
1213
1358
|
file_iter_.SeekToLast();
|
|
1359
|
+
if (range_tombstone_iter_) {
|
|
1360
|
+
TrySetDeleteRangeSentinel(file_smallest_key(file_index_));
|
|
1361
|
+
}
|
|
1214
1362
|
}
|
|
1215
1363
|
SkipEmptyFileBackward();
|
|
1216
1364
|
CheckMayBeOutOfLowerBound();
|
|
@@ -1218,25 +1366,47 @@ void LevelIterator::SeekToLast() {
|
|
|
1218
1366
|
|
|
1219
1367
|
void LevelIterator::Next() {
|
|
1220
1368
|
assert(Valid());
|
|
1221
|
-
|
|
1369
|
+
if (to_return_sentinel_) {
|
|
1370
|
+
// file_iter_ is at EOF already when to_return_sentinel_
|
|
1371
|
+
ClearSentinel();
|
|
1372
|
+
} else {
|
|
1373
|
+
file_iter_.Next();
|
|
1374
|
+
if (range_tombstone_iter_) {
|
|
1375
|
+
TrySetDeleteRangeSentinel(file_largest_key(file_index_));
|
|
1376
|
+
}
|
|
1377
|
+
}
|
|
1222
1378
|
SkipEmptyFileForward();
|
|
1223
1379
|
}
|
|
1224
1380
|
|
|
1225
1381
|
bool LevelIterator::NextAndGetResult(IterateResult* result) {
|
|
1226
1382
|
assert(Valid());
|
|
1227
|
-
|
|
1383
|
+
// file_iter_ is at EOF already when to_return_sentinel_
|
|
1384
|
+
bool is_valid = !to_return_sentinel_ && file_iter_.NextAndGetResult(result);
|
|
1228
1385
|
if (!is_valid) {
|
|
1386
|
+
if (to_return_sentinel_) {
|
|
1387
|
+
ClearSentinel();
|
|
1388
|
+
} else if (range_tombstone_iter_) {
|
|
1389
|
+
TrySetDeleteRangeSentinel(file_largest_key(file_index_));
|
|
1390
|
+
}
|
|
1229
1391
|
is_next_read_sequential_ = true;
|
|
1230
1392
|
SkipEmptyFileForward();
|
|
1231
1393
|
is_next_read_sequential_ = false;
|
|
1232
1394
|
is_valid = Valid();
|
|
1233
1395
|
if (is_valid) {
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1396
|
+
// This could be set in TrySetDeleteRangeSentinel() or
|
|
1397
|
+
// SkipEmptyFileForward() above.
|
|
1398
|
+
if (to_return_sentinel_) {
|
|
1399
|
+
result->key = sentinel_;
|
|
1400
|
+
result->bound_check_result = IterBoundCheck::kUnknown;
|
|
1401
|
+
result->value_prepared = true;
|
|
1402
|
+
} else {
|
|
1403
|
+
result->key = key();
|
|
1404
|
+
result->bound_check_result = file_iter_.UpperBoundCheckResult();
|
|
1405
|
+
// Ideally, we should return the real file_iter_.value_prepared but the
|
|
1406
|
+
// information is not here. It would cause an extra PrepareValue()
|
|
1407
|
+
// for the first key of a file.
|
|
1408
|
+
result->value_prepared = !allow_unprepared_value_;
|
|
1409
|
+
}
|
|
1240
1410
|
}
|
|
1241
1411
|
}
|
|
1242
1412
|
return is_valid;
|
|
@@ -1244,47 +1414,81 @@ bool LevelIterator::NextAndGetResult(IterateResult* result) {
|
|
|
1244
1414
|
|
|
1245
1415
|
void LevelIterator::Prev() {
|
|
1246
1416
|
assert(Valid());
|
|
1247
|
-
|
|
1417
|
+
if (to_return_sentinel_) {
|
|
1418
|
+
ClearSentinel();
|
|
1419
|
+
} else {
|
|
1420
|
+
file_iter_.Prev();
|
|
1421
|
+
if (range_tombstone_iter_) {
|
|
1422
|
+
TrySetDeleteRangeSentinel(file_smallest_key(file_index_));
|
|
1423
|
+
}
|
|
1424
|
+
}
|
|
1248
1425
|
SkipEmptyFileBackward();
|
|
1249
1426
|
}
|
|
1250
1427
|
|
|
1251
1428
|
bool LevelIterator::SkipEmptyFileForward() {
|
|
1252
1429
|
bool seen_empty_file = false;
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1430
|
+
// Pause at sentinel key
|
|
1431
|
+
while (!to_return_sentinel_ &&
|
|
1432
|
+
(file_iter_.iter() == nullptr ||
|
|
1433
|
+
(!file_iter_.Valid() && file_iter_.status().ok() &&
|
|
1434
|
+
file_iter_.iter()->UpperBoundCheckResult() !=
|
|
1435
|
+
IterBoundCheck::kOutOfBound))) {
|
|
1257
1436
|
seen_empty_file = true;
|
|
1258
1437
|
// Move to next file
|
|
1259
|
-
if (file_index_ >= flevel_->num_files - 1
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
break;
|
|
1263
|
-
}
|
|
1264
|
-
if (KeyReachedUpperBound(file_smallest_key(file_index_ + 1))) {
|
|
1438
|
+
if (file_index_ >= flevel_->num_files - 1 ||
|
|
1439
|
+
KeyReachedUpperBound(file_smallest_key(file_index_ + 1)) ||
|
|
1440
|
+
prefix_exhausted_) {
|
|
1265
1441
|
SetFileIterator(nullptr);
|
|
1442
|
+
ClearRangeTombstoneIter();
|
|
1266
1443
|
break;
|
|
1267
1444
|
}
|
|
1445
|
+
// may init a new *range_tombstone_iter
|
|
1268
1446
|
InitFileIterator(file_index_ + 1);
|
|
1447
|
+
// We moved to a new SST file
|
|
1448
|
+
// Seek range_tombstone_iter_ to reset its !Valid() default state.
|
|
1449
|
+
// We do not need to call range_tombstone_iter_.Seek* in
|
|
1450
|
+
// LevelIterator::Seek* since when the merging iterator calls
|
|
1451
|
+
// LevelIterator::Seek*, it should also call Seek* into the corresponding
|
|
1452
|
+
// range tombstone iterator.
|
|
1269
1453
|
if (file_iter_.iter() != nullptr) {
|
|
1270
1454
|
file_iter_.SeekToFirst();
|
|
1455
|
+
if (range_tombstone_iter_) {
|
|
1456
|
+
if (*range_tombstone_iter_) {
|
|
1457
|
+
(*range_tombstone_iter_)->SeekToFirst();
|
|
1458
|
+
}
|
|
1459
|
+
TrySetDeleteRangeSentinel(file_largest_key(file_index_));
|
|
1460
|
+
}
|
|
1271
1461
|
}
|
|
1272
1462
|
}
|
|
1273
1463
|
return seen_empty_file;
|
|
1274
1464
|
}
|
|
1275
1465
|
|
|
1276
1466
|
void LevelIterator::SkipEmptyFileBackward() {
|
|
1277
|
-
|
|
1278
|
-
|
|
1467
|
+
// Pause at sentinel key
|
|
1468
|
+
while (!to_return_sentinel_ &&
|
|
1469
|
+
(file_iter_.iter() == nullptr ||
|
|
1470
|
+
(!file_iter_.Valid() && file_iter_.status().ok()))) {
|
|
1279
1471
|
// Move to previous file
|
|
1280
1472
|
if (file_index_ == 0) {
|
|
1281
1473
|
// Already the first file
|
|
1282
1474
|
SetFileIterator(nullptr);
|
|
1475
|
+
ClearRangeTombstoneIter();
|
|
1283
1476
|
return;
|
|
1284
1477
|
}
|
|
1285
1478
|
InitFileIterator(file_index_ - 1);
|
|
1479
|
+
// We moved to a new SST file
|
|
1480
|
+
// Seek range_tombstone_iter_ to reset its !Valid() default state.
|
|
1286
1481
|
if (file_iter_.iter() != nullptr) {
|
|
1287
1482
|
file_iter_.SeekToLast();
|
|
1483
|
+
if (range_tombstone_iter_) {
|
|
1484
|
+
if (*range_tombstone_iter_) {
|
|
1485
|
+
(*range_tombstone_iter_)->SeekToLast();
|
|
1486
|
+
}
|
|
1487
|
+
TrySetDeleteRangeSentinel(file_smallest_key(file_index_));
|
|
1488
|
+
if (to_return_sentinel_) {
|
|
1489
|
+
break;
|
|
1490
|
+
}
|
|
1491
|
+
}
|
|
1288
1492
|
}
|
|
1289
1493
|
}
|
|
1290
1494
|
}
|
|
@@ -1312,6 +1516,7 @@ void LevelIterator::InitFileIterator(size_t new_file_index) {
|
|
|
1312
1516
|
if (new_file_index >= flevel_->num_files) {
|
|
1313
1517
|
file_index_ = new_file_index;
|
|
1314
1518
|
SetFileIterator(nullptr);
|
|
1519
|
+
ClearRangeTombstoneIter();
|
|
1315
1520
|
return;
|
|
1316
1521
|
} else {
|
|
1317
1522
|
// If the file iterator shows incomplete, we try it again if users seek
|
|
@@ -1337,7 +1542,7 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
|
|
|
1337
1542
|
auto table_cache = cfd_->table_cache();
|
|
1338
1543
|
auto ioptions = cfd_->ioptions();
|
|
1339
1544
|
Status s = table_cache->GetTableProperties(
|
|
1340
|
-
file_options_, cfd_->internal_comparator(), file_meta
|
|
1545
|
+
file_options_, cfd_->internal_comparator(), *file_meta, tp,
|
|
1341
1546
|
mutable_cf_options_.prefix_extractor, true /* no io */);
|
|
1342
1547
|
if (s.ok()) {
|
|
1343
1548
|
return s;
|
|
@@ -1530,7 +1735,8 @@ size_t Version::GetMemoryUsageByTableReaders() {
|
|
|
1530
1735
|
for (auto& file_level : storage_info_.level_files_brief_) {
|
|
1531
1736
|
for (size_t i = 0; i < file_level.num_files; i++) {
|
|
1532
1737
|
total_usage += cfd_->table_cache()->GetMemoryUsageByTableReader(
|
|
1533
|
-
file_options_, cfd_->internal_comparator(),
|
|
1738
|
+
file_options_, cfd_->internal_comparator(),
|
|
1739
|
+
*file_level.files[i].file_metadata,
|
|
1534
1740
|
mutable_cf_options_.prefix_extractor);
|
|
1535
1741
|
}
|
|
1536
1742
|
}
|
|
@@ -1627,38 +1833,27 @@ void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
|
|
|
1627
1833
|
*creation_time = oldest_time;
|
|
1628
1834
|
}
|
|
1629
1835
|
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
oss << "Table Properties: " << InternalUniqueIdToHumanString(&id);
|
|
1650
|
-
} else {
|
|
1651
|
-
oss << "Failed to get Table Properties: " << s.ToString();
|
|
1652
|
-
}
|
|
1653
|
-
return Status::Corruption("VersionSet", oss.str());
|
|
1654
|
-
}
|
|
1655
|
-
TEST_SYNC_POINT_CALLBACK("Version::VerifySstUniqueIds::Passed", &id);
|
|
1656
|
-
} else {
|
|
1657
|
-
TEST_SYNC_POINT_CALLBACK("Version::VerifySstUniqueIds::Skipped", meta);
|
|
1658
|
-
}
|
|
1659
|
-
}
|
|
1836
|
+
InternalIterator* Version::TEST_GetLevelIterator(
|
|
1837
|
+
const ReadOptions& read_options, MergeIteratorBuilder* merge_iter_builder,
|
|
1838
|
+
int level, bool allow_unprepared_value) {
|
|
1839
|
+
auto* arena = merge_iter_builder->GetArena();
|
|
1840
|
+
auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
|
|
1841
|
+
TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
|
|
1842
|
+
auto level_iter = new (mem) LevelIterator(
|
|
1843
|
+
cfd_->table_cache(), read_options, file_options_,
|
|
1844
|
+
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
|
|
1845
|
+
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
|
1846
|
+
cfd_->internal_stats()->GetFileReadHist(level),
|
|
1847
|
+
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
1848
|
+
nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
|
|
1849
|
+
allow_unprepared_value, &tombstone_iter_ptr);
|
|
1850
|
+
if (read_options.ignore_range_deletions) {
|
|
1851
|
+
merge_iter_builder->AddIterator(level_iter);
|
|
1852
|
+
} else {
|
|
1853
|
+
merge_iter_builder->AddPointAndTombstoneIterator(
|
|
1854
|
+
level_iter, nullptr /* tombstone_iter */, tombstone_iter_ptr);
|
|
1660
1855
|
}
|
|
1661
|
-
return
|
|
1856
|
+
return level_iter;
|
|
1662
1857
|
}
|
|
1663
1858
|
|
|
1664
1859
|
uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const {
|
|
@@ -1711,22 +1906,19 @@ double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel(
|
|
|
1711
1906
|
void Version::AddIterators(const ReadOptions& read_options,
|
|
1712
1907
|
const FileOptions& soptions,
|
|
1713
1908
|
MergeIteratorBuilder* merge_iter_builder,
|
|
1714
|
-
RangeDelAggregator* range_del_agg,
|
|
1715
1909
|
bool allow_unprepared_value) {
|
|
1716
1910
|
assert(storage_info_.finalized_);
|
|
1717
1911
|
|
|
1718
1912
|
for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
|
|
1719
1913
|
AddIteratorsForLevel(read_options, soptions, merge_iter_builder, level,
|
|
1720
|
-
|
|
1914
|
+
allow_unprepared_value);
|
|
1721
1915
|
}
|
|
1722
1916
|
}
|
|
1723
1917
|
|
|
1724
1918
|
void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
1725
1919
|
const FileOptions& soptions,
|
|
1726
1920
|
MergeIteratorBuilder* merge_iter_builder,
|
|
1727
|
-
int level,
|
|
1728
|
-
RangeDelAggregator* range_del_agg,
|
|
1729
|
-
bool allow_unprepared_value) {
|
|
1921
|
+
int level, bool allow_unprepared_value) {
|
|
1730
1922
|
assert(storage_info_.finalized_);
|
|
1731
1923
|
if (level >= storage_info_.num_non_empty_levels()) {
|
|
1732
1924
|
// This is an empty level
|
|
@@ -1741,17 +1933,25 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
1741
1933
|
auto* arena = merge_iter_builder->GetArena();
|
|
1742
1934
|
if (level == 0) {
|
|
1743
1935
|
// Merge all level zero files together since they may overlap
|
|
1936
|
+
TruncatedRangeDelIterator* tombstone_iter = nullptr;
|
|
1744
1937
|
for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) {
|
|
1745
1938
|
const auto& file = storage_info_.LevelFilesBrief(0).files[i];
|
|
1746
|
-
|
|
1939
|
+
auto table_iter = cfd_->table_cache()->NewIterator(
|
|
1747
1940
|
read_options, soptions, cfd_->internal_comparator(),
|
|
1748
|
-
*file.file_metadata, range_del_agg,
|
|
1941
|
+
*file.file_metadata, /*range_del_agg=*/nullptr,
|
|
1749
1942
|
mutable_cf_options_.prefix_extractor, nullptr,
|
|
1750
1943
|
cfd_->internal_stats()->GetFileReadHist(0),
|
|
1751
1944
|
TableReaderCaller::kUserIterator, arena,
|
|
1752
1945
|
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
|
|
1753
1946
|
/*smallest_compaction_key=*/nullptr,
|
|
1754
|
-
/*largest_compaction_key=*/nullptr, allow_unprepared_value
|
|
1947
|
+
/*largest_compaction_key=*/nullptr, allow_unprepared_value,
|
|
1948
|
+
&tombstone_iter);
|
|
1949
|
+
if (read_options.ignore_range_deletions) {
|
|
1950
|
+
merge_iter_builder->AddIterator(table_iter);
|
|
1951
|
+
} else {
|
|
1952
|
+
merge_iter_builder->AddPointAndTombstoneIterator(table_iter,
|
|
1953
|
+
tombstone_iter);
|
|
1954
|
+
}
|
|
1755
1955
|
}
|
|
1756
1956
|
if (should_sample) {
|
|
1757
1957
|
// Count ones for every L0 files. This is done per iterator creation
|
|
@@ -1767,14 +1967,21 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|
|
1767
1967
|
// walks through the non-overlapping files in the level, opening them
|
|
1768
1968
|
// lazily.
|
|
1769
1969
|
auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
|
|
1770
|
-
|
|
1970
|
+
TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
|
|
1971
|
+
auto level_iter = new (mem) LevelIterator(
|
|
1771
1972
|
cfd_->table_cache(), read_options, soptions,
|
|
1772
1973
|
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
|
|
1773
1974
|
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
|
1774
1975
|
cfd_->internal_stats()->GetFileReadHist(level),
|
|
1775
1976
|
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
|
1776
|
-
range_del_agg,
|
|
1777
|
-
|
|
1977
|
+
/*range_del_agg=*/nullptr, /*compaction_boundaries=*/nullptr,
|
|
1978
|
+
allow_unprepared_value, &tombstone_iter_ptr);
|
|
1979
|
+
if (read_options.ignore_range_deletions) {
|
|
1980
|
+
merge_iter_builder->AddIterator(level_iter);
|
|
1981
|
+
} else {
|
|
1982
|
+
merge_iter_builder->AddPointAndTombstoneIterator(
|
|
1983
|
+
level_iter, nullptr /* tombstone_iter */, tombstone_iter_ptr);
|
|
1984
|
+
}
|
|
1778
1985
|
}
|
|
1779
1986
|
}
|
|
1780
1987
|
|
|
@@ -2465,8 +2672,8 @@ Status Version::ProcessBatch(
|
|
|
2465
2672
|
std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs,
|
|
2466
2673
|
autovector<FilePickerMultiGet, 4>& batches, std::deque<size_t>& waiting,
|
|
2467
2674
|
std::deque<size_t>& to_process, unsigned int& num_tasks_queued,
|
|
2468
|
-
uint64_t
|
|
2469
|
-
|
|
2675
|
+
std::unordered_map<int, std::tuple<uint64_t, uint64_t, uint64_t>>&
|
|
2676
|
+
mget_stats) {
|
|
2470
2677
|
FilePickerMultiGet& fp = *batch;
|
|
2471
2678
|
MultiGetRange range = fp.GetRange();
|
|
2472
2679
|
// Initialize a new empty range. Any keys that are not in this level will
|
|
@@ -2516,19 +2723,29 @@ Status Version::ProcessBatch(
|
|
|
2516
2723
|
leftover += ~file_range;
|
|
2517
2724
|
range -= ~file_range;
|
|
2518
2725
|
if (!file_range.empty()) {
|
|
2726
|
+
int level = fp.GetHitFileLevel();
|
|
2727
|
+
auto stat = mget_stats.find(level);
|
|
2728
|
+
if (stat == mget_stats.end()) {
|
|
2729
|
+
auto entry = mget_stats.insert({level, {0, 0, 0}});
|
|
2730
|
+
assert(entry.second);
|
|
2731
|
+
stat = entry.first;
|
|
2732
|
+
}
|
|
2733
|
+
|
|
2519
2734
|
if (waiting.empty() && to_process.empty() &&
|
|
2520
2735
|
!fp.RemainingOverlapInLevel() && leftover.empty() &&
|
|
2521
2736
|
mget_tasks.empty()) {
|
|
2522
2737
|
// All keys are in one SST file, so take the fast path
|
|
2523
2738
|
s = MultiGetFromSST(read_options, file_range, fp.GetHitFileLevel(),
|
|
2524
2739
|
skip_filters, skip_range_deletions, f, *blob_ctxs,
|
|
2525
|
-
table_handle,
|
|
2526
|
-
|
|
2740
|
+
table_handle, std::get<0>(stat->second),
|
|
2741
|
+
std::get<1>(stat->second),
|
|
2742
|
+
std::get<2>(stat->second));
|
|
2527
2743
|
} else {
|
|
2528
2744
|
mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
|
|
2529
2745
|
read_options, file_range, fp.GetHitFileLevel(), skip_filters,
|
|
2530
|
-
skip_range_deletions, f, *blob_ctxs, table_handle,
|
|
2531
|
-
|
|
2746
|
+
skip_range_deletions, f, *blob_ctxs, table_handle,
|
|
2747
|
+
std::get<0>(stat->second), std::get<1>(stat->second),
|
|
2748
|
+
std::get<2>(stat->second)));
|
|
2532
2749
|
++num_tasks_queued;
|
|
2533
2750
|
}
|
|
2534
2751
|
}
|
|
@@ -2538,8 +2755,9 @@ Status Version::ProcessBatch(
|
|
|
2538
2755
|
f = fp.GetNextFileInLevel();
|
|
2539
2756
|
}
|
|
2540
2757
|
// Split the current batch only if some keys are likely in this level and
|
|
2541
|
-
// some are not.
|
|
2542
|
-
|
|
2758
|
+
// some are not. Only split if we're done with this level, i.e f is null.
|
|
2759
|
+
// Otherwise, it means there are more files in this level to look at.
|
|
2760
|
+
if (s.ok() && !f && !leftover.empty() && !range.empty()) {
|
|
2543
2761
|
fp.ReplaceRange(range);
|
|
2544
2762
|
batches.emplace_back(&leftover, fp);
|
|
2545
2763
|
to_process.emplace_back(batches.size() - 1);
|
|
@@ -2565,9 +2783,7 @@ Status Version::MultiGetAsync(
|
|
|
2565
2783
|
std::deque<size_t> to_process;
|
|
2566
2784
|
Status s;
|
|
2567
2785
|
std::vector<folly::coro::Task<Status>> mget_tasks;
|
|
2568
|
-
uint64_t
|
|
2569
|
-
uint64_t num_index_read = 0;
|
|
2570
|
-
uint64_t num_sst_read = 0;
|
|
2786
|
+
std::unordered_map<int, std::tuple<uint64_t, uint64_t, uint64_t>> mget_stats;
|
|
2571
2787
|
|
|
2572
2788
|
// Create the initial batch with the input range
|
|
2573
2789
|
batches.emplace_back(range, &storage_info_.level_files_brief_,
|
|
@@ -2577,6 +2793,11 @@ Status Version::MultiGetAsync(
|
|
|
2577
2793
|
to_process.emplace_back(0);
|
|
2578
2794
|
|
|
2579
2795
|
while (!to_process.empty()) {
|
|
2796
|
+
// As we process a batch, it may get split into two. So reserve space for
|
|
2797
|
+
// an additional batch in the autovector in order to prevent later moves
|
|
2798
|
+
// of elements in ProcessBatch().
|
|
2799
|
+
batches.reserve(batches.size() + 1);
|
|
2800
|
+
|
|
2580
2801
|
size_t idx = to_process.front();
|
|
2581
2802
|
FilePickerMultiGet* batch = &batches.at(idx);
|
|
2582
2803
|
unsigned int num_tasks_queued = 0;
|
|
@@ -2589,20 +2810,10 @@ Status Version::MultiGetAsync(
|
|
|
2589
2810
|
// Look through one level. This may split the batch and enqueue it to
|
|
2590
2811
|
// to_process
|
|
2591
2812
|
s = ProcessBatch(options, batch, mget_tasks, blob_ctxs, batches, waiting,
|
|
2592
|
-
to_process, num_tasks_queued,
|
|
2593
|
-
num_index_read, num_sst_read);
|
|
2813
|
+
to_process, num_tasks_queued, mget_stats);
|
|
2594
2814
|
if (!s.ok()) {
|
|
2595
2815
|
break;
|
|
2596
2816
|
}
|
|
2597
|
-
// Dump the stats since the search has moved to the next level
|
|
2598
|
-
if (num_filter_read + num_index_read) {
|
|
2599
|
-
RecordInHistogram(db_statistics_,
|
|
2600
|
-
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
|
|
2601
|
-
num_index_read + num_filter_read);
|
|
2602
|
-
}
|
|
2603
|
-
if (num_sst_read) {
|
|
2604
|
-
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
|
|
2605
|
-
}
|
|
2606
2817
|
// If ProcessBatch didn't enqueue any coroutine tasks, it means all
|
|
2607
2818
|
// keys were filtered out. So put the batch back in to_process to
|
|
2608
2819
|
// lookup in the next level
|
|
@@ -2649,6 +2860,30 @@ Status Version::MultiGetAsync(
|
|
|
2649
2860
|
}
|
|
2650
2861
|
}
|
|
2651
2862
|
|
|
2863
|
+
uint64_t num_levels = 0;
|
|
2864
|
+
for (auto& stat : mget_stats) {
|
|
2865
|
+
if (stat.first == 0) {
|
|
2866
|
+
num_levels += std::get<2>(stat.second);
|
|
2867
|
+
} else {
|
|
2868
|
+
num_levels++;
|
|
2869
|
+
}
|
|
2870
|
+
|
|
2871
|
+
uint64_t num_meta_reads =
|
|
2872
|
+
std::get<0>(stat.second) + std::get<1>(stat.second);
|
|
2873
|
+
uint64_t num_sst_reads = std::get<2>(stat.second);
|
|
2874
|
+
if (num_meta_reads > 0) {
|
|
2875
|
+
RecordInHistogram(db_statistics_,
|
|
2876
|
+
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
|
|
2877
|
+
num_meta_reads);
|
|
2878
|
+
}
|
|
2879
|
+
if (num_sst_reads > 0) {
|
|
2880
|
+
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_reads);
|
|
2881
|
+
}
|
|
2882
|
+
}
|
|
2883
|
+
if (num_levels > 0) {
|
|
2884
|
+
RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET, num_levels);
|
|
2885
|
+
}
|
|
2886
|
+
|
|
2652
2887
|
return s;
|
|
2653
2888
|
}
|
|
2654
2889
|
#endif
|
|
@@ -4562,7 +4797,7 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
|
|
|
4562
4797
|
|
|
4563
4798
|
Status VersionSet::ProcessManifestWrites(
|
|
4564
4799
|
std::deque<ManifestWriter>& writers, InstrumentedMutex* mu,
|
|
4565
|
-
FSDirectory*
|
|
4800
|
+
FSDirectory* dir_contains_current_file, bool new_descriptor_log,
|
|
4566
4801
|
const ColumnFamilyOptions* new_cf_options) {
|
|
4567
4802
|
mu->AssertHeld();
|
|
4568
4803
|
assert(!writers.empty());
|
|
@@ -4893,7 +5128,7 @@ Status VersionSet::ProcessManifestWrites(
|
|
|
4893
5128
|
}
|
|
4894
5129
|
if (s.ok() && new_descriptor_log) {
|
|
4895
5130
|
io_s = SetCurrentFile(fs_.get(), dbname_, pending_manifest_file_number_,
|
|
4896
|
-
|
|
5131
|
+
dir_contains_current_file);
|
|
4897
5132
|
if (!io_s.ok()) {
|
|
4898
5133
|
s = io_s;
|
|
4899
5134
|
}
|
|
@@ -5120,8 +5355,8 @@ Status VersionSet::LogAndApply(
|
|
|
5120
5355
|
const autovector<ColumnFamilyData*>& column_family_datas,
|
|
5121
5356
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
|
5122
5357
|
const autovector<autovector<VersionEdit*>>& edit_lists,
|
|
5123
|
-
InstrumentedMutex* mu, FSDirectory*
|
|
5124
|
-
const ColumnFamilyOptions* new_cf_options,
|
|
5358
|
+
InstrumentedMutex* mu, FSDirectory* dir_contains_current_file,
|
|
5359
|
+
bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options,
|
|
5125
5360
|
const std::vector<std::function<void(const Status&)>>& manifest_wcbs) {
|
|
5126
5361
|
mu->AssertHeld();
|
|
5127
5362
|
int num_edits = 0;
|
|
@@ -5195,9 +5430,8 @@ Status VersionSet::LogAndApply(
|
|
|
5195
5430
|
}
|
|
5196
5431
|
return Status::ColumnFamilyDropped();
|
|
5197
5432
|
}
|
|
5198
|
-
|
|
5199
|
-
|
|
5200
|
-
new_cf_options);
|
|
5433
|
+
return ProcessManifestWrites(writers, mu, dir_contains_current_file,
|
|
5434
|
+
new_descriptor_log, new_cf_options);
|
|
5201
5435
|
}
|
|
5202
5436
|
|
|
5203
5437
|
void VersionSet::LogAndApplyCFHelper(VersionEdit* edit,
|
|
@@ -6079,7 +6313,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const FdWithKeyRange& f,
|
|
|
6079
6313
|
TableCache* table_cache = v->cfd_->table_cache();
|
|
6080
6314
|
if (table_cache != nullptr) {
|
|
6081
6315
|
result = table_cache->ApproximateOffsetOf(
|
|
6082
|
-
key, f.file_metadata
|
|
6316
|
+
key, *f.file_metadata, caller, icmp,
|
|
6083
6317
|
v->GetMutableCFOptions().prefix_extractor);
|
|
6084
6318
|
}
|
|
6085
6319
|
}
|
|
@@ -6119,7 +6353,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
|
|
|
6119
6353
|
return 0;
|
|
6120
6354
|
}
|
|
6121
6355
|
return table_cache->ApproximateSize(
|
|
6122
|
-
start, end, f.file_metadata
|
|
6356
|
+
start, end, *f.file_metadata, caller, icmp,
|
|
6123
6357
|
v->GetMutableCFOptions().prefix_extractor);
|
|
6124
6358
|
}
|
|
6125
6359
|
|
|
@@ -6245,16 +6479,16 @@ InternalIterator* VersionSet::MakeInputIterator(
|
|
|
6245
6479
|
for (size_t i = 0; i < flevel->num_files; i++) {
|
|
6246
6480
|
const FileMetaData& fmd = *flevel->files[i].file_metadata;
|
|
6247
6481
|
if (start.has_value() &&
|
|
6248
|
-
cfd->user_comparator()->
|
|
6249
|
-
|
|
6482
|
+
cfd->user_comparator()->CompareWithoutTimestamp(
|
|
6483
|
+
start.value(), fmd.largest.user_key()) > 0) {
|
|
6250
6484
|
continue;
|
|
6251
6485
|
}
|
|
6252
6486
|
// We should be able to filter out the case where the end key
|
|
6253
6487
|
// equals to the end boundary, since the end key is exclusive.
|
|
6254
6488
|
// We try to be extra safe here.
|
|
6255
6489
|
if (end.has_value() &&
|
|
6256
|
-
cfd->user_comparator()->
|
|
6257
|
-
|
|
6490
|
+
cfd->user_comparator()->CompareWithoutTimestamp(
|
|
6491
|
+
end.value(), fmd.smallest.user_key()) < 0) {
|
|
6258
6492
|
continue;
|
|
6259
6493
|
}
|
|
6260
6494
|
|