@nxtedition/rocksdb 7.1.14 → 7.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +1 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
- package/deps/rocksdb/rocksdb/Makefile +91 -11
- package/deps/rocksdb/rocksdb/TARGETS +8 -4
- package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
- package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
- package/deps/rocksdb/rocksdb/db/c.cc +29 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
- package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
- package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
- package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
- package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
- package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
- package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
- package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
- package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
- package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
- package/deps/rocksdb/rocksdb/file/filename.h +4 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options.cc +2 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +26 -29
- package/deps/rocksdb/rocksdb/table/format.h +44 -26
- package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
- package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
- package/deps/rocksdb/rocksdb/util/timer.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
- package/deps/rocksdb/rocksdb.gyp +0 -1
- package/index.js +6 -10
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
|
@@ -20,7 +20,8 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
20
20
|
// Single cache shard interface.
|
|
21
21
|
class CacheShard {
|
|
22
22
|
public:
|
|
23
|
-
CacheShard()
|
|
23
|
+
explicit CacheShard(CacheMetadataChargePolicy metadata_charge_policy)
|
|
24
|
+
: metadata_charge_policy_(metadata_charge_policy) {}
|
|
24
25
|
virtual ~CacheShard() = default;
|
|
25
26
|
|
|
26
27
|
using DeleterFn = Cache::DeleterFn;
|
|
@@ -47,6 +48,8 @@ class CacheShard {
|
|
|
47
48
|
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
|
|
48
49
|
virtual size_t GetUsage() const = 0;
|
|
49
50
|
virtual size_t GetPinnedUsage() const = 0;
|
|
51
|
+
virtual size_t GetOccupancyCount() const = 0;
|
|
52
|
+
virtual size_t GetTableAddressCount() const = 0;
|
|
50
53
|
// Handles iterating over roughly `average_entries_per_lock` entries, using
|
|
51
54
|
// `state` to somehow record where it last ended up. Caller initially uses
|
|
52
55
|
// *state == 0 and implementation sets *state = UINT32_MAX to indicate
|
|
@@ -57,13 +60,9 @@ class CacheShard {
|
|
|
57
60
|
uint32_t average_entries_per_lock, uint32_t* state) = 0;
|
|
58
61
|
virtual void EraseUnRefEntries() = 0;
|
|
59
62
|
virtual std::string GetPrintableOptions() const { return ""; }
|
|
60
|
-
void set_metadata_charge_policy(
|
|
61
|
-
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
62
|
-
metadata_charge_policy_ = metadata_charge_policy;
|
|
63
|
-
}
|
|
64
63
|
|
|
65
64
|
protected:
|
|
66
|
-
CacheMetadataChargePolicy metadata_charge_policy_
|
|
65
|
+
const CacheMetadataChargePolicy metadata_charge_policy_;
|
|
67
66
|
};
|
|
68
67
|
|
|
69
68
|
// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
|
|
@@ -106,6 +105,8 @@ class ShardedCache : public Cache {
|
|
|
106
105
|
virtual size_t GetUsage() const override;
|
|
107
106
|
virtual size_t GetUsage(Handle* handle) const override;
|
|
108
107
|
virtual size_t GetPinnedUsage() const override;
|
|
108
|
+
virtual size_t GetOccupancyCount() const override;
|
|
109
|
+
virtual size_t GetTableAddressCount() const override;
|
|
109
110
|
virtual void ApplyToAllEntries(
|
|
110
111
|
const std::function<void(const Slice& key, void* value, size_t charge,
|
|
111
112
|
DeleterFn deleter)>& callback,
|
|
@@ -127,6 +128,8 @@ class ShardedCache : public Cache {
|
|
|
127
128
|
std::atomic<uint64_t> last_id_;
|
|
128
129
|
};
|
|
129
130
|
|
|
130
|
-
|
|
131
|
+
// 512KB is traditional minimum shard size.
|
|
132
|
+
int GetDefaultCacheShardBits(size_t capacity,
|
|
133
|
+
size_t min_shard_size = 512U * 1024U);
|
|
131
134
|
|
|
132
135
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -78,7 +78,7 @@ blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
|
|
|
78
78
|
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
|
|
79
79
|
|
|
80
80
|
blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
|
|
81
|
-
$(CRASHTEST_PY) --
|
|
81
|
+
$(CRASHTEST_PY) --test_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS)
|
|
82
82
|
|
|
83
83
|
ifeq ($(CRASH_TEST_KILL_ODD),)
|
|
84
84
|
CRASH_TEST_KILL_ODD=888887
|
|
@@ -103,5 +103,5 @@ whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
|
|
|
103
103
|
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
|
|
104
104
|
|
|
105
105
|
whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
|
|
106
|
-
$(CRASHTEST_PY) --
|
|
106
|
+
$(CRASHTEST_PY) --test_tiered_storage whitebox --random_kill_odd \
|
|
107
107
|
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
|
|
@@ -58,40 +58,60 @@ Status ArenaWrappedDBIter::Refresh() {
|
|
|
58
58
|
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
|
|
59
59
|
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
|
|
60
60
|
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
new (&arena_) Arena();
|
|
61
|
+
auto reinit_internal_iter = [&]() {
|
|
62
|
+
Env* env = db_iter_->env();
|
|
63
|
+
db_iter_->~DBIter();
|
|
64
|
+
arena_.~Arena();
|
|
65
|
+
new (&arena_) Arena();
|
|
67
66
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
67
|
+
SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
|
|
68
|
+
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
|
|
69
|
+
if (read_callback_) {
|
|
70
|
+
read_callback_->Refresh(latest_seq);
|
|
71
|
+
}
|
|
72
|
+
Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
|
|
73
|
+
sv->current, latest_seq,
|
|
74
|
+
sv->mutable_cf_options.max_sequential_skip_in_iterations,
|
|
75
|
+
cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
|
|
76
|
+
allow_refresh_);
|
|
78
77
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
78
|
+
InternalIterator* internal_iter = db_impl_->NewInternalIterator(
|
|
79
|
+
read_options_, cfd_, sv, &arena_, latest_seq,
|
|
80
|
+
/* allow_unprepared_value */ true, /* db_iter */ this);
|
|
81
|
+
SetIterUnderDBIter(internal_iter);
|
|
82
|
+
};
|
|
83
|
+
while (true) {
|
|
84
|
+
if (sv_number_ != cur_sv_number) {
|
|
85
|
+
reinit_internal_iter();
|
|
83
86
|
break;
|
|
84
87
|
} else {
|
|
85
88
|
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
|
|
86
89
|
// Refresh range-tombstones in MemTable
|
|
87
90
|
if (!read_options_.ignore_range_deletions) {
|
|
88
91
|
SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
92
|
+
auto t = sv->mem->NewRangeTombstoneIterator(
|
|
93
|
+
read_options_, latest_seq, false /* immutable_memtable */);
|
|
94
|
+
if (!t || t->empty()) {
|
|
95
|
+
if (memtable_range_tombstone_iter_) {
|
|
96
|
+
delete *memtable_range_tombstone_iter_;
|
|
97
|
+
*memtable_range_tombstone_iter_ = nullptr;
|
|
98
|
+
}
|
|
99
|
+
delete t;
|
|
100
|
+
} else { // current mutable memtable has range tombstones
|
|
101
|
+
if (!memtable_range_tombstone_iter_) {
|
|
102
|
+
delete t;
|
|
103
|
+
cfd_->ReturnThreadLocalSuperVersion(sv);
|
|
104
|
+
// The memtable under DBIter did not have range tombstone before
|
|
105
|
+
// refresh.
|
|
106
|
+
reinit_internal_iter();
|
|
107
|
+
break;
|
|
108
|
+
} else {
|
|
109
|
+
delete *memtable_range_tombstone_iter_;
|
|
110
|
+
*memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator(
|
|
111
|
+
std::unique_ptr<FragmentedRangeTombstoneIterator>(t),
|
|
112
|
+
&cfd_->internal_comparator(), nullptr, nullptr);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
95
115
|
cfd_->ReturnThreadLocalSuperVersion(sv);
|
|
96
116
|
}
|
|
97
117
|
// Refresh latest sequence number
|
|
@@ -44,9 +44,7 @@ class ArenaWrappedDBIter : public Iterator {
|
|
|
44
44
|
// Get the arena to be used to allocate memory for DBIter to be wrapped,
|
|
45
45
|
// as well as child iterators in it.
|
|
46
46
|
virtual Arena* GetArena() { return &arena_; }
|
|
47
|
-
|
|
48
|
-
return db_iter_->GetRangeDelAggregator();
|
|
49
|
-
}
|
|
47
|
+
|
|
50
48
|
const ReadOptions& GetReadOptions() { return read_options_; }
|
|
51
49
|
|
|
52
50
|
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
|
|
@@ -55,6 +53,10 @@ class ArenaWrappedDBIter : public Iterator {
|
|
|
55
53
|
db_iter_->SetIter(iter);
|
|
56
54
|
}
|
|
57
55
|
|
|
56
|
+
void SetMemtableRangetombstoneIter(TruncatedRangeDelIterator** iter) {
|
|
57
|
+
memtable_range_tombstone_iter_ = iter;
|
|
58
|
+
}
|
|
59
|
+
|
|
58
60
|
bool Valid() const override { return db_iter_->Valid(); }
|
|
59
61
|
void SeekToFirst() override { db_iter_->SeekToFirst(); }
|
|
60
62
|
void SeekToLast() override { db_iter_->SeekToLast(); }
|
|
@@ -68,6 +70,7 @@ class ArenaWrappedDBIter : public Iterator {
|
|
|
68
70
|
void Prev() override { db_iter_->Prev(); }
|
|
69
71
|
Slice key() const override { return db_iter_->key(); }
|
|
70
72
|
Slice value() const override { return db_iter_->value(); }
|
|
73
|
+
const WideColumns& columns() const override { return db_iter_->columns(); }
|
|
71
74
|
Status status() const override { return db_iter_->status(); }
|
|
72
75
|
Slice timestamp() const override { return db_iter_->timestamp(); }
|
|
73
76
|
bool IsBlob() const { return db_iter_->IsBlob(); }
|
|
@@ -104,6 +107,9 @@ class ArenaWrappedDBIter : public Iterator {
|
|
|
104
107
|
ReadCallback* read_callback_;
|
|
105
108
|
bool expose_blob_index_ = false;
|
|
106
109
|
bool allow_refresh_ = true;
|
|
110
|
+
// If this is nullptr, it means the mutable memtable does not contain range
|
|
111
|
+
// tombstone when added under this DBIter.
|
|
112
|
+
TruncatedRangeDelIterator** memtable_range_tombstone_iter_ = nullptr;
|
|
107
113
|
};
|
|
108
114
|
|
|
109
115
|
// Generate the arena wrapped iterator class.
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#include "db/blob/blob_contents.h"
|
|
7
|
+
|
|
8
|
+
#include <cassert>
|
|
9
|
+
|
|
10
|
+
#include "cache/cache_entry_roles.h"
|
|
11
|
+
#include "cache/cache_helpers.h"
|
|
12
|
+
#include "port/malloc.h"
|
|
13
|
+
|
|
14
|
+
namespace ROCKSDB_NAMESPACE {
|
|
15
|
+
|
|
16
|
+
std::unique_ptr<BlobContents> BlobContents::Create(
|
|
17
|
+
CacheAllocationPtr&& allocation, size_t size) {
|
|
18
|
+
return std::unique_ptr<BlobContents>(
|
|
19
|
+
new BlobContents(std::move(allocation), size));
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
size_t BlobContents::ApproximateMemoryUsage() const {
|
|
23
|
+
size_t usage = 0;
|
|
24
|
+
|
|
25
|
+
if (allocation_) {
|
|
26
|
+
MemoryAllocator* const allocator = allocation_.get_deleter().allocator;
|
|
27
|
+
|
|
28
|
+
if (allocator) {
|
|
29
|
+
usage += allocator->UsableSize(allocation_.get(), data_.size());
|
|
30
|
+
} else {
|
|
31
|
+
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
|
32
|
+
usage += malloc_usable_size(allocation_.get());
|
|
33
|
+
#else
|
|
34
|
+
usage += data_.size();
|
|
35
|
+
#endif
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
|
40
|
+
usage += malloc_usable_size(const_cast<BlobContents*>(this));
|
|
41
|
+
#else
|
|
42
|
+
usage += sizeof(*this);
|
|
43
|
+
#endif
|
|
44
|
+
|
|
45
|
+
return usage;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
size_t BlobContents::SizeCallback(void* obj) {
|
|
49
|
+
assert(obj);
|
|
50
|
+
|
|
51
|
+
return static_cast<const BlobContents*>(obj)->size();
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
Status BlobContents::SaveToCallback(void* from_obj, size_t from_offset,
|
|
55
|
+
size_t length, void* out) {
|
|
56
|
+
assert(from_obj);
|
|
57
|
+
|
|
58
|
+
const BlobContents* buf = static_cast<const BlobContents*>(from_obj);
|
|
59
|
+
assert(buf->size() >= from_offset + length);
|
|
60
|
+
|
|
61
|
+
memcpy(out, buf->data().data() + from_offset, length);
|
|
62
|
+
|
|
63
|
+
return Status::OK();
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
Cache::CacheItemHelper* BlobContents::GetCacheItemHelper() {
|
|
67
|
+
static Cache::CacheItemHelper cache_helper(
|
|
68
|
+
&SizeCallback, &SaveToCallback,
|
|
69
|
+
GetCacheEntryDeleterForRole<BlobContents, CacheEntryRole::kBlobValue>());
|
|
70
|
+
|
|
71
|
+
return &cache_helper;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
Status BlobContents::CreateCallback(CacheAllocationPtr&& allocation,
|
|
75
|
+
const void* buf, size_t size,
|
|
76
|
+
void** out_obj, size_t* charge) {
|
|
77
|
+
assert(allocation);
|
|
78
|
+
|
|
79
|
+
memcpy(allocation.get(), buf, size);
|
|
80
|
+
|
|
81
|
+
std::unique_ptr<BlobContents> obj = Create(std::move(allocation), size);
|
|
82
|
+
BlobContents* const contents = obj.release();
|
|
83
|
+
|
|
84
|
+
*out_obj = contents;
|
|
85
|
+
*charge = contents->ApproximateMemoryUsage();
|
|
86
|
+
|
|
87
|
+
return Status::OK();
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#pragma once
|
|
7
|
+
|
|
8
|
+
#include <memory>
|
|
9
|
+
|
|
10
|
+
#include "memory/memory_allocator.h"
|
|
11
|
+
#include "rocksdb/cache.h"
|
|
12
|
+
#include "rocksdb/rocksdb_namespace.h"
|
|
13
|
+
#include "rocksdb/slice.h"
|
|
14
|
+
#include "rocksdb/status.h"
|
|
15
|
+
|
|
16
|
+
namespace ROCKSDB_NAMESPACE {
|
|
17
|
+
|
|
18
|
+
// A class representing a single uncompressed value read from a blob file.
|
|
19
|
+
class BlobContents {
|
|
20
|
+
public:
|
|
21
|
+
static std::unique_ptr<BlobContents> Create(CacheAllocationPtr&& allocation,
|
|
22
|
+
size_t size);
|
|
23
|
+
|
|
24
|
+
BlobContents(const BlobContents&) = delete;
|
|
25
|
+
BlobContents& operator=(const BlobContents&) = delete;
|
|
26
|
+
|
|
27
|
+
BlobContents(BlobContents&&) = default;
|
|
28
|
+
BlobContents& operator=(BlobContents&&) = default;
|
|
29
|
+
|
|
30
|
+
~BlobContents() = default;
|
|
31
|
+
|
|
32
|
+
const Slice& data() const { return data_; }
|
|
33
|
+
size_t size() const { return data_.size(); }
|
|
34
|
+
|
|
35
|
+
size_t ApproximateMemoryUsage() const;
|
|
36
|
+
|
|
37
|
+
// Callbacks for secondary cache
|
|
38
|
+
static size_t SizeCallback(void* obj);
|
|
39
|
+
|
|
40
|
+
static Status SaveToCallback(void* from_obj, size_t from_offset,
|
|
41
|
+
size_t length, void* out);
|
|
42
|
+
|
|
43
|
+
static Cache::CacheItemHelper* GetCacheItemHelper();
|
|
44
|
+
|
|
45
|
+
static Status CreateCallback(CacheAllocationPtr&& allocation, const void* buf,
|
|
46
|
+
size_t size, void** out_obj, size_t* charge);
|
|
47
|
+
|
|
48
|
+
private:
|
|
49
|
+
BlobContents(CacheAllocationPtr&& allocation, size_t size)
|
|
50
|
+
: allocation_(std::move(allocation)), data_(allocation_.get(), size) {}
|
|
51
|
+
|
|
52
|
+
CacheAllocationPtr allocation_;
|
|
53
|
+
Slice data_;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
#include <cassert>
|
|
9
9
|
|
|
10
|
+
#include "db/blob/blob_contents.h"
|
|
10
11
|
#include "db/blob/blob_file_addition.h"
|
|
11
12
|
#include "db/blob/blob_file_completion_callback.h"
|
|
12
13
|
#include "db/blob/blob_index.h"
|
|
@@ -408,16 +409,28 @@ Status BlobFileBuilder::PutBlobIntoCacheIfNeeded(const Slice& blob,
|
|
|
408
409
|
|
|
409
410
|
// Objects to be put into the cache have to be heap-allocated and
|
|
410
411
|
// self-contained, i.e. own their contents. The Cache has to be able to
|
|
411
|
-
// take unique ownership of them.
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
412
|
+
// take unique ownership of them.
|
|
413
|
+
CacheAllocationPtr allocation =
|
|
414
|
+
AllocateBlock(blob.size(), blob_cache->memory_allocator());
|
|
415
|
+
memcpy(allocation.get(), blob.data(), blob.size());
|
|
416
|
+
std::unique_ptr<BlobContents> buf =
|
|
417
|
+
BlobContents::Create(std::move(allocation), blob.size());
|
|
418
|
+
|
|
419
|
+
Cache::CacheItemHelper* const cache_item_helper =
|
|
420
|
+
BlobContents::GetCacheItemHelper();
|
|
421
|
+
assert(cache_item_helper);
|
|
422
|
+
|
|
423
|
+
if (immutable_options_->lowest_used_cache_tier ==
|
|
424
|
+
CacheTier::kNonVolatileBlockTier) {
|
|
425
|
+
s = blob_cache->Insert(key, buf.get(), cache_item_helper,
|
|
426
|
+
buf->ApproximateMemoryUsage(),
|
|
427
|
+
nullptr /* cache_handle */, priority);
|
|
428
|
+
} else {
|
|
429
|
+
s = blob_cache->Insert(key, buf.get(), buf->ApproximateMemoryUsage(),
|
|
430
|
+
cache_item_helper->del_cb,
|
|
431
|
+
nullptr /* cache_handle */, priority);
|
|
432
|
+
}
|
|
433
|
+
|
|
421
434
|
if (s.ok()) {
|
|
422
435
|
RecordTick(statistics, BLOB_DB_CACHE_ADD);
|
|
423
436
|
RecordTick(statistics, BLOB_DB_CACHE_BYTES_WRITE, buf->size());
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include <cassert>
|
|
9
9
|
#include <string>
|
|
10
10
|
|
|
11
|
+
#include "db/blob/blob_contents.h"
|
|
11
12
|
#include "db/blob/blob_log_format.h"
|
|
12
13
|
#include "file/file_prefetch_buffer.h"
|
|
13
14
|
#include "file/filename.h"
|
|
@@ -283,14 +284,12 @@ BlobFileReader::BlobFileReader(
|
|
|
283
284
|
|
|
284
285
|
BlobFileReader::~BlobFileReader() = default;
|
|
285
286
|
|
|
286
|
-
Status BlobFileReader::GetBlob(
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
uint64_t* bytes_read) const {
|
|
293
|
-
assert(value);
|
|
287
|
+
Status BlobFileReader::GetBlob(
|
|
288
|
+
const ReadOptions& read_options, const Slice& user_key, uint64_t offset,
|
|
289
|
+
uint64_t value_size, CompressionType compression_type,
|
|
290
|
+
FilePrefetchBuffer* prefetch_buffer, MemoryAllocator* allocator,
|
|
291
|
+
std::unique_ptr<BlobContents>* result, uint64_t* bytes_read) const {
|
|
292
|
+
assert(result);
|
|
294
293
|
|
|
295
294
|
const uint64_t key_size = user_key.size();
|
|
296
295
|
|
|
@@ -361,8 +360,8 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
|
|
|
361
360
|
const Slice value_slice(record_slice.data() + adjustment, value_size);
|
|
362
361
|
|
|
363
362
|
{
|
|
364
|
-
const Status s = UncompressBlobIfNeeded(
|
|
365
|
-
|
|
363
|
+
const Status s = UncompressBlobIfNeeded(
|
|
364
|
+
value_slice, compression_type, allocator, clock_, statistics_, result);
|
|
366
365
|
if (!s.ok()) {
|
|
367
366
|
return s;
|
|
368
367
|
}
|
|
@@ -375,16 +374,18 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
|
|
|
375
374
|
return Status::OK();
|
|
376
375
|
}
|
|
377
376
|
|
|
378
|
-
void BlobFileReader::MultiGetBlob(
|
|
379
|
-
|
|
380
|
-
|
|
377
|
+
void BlobFileReader::MultiGetBlob(
|
|
378
|
+
const ReadOptions& read_options, MemoryAllocator* allocator,
|
|
379
|
+
autovector<std::pair<BlobReadRequest*, std::unique_ptr<BlobContents>>>&
|
|
380
|
+
blob_reqs,
|
|
381
|
+
uint64_t* bytes_read) const {
|
|
381
382
|
const size_t num_blobs = blob_reqs.size();
|
|
382
383
|
assert(num_blobs > 0);
|
|
383
384
|
assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
|
|
384
385
|
|
|
385
386
|
#ifndef NDEBUG
|
|
386
387
|
for (size_t i = 0; i < num_blobs - 1; ++i) {
|
|
387
|
-
assert(blob_reqs[i]->offset <= blob_reqs[i + 1]->offset);
|
|
388
|
+
assert(blob_reqs[i].first->offset <= blob_reqs[i + 1].first->offset);
|
|
388
389
|
}
|
|
389
390
|
#endif // !NDEBUG
|
|
390
391
|
|
|
@@ -393,16 +394,21 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
|
|
|
393
394
|
uint64_t total_len = 0;
|
|
394
395
|
read_reqs.reserve(num_blobs);
|
|
395
396
|
for (size_t i = 0; i < num_blobs; ++i) {
|
|
396
|
-
const
|
|
397
|
-
|
|
398
|
-
|
|
397
|
+
BlobReadRequest* const req = blob_reqs[i].first;
|
|
398
|
+
assert(req);
|
|
399
|
+
assert(req->user_key);
|
|
400
|
+
assert(req->status);
|
|
401
|
+
|
|
402
|
+
const size_t key_size = req->user_key->size();
|
|
403
|
+
const uint64_t offset = req->offset;
|
|
404
|
+
const uint64_t value_size = req->len;
|
|
399
405
|
|
|
400
406
|
if (!IsValidBlobOffset(offset, key_size, value_size, file_size_)) {
|
|
401
|
-
*
|
|
407
|
+
*req->status = Status::Corruption("Invalid blob offset");
|
|
402
408
|
continue;
|
|
403
409
|
}
|
|
404
|
-
if (
|
|
405
|
-
*
|
|
410
|
+
if (req->compression != compression_type_) {
|
|
411
|
+
*req->status =
|
|
406
412
|
Status::Corruption("Compression type mismatch when reading a blob");
|
|
407
413
|
continue;
|
|
408
414
|
}
|
|
@@ -411,12 +417,12 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
|
|
|
411
417
|
read_options.verify_checksums
|
|
412
418
|
? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size)
|
|
413
419
|
: 0;
|
|
414
|
-
assert(
|
|
420
|
+
assert(req->offset >= adjustment);
|
|
415
421
|
adjustments.push_back(adjustment);
|
|
416
422
|
|
|
417
423
|
FSReadRequest read_req = {};
|
|
418
|
-
read_req.offset =
|
|
419
|
-
read_req.len =
|
|
424
|
+
read_req.offset = req->offset - adjustment;
|
|
425
|
+
read_req.len = req->len + adjustment;
|
|
420
426
|
read_reqs.emplace_back(read_req);
|
|
421
427
|
total_len += read_req.len;
|
|
422
428
|
}
|
|
@@ -450,8 +456,11 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
|
|
|
450
456
|
for (auto& req : read_reqs) {
|
|
451
457
|
req.status.PermitUncheckedError();
|
|
452
458
|
}
|
|
453
|
-
for (auto&
|
|
459
|
+
for (auto& blob_req : blob_reqs) {
|
|
460
|
+
BlobReadRequest* const req = blob_req.first;
|
|
461
|
+
assert(req);
|
|
454
462
|
assert(req->status);
|
|
463
|
+
|
|
455
464
|
if (!req->status->IsCorruption()) {
|
|
456
465
|
// Avoid overwriting corruption status.
|
|
457
466
|
*req->status = s;
|
|
@@ -464,38 +473,42 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
|
|
|
464
473
|
|
|
465
474
|
uint64_t total_bytes = 0;
|
|
466
475
|
for (size_t i = 0, j = 0; i < num_blobs; ++i) {
|
|
467
|
-
|
|
468
|
-
|
|
476
|
+
BlobReadRequest* const req = blob_reqs[i].first;
|
|
477
|
+
assert(req);
|
|
478
|
+
assert(req->user_key);
|
|
479
|
+
assert(req->status);
|
|
480
|
+
|
|
481
|
+
if (!req->status->ok()) {
|
|
469
482
|
continue;
|
|
470
483
|
}
|
|
471
484
|
|
|
472
485
|
assert(j < read_reqs.size());
|
|
473
|
-
auto&
|
|
474
|
-
const auto& record_slice =
|
|
475
|
-
if (
|
|
476
|
-
|
|
486
|
+
auto& read_req = read_reqs[j++];
|
|
487
|
+
const auto& record_slice = read_req.result;
|
|
488
|
+
if (read_req.status.ok() && record_slice.size() != read_req.len) {
|
|
489
|
+
read_req.status =
|
|
490
|
+
IOStatus::Corruption("Failed to read data from blob file");
|
|
477
491
|
}
|
|
478
492
|
|
|
479
|
-
*
|
|
480
|
-
if (!
|
|
493
|
+
*req->status = read_req.status;
|
|
494
|
+
if (!req->status->ok()) {
|
|
481
495
|
continue;
|
|
482
496
|
}
|
|
483
497
|
|
|
484
498
|
// Verify checksums if enabled
|
|
485
499
|
if (read_options.verify_checksums) {
|
|
486
|
-
*
|
|
487
|
-
|
|
488
|
-
if (!blob_reqs[i]->status->ok()) {
|
|
500
|
+
*req->status = VerifyBlob(record_slice, *req->user_key, req->len);
|
|
501
|
+
if (!req->status->ok()) {
|
|
489
502
|
continue;
|
|
490
503
|
}
|
|
491
504
|
}
|
|
492
505
|
|
|
493
506
|
// Uncompress blob if needed
|
|
494
|
-
Slice value_slice(record_slice.data() + adjustments[i],
|
|
495
|
-
*
|
|
496
|
-
UncompressBlobIfNeeded(value_slice, compression_type_,
|
|
497
|
-
statistics_, blob_reqs[i]
|
|
498
|
-
if (
|
|
507
|
+
Slice value_slice(record_slice.data() + adjustments[i], req->len);
|
|
508
|
+
*req->status =
|
|
509
|
+
UncompressBlobIfNeeded(value_slice, compression_type_, allocator,
|
|
510
|
+
clock_, statistics_, &blob_reqs[i].second);
|
|
511
|
+
if (req->status->ok()) {
|
|
499
512
|
total_bytes += record_slice.size();
|
|
500
513
|
}
|
|
501
514
|
}
|
|
@@ -549,15 +562,18 @@ Status BlobFileReader::VerifyBlob(const Slice& record_slice,
|
|
|
549
562
|
return Status::OK();
|
|
550
563
|
}
|
|
551
564
|
|
|
552
|
-
Status BlobFileReader::UncompressBlobIfNeeded(
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
assert(value);
|
|
565
|
+
Status BlobFileReader::UncompressBlobIfNeeded(
|
|
566
|
+
const Slice& value_slice, CompressionType compression_type,
|
|
567
|
+
MemoryAllocator* allocator, SystemClock* clock, Statistics* statistics,
|
|
568
|
+
std::unique_ptr<BlobContents>* result) {
|
|
569
|
+
assert(result);
|
|
558
570
|
|
|
559
571
|
if (compression_type == kNoCompression) {
|
|
560
|
-
|
|
572
|
+
CacheAllocationPtr allocation =
|
|
573
|
+
AllocateBlock(value_slice.size(), allocator);
|
|
574
|
+
memcpy(allocation.get(), value_slice.data(), value_slice.size());
|
|
575
|
+
|
|
576
|
+
*result = BlobContents::Create(std::move(allocation), value_slice.size());
|
|
561
577
|
|
|
562
578
|
return Status::OK();
|
|
563
579
|
}
|
|
@@ -568,7 +584,6 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
|
|
|
568
584
|
|
|
569
585
|
size_t uncompressed_size = 0;
|
|
570
586
|
constexpr uint32_t compression_format_version = 2;
|
|
571
|
-
constexpr MemoryAllocator* allocator = nullptr;
|
|
572
587
|
|
|
573
588
|
CacheAllocationPtr output;
|
|
574
589
|
|
|
@@ -587,19 +602,9 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
|
|
|
587
602
|
return Status::Corruption("Unable to uncompress blob");
|
|
588
603
|
}
|
|
589
604
|
|
|
590
|
-
|
|
605
|
+
*result = BlobContents::Create(std::move(output), uncompressed_size);
|
|
591
606
|
|
|
592
607
|
return Status::OK();
|
|
593
608
|
}
|
|
594
609
|
|
|
595
|
-
void BlobFileReader::SaveValue(const Slice& src, PinnableSlice* dst) {
|
|
596
|
-
assert(dst);
|
|
597
|
-
|
|
598
|
-
if (dst->IsPinned()) {
|
|
599
|
-
dst->Reset();
|
|
600
|
-
}
|
|
601
|
-
|
|
602
|
-
dst->PinSelf(src);
|
|
603
|
-
}
|
|
604
|
-
|
|
605
610
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -23,7 +23,7 @@ class HistogramImpl;
|
|
|
23
23
|
struct ReadOptions;
|
|
24
24
|
class Slice;
|
|
25
25
|
class FilePrefetchBuffer;
|
|
26
|
-
class
|
|
26
|
+
class BlobContents;
|
|
27
27
|
class Statistics;
|
|
28
28
|
|
|
29
29
|
class BlobFileReader {
|
|
@@ -44,13 +44,17 @@ class BlobFileReader {
|
|
|
44
44
|
Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
|
|
45
45
|
uint64_t offset, uint64_t value_size,
|
|
46
46
|
CompressionType compression_type,
|
|
47
|
-
FilePrefetchBuffer* prefetch_buffer,
|
|
47
|
+
FilePrefetchBuffer* prefetch_buffer,
|
|
48
|
+
MemoryAllocator* allocator,
|
|
49
|
+
std::unique_ptr<BlobContents>* result,
|
|
48
50
|
uint64_t* bytes_read) const;
|
|
49
51
|
|
|
50
52
|
// offsets must be sorted in ascending order by caller.
|
|
51
|
-
void MultiGetBlob(
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
void MultiGetBlob(
|
|
54
|
+
const ReadOptions& read_options, MemoryAllocator* allocator,
|
|
55
|
+
autovector<std::pair<BlobReadRequest*, std::unique_ptr<BlobContents>>>&
|
|
56
|
+
blob_reqs,
|
|
57
|
+
uint64_t* bytes_read) const;
|
|
54
58
|
|
|
55
59
|
CompressionType GetCompressionType() const { return compression_type_; }
|
|
56
60
|
|
|
@@ -89,11 +93,10 @@ class BlobFileReader {
|
|
|
89
93
|
|
|
90
94
|
static Status UncompressBlobIfNeeded(const Slice& value_slice,
|
|
91
95
|
CompressionType compression_type,
|
|
96
|
+
MemoryAllocator* allocator,
|
|
92
97
|
SystemClock* clock,
|
|
93
98
|
Statistics* statistics,
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
static void SaveValue(const Slice& src, PinnableSlice* dst);
|
|
99
|
+
std::unique_ptr<BlobContents>* result);
|
|
97
100
|
|
|
98
101
|
std::unique_ptr<RandomAccessFileReader> file_reader_;
|
|
99
102
|
uint64_t file_size_;
|