@nxtedition/rocksdb 7.0.22 → 7.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +9 -4
- package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
- package/deps/rocksdb/rocksdb/Makefile +6 -2
- package/deps/rocksdb/rocksdb/TARGETS +14 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
- package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
- package/deps/rocksdb/rocksdb/db/c.cc +68 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
- package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
- package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
- package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
- package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
- package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
- package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
- package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
- package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
- package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
- package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
- package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
- package/deps/rocksdb/rocksdb/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/src.mk +5 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/util/compression.h +2 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
- package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb.gyp +5 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
|
@@ -1907,114 +1907,62 @@ Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key,
|
|
|
1907
1907
|
|
|
1908
1908
|
void Version::MultiGetBlob(
|
|
1909
1909
|
const ReadOptions& read_options, MultiGetRange& range,
|
|
1910
|
-
std::unordered_map<uint64_t,
|
|
1911
|
-
|
|
1912
|
-
Status s = Status::Incomplete("Cannot read blob(s): no disk I/O allowed");
|
|
1913
|
-
for (const auto& elem : blob_rqs) {
|
|
1914
|
-
for (const auto& blob_rq : elem.second) {
|
|
1915
|
-
const KeyContext& key_context = blob_rq.second;
|
|
1916
|
-
assert(key_context.s);
|
|
1917
|
-
assert(key_context.s->ok());
|
|
1918
|
-
*(key_context.s) = s;
|
|
1919
|
-
assert(key_context.get_context);
|
|
1920
|
-
auto& get_context = *(key_context.get_context);
|
|
1921
|
-
get_context.MarkKeyMayExist();
|
|
1922
|
-
}
|
|
1923
|
-
}
|
|
1924
|
-
return;
|
|
1925
|
-
}
|
|
1910
|
+
std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs) {
|
|
1911
|
+
assert(!blob_ctxs.empty());
|
|
1926
1912
|
|
|
1927
|
-
|
|
1928
|
-
Status status;
|
|
1913
|
+
autovector<BlobFileReadRequests> blob_reqs;
|
|
1929
1914
|
|
|
1930
|
-
for (auto&
|
|
1931
|
-
const
|
|
1915
|
+
for (auto& ctx : blob_ctxs) {
|
|
1916
|
+
const auto file_number = ctx.first;
|
|
1917
|
+
const auto blob_file_meta = storage_info_.GetBlobFileMetaData(file_number);
|
|
1932
1918
|
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
}
|
|
1939
|
-
continue;
|
|
1940
|
-
}
|
|
1919
|
+
autovector<BlobReadRequest> blob_reqs_in_file;
|
|
1920
|
+
BlobReadContexts& blobs_in_file = ctx.second;
|
|
1921
|
+
for (const auto& blob : blobs_in_file) {
|
|
1922
|
+
const BlobIndex& blob_index = blob.first;
|
|
1923
|
+
const KeyContext& key_context = blob.second;
|
|
1941
1924
|
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
assert(!status.ok() || blob_file_reader.GetValue());
|
|
1925
|
+
if (!blob_file_meta) {
|
|
1926
|
+
*key_context.s = Status::Corruption("Invalid blob file number");
|
|
1927
|
+
continue;
|
|
1928
|
+
}
|
|
1947
1929
|
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
*(key_context.s) = status;
|
|
1930
|
+
if (blob_index.HasTTL() || blob_index.IsInlined()) {
|
|
1931
|
+
*key_context.s =
|
|
1932
|
+
Status::Corruption("Unexpected TTL/inlined blob index");
|
|
1933
|
+
continue;
|
|
1953
1934
|
}
|
|
1954
|
-
continue;
|
|
1955
|
-
}
|
|
1956
1935
|
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1936
|
+
key_context.value->Reset();
|
|
1937
|
+
blob_reqs_in_file.emplace_back(
|
|
1938
|
+
key_context.ukey_with_ts, blob_index.offset(), blob_index.size(),
|
|
1939
|
+
blob_index.compression(), key_context.value, key_context.s);
|
|
1940
|
+
}
|
|
1941
|
+
if (blob_reqs_in_file.size() > 0) {
|
|
1942
|
+
const auto file_size = blob_file_meta->GetBlobFileSize();
|
|
1943
|
+
blob_reqs.emplace_back(file_number, file_size, blob_reqs_in_file);
|
|
1944
|
+
}
|
|
1945
|
+
}
|
|
1961
1946
|
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
[](const BlobReadRequest& lhs, const BlobReadRequest& rhs) -> bool {
|
|
1966
|
-
assert(lhs.first.file_number() == rhs.first.file_number());
|
|
1967
|
-
return lhs.first.offset() < rhs.first.offset();
|
|
1968
|
-
});
|
|
1947
|
+
if (blob_reqs.size() > 0) {
|
|
1948
|
+
blob_source_->MultiGetBlob(read_options, blob_reqs, /*bytes_read=*/nullptr);
|
|
1949
|
+
}
|
|
1969
1950
|
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
autovector<uint64_t> offsets;
|
|
1973
|
-
autovector<uint64_t> value_sizes;
|
|
1974
|
-
autovector<Status*> statuses;
|
|
1975
|
-
autovector<PinnableSlice*> values;
|
|
1951
|
+
for (auto& ctx : blob_ctxs) {
|
|
1952
|
+
BlobReadContexts& blobs_in_file = ctx.second;
|
|
1976
1953
|
for (const auto& blob : blobs_in_file) {
|
|
1977
|
-
const auto& blob_index = blob.first;
|
|
1978
1954
|
const KeyContext& key_context = blob.second;
|
|
1979
|
-
if (
|
|
1980
|
-
|
|
1981
|
-
Status::Corruption("Unexpected TTL/inlined blob index");
|
|
1982
|
-
continue;
|
|
1983
|
-
}
|
|
1984
|
-
const uint64_t key_size = key_context.ukey_with_ts.size();
|
|
1985
|
-
const uint64_t offset = blob_index.offset();
|
|
1986
|
-
const uint64_t value_size = blob_index.size();
|
|
1987
|
-
if (!IsValidBlobOffset(offset, key_size, value_size, file_size)) {
|
|
1988
|
-
*(key_context.s) = Status::Corruption("Invalid blob offset");
|
|
1989
|
-
continue;
|
|
1990
|
-
}
|
|
1991
|
-
if (blob_index.compression() != compression) {
|
|
1992
|
-
*(key_context.s) =
|
|
1993
|
-
Status::Corruption("Compression type mismatch when reading a blob");
|
|
1994
|
-
continue;
|
|
1995
|
-
}
|
|
1996
|
-
blob_read_key_contexts.emplace_back(std::cref(key_context));
|
|
1997
|
-
user_keys.emplace_back(std::cref(key_context.ukey_with_ts));
|
|
1998
|
-
offsets.push_back(blob_index.offset());
|
|
1999
|
-
value_sizes.push_back(blob_index.size());
|
|
2000
|
-
statuses.push_back(key_context.s);
|
|
2001
|
-
values.push_back(key_context.value);
|
|
2002
|
-
}
|
|
2003
|
-
blob_file_reader.GetValue()->MultiGetBlob(read_options, user_keys, offsets,
|
|
2004
|
-
value_sizes, statuses, values,
|
|
2005
|
-
/*bytes_read=*/nullptr);
|
|
2006
|
-
size_t num = blob_read_key_contexts.size();
|
|
2007
|
-
assert(num == user_keys.size());
|
|
2008
|
-
assert(num == offsets.size());
|
|
2009
|
-
assert(num == value_sizes.size());
|
|
2010
|
-
assert(num == statuses.size());
|
|
2011
|
-
assert(num == values.size());
|
|
2012
|
-
for (size_t i = 0; i < num; ++i) {
|
|
2013
|
-
if (statuses[i]->ok()) {
|
|
2014
|
-
range.AddValueSize(blob_read_key_contexts[i].get().value->size());
|
|
1955
|
+
if (key_context.s->ok()) {
|
|
1956
|
+
range.AddValueSize(key_context.value->size());
|
|
2015
1957
|
if (range.GetValueSize() > read_options.value_size_soft_limit) {
|
|
2016
|
-
*
|
|
1958
|
+
*key_context.s = Status::Aborted();
|
|
2017
1959
|
}
|
|
1960
|
+
} else if (key_context.s->IsIncomplete()) {
|
|
1961
|
+
// read_options.read_tier == kBlockCacheTier
|
|
1962
|
+
// Cannot read blob(s): no disk I/O allowed
|
|
1963
|
+
assert(key_context.get_context);
|
|
1964
|
+
auto& get_context = *(key_context.get_context);
|
|
1965
|
+
get_context.MarkKeyMayExist();
|
|
2018
1966
|
}
|
|
2019
1967
|
}
|
|
2020
1968
|
}
|
|
@@ -2253,7 +2201,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2253
2201
|
|
|
2254
2202
|
MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
|
|
2255
2203
|
// blob_file => [[blob_idx, it], ...]
|
|
2256
|
-
std::unordered_map<uint64_t,
|
|
2204
|
+
std::unordered_map<uint64_t, BlobReadContexts> blob_ctxs;
|
|
2257
2205
|
int prev_level = -1;
|
|
2258
2206
|
|
|
2259
2207
|
while (!fp.IsSearchEnded()) {
|
|
@@ -2270,7 +2218,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2270
2218
|
// Call MultiGetFromSST for looking up a single file
|
|
2271
2219
|
s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
|
|
2272
2220
|
fp.GetHitFileLevel(), fp.IsHitFileLastInLevel(), f,
|
|
2273
|
-
|
|
2221
|
+
blob_ctxs, num_filter_read, num_index_read,
|
|
2274
2222
|
num_sst_read);
|
|
2275
2223
|
if (fp.GetHitFileLevel() == 0) {
|
|
2276
2224
|
dump_stats_for_l0_file = true;
|
|
@@ -2285,7 +2233,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2285
2233
|
while (f != nullptr) {
|
|
2286
2234
|
mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
|
|
2287
2235
|
read_options, fp.CurrentFileRange(), fp.GetHitFileLevel(),
|
|
2288
|
-
fp.IsHitFileLastInLevel(), f,
|
|
2236
|
+
fp.IsHitFileLastInLevel(), f, blob_ctxs, num_filter_read,
|
|
2289
2237
|
num_index_read, num_sst_read));
|
|
2290
2238
|
if (fp.KeyMaySpanNextFile()) {
|
|
2291
2239
|
break;
|
|
@@ -2358,8 +2306,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2358
2306
|
num_level_read);
|
|
2359
2307
|
}
|
|
2360
2308
|
|
|
2361
|
-
if (s.ok() && !
|
|
2362
|
-
MultiGetBlob(read_options, keys_with_blobs_range,
|
|
2309
|
+
if (s.ok() && !blob_ctxs.empty()) {
|
|
2310
|
+
MultiGetBlob(read_options, keys_with_blobs_range, blob_ctxs);
|
|
2363
2311
|
}
|
|
2364
2312
|
|
|
2365
2313
|
// Process any left over keys
|
|
@@ -2709,6 +2657,16 @@ uint32_t GetExpiredTtlFilesCount(const ImmutableOptions& ioptions,
|
|
|
2709
2657
|
void VersionStorageInfo::ComputeCompactionScore(
|
|
2710
2658
|
const ImmutableOptions& immutable_options,
|
|
2711
2659
|
const MutableCFOptions& mutable_cf_options) {
|
|
2660
|
+
double total_downcompact_bytes = 0.0;
|
|
2661
|
+
// Historically, score is defined as actual bytes in a level divided by
|
|
2662
|
+
// the level's target size, and 1.0 is the threshold for triggering
|
|
2663
|
+
// compaction. Higher score means higher prioritization.
|
|
2664
|
+
// Now we keep the compaction triggering condition, but consider more
|
|
2665
|
+
// factors for prioritization, while still keeping the 1.0 threshold.
|
|
2666
|
+
// In order to provide flexibility for reducing score while still
|
|
2667
|
+
// maintaining it to be over 1.0, we scale the original score by 10x
|
|
2668
|
+
// if it is larger than 1.0.
|
|
2669
|
+
const double kScoreScale = 10.0;
|
|
2712
2670
|
for (int level = 0; level <= MaxInputLevel(); level++) {
|
|
2713
2671
|
double score;
|
|
2714
2672
|
if (level == 0) {
|
|
@@ -2726,6 +2684,7 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
2726
2684
|
int num_sorted_runs = 0;
|
|
2727
2685
|
uint64_t total_size = 0;
|
|
2728
2686
|
for (auto* f : files_[level]) {
|
|
2687
|
+
total_downcompact_bytes += static_cast<double>(f->fd.GetFileSize());
|
|
2729
2688
|
if (!f->being_compacted) {
|
|
2730
2689
|
total_size += f->compensated_file_size;
|
|
2731
2690
|
num_sorted_runs++;
|
|
@@ -2789,18 +2748,40 @@ void VersionStorageInfo::ComputeCompactionScore(
|
|
|
2789
2748
|
}
|
|
2790
2749
|
score =
|
|
2791
2750
|
std::max(score, static_cast<double>(total_size) / l0_target_size);
|
|
2751
|
+
if (immutable_options.level_compaction_dynamic_level_bytes &&
|
|
2752
|
+
score > 1.0) {
|
|
2753
|
+
score *= kScoreScale;
|
|
2754
|
+
}
|
|
2792
2755
|
}
|
|
2793
2756
|
}
|
|
2794
2757
|
} else {
|
|
2795
2758
|
// Compute the ratio of current size to size limit.
|
|
2796
2759
|
uint64_t level_bytes_no_compacting = 0;
|
|
2760
|
+
uint64_t level_total_bytes = 0;
|
|
2797
2761
|
for (auto f : files_[level]) {
|
|
2762
|
+
level_total_bytes += f->fd.GetFileSize();
|
|
2798
2763
|
if (!f->being_compacted) {
|
|
2799
2764
|
level_bytes_no_compacting += f->compensated_file_size;
|
|
2800
2765
|
}
|
|
2801
2766
|
}
|
|
2802
|
-
|
|
2803
|
-
|
|
2767
|
+
if (!immutable_options.level_compaction_dynamic_level_bytes ||
|
|
2768
|
+
level_bytes_no_compacting < MaxBytesForLevel(level)) {
|
|
2769
|
+
score = static_cast<double>(level_bytes_no_compacting) /
|
|
2770
|
+
MaxBytesForLevel(level);
|
|
2771
|
+
} else {
|
|
2772
|
+
// If there is a large amount of data being compacted down to the
|
|
2773
|
+
// current level soon, we would de-prioritize compaction from
|
|
2774
|
+
// a level where the incoming data would be a large ratio. We do
|
|
2775
|
+
// it by dividing level size not by target level size, but
|
|
2776
|
+
// the target size and the incoming compaction bytes.
|
|
2777
|
+
score = static_cast<double>(level_bytes_no_compacting) /
|
|
2778
|
+
(MaxBytesForLevel(level) + total_downcompact_bytes) *
|
|
2779
|
+
kScoreScale;
|
|
2780
|
+
}
|
|
2781
|
+
if (level_total_bytes > MaxBytesForLevel(level)) {
|
|
2782
|
+
total_downcompact_bytes +=
|
|
2783
|
+
static_cast<double>(level_total_bytes - MaxBytesForLevel(level));
|
|
2784
|
+
}
|
|
2804
2785
|
}
|
|
2805
2786
|
compaction_level_[level] = level;
|
|
2806
2787
|
compaction_score_[level] = score;
|
|
@@ -3199,6 +3180,15 @@ void SortFileByOverlappingRatio(
|
|
|
3199
3180
|
|
|
3200
3181
|
std::partial_sort(temp->begin(), temp->begin() + num_to_sort, temp->end(),
|
|
3201
3182
|
[&](const Fsize& f1, const Fsize& f2) -> bool {
|
|
3183
|
+
// If score is the same, pick file with smaller keys.
|
|
3184
|
+
// This makes the algorithm more deterministic, and also
|
|
3185
|
+
// helps the trivial move case to have more files to
|
|
3186
|
+
// extend.
|
|
3187
|
+
if (file_to_order[f1.file->fd.GetNumber()] ==
|
|
3188
|
+
file_to_order[f2.file->fd.GetNumber()]) {
|
|
3189
|
+
return icmp.Compare(f1.file->smallest,
|
|
3190
|
+
f2.file->smallest) < 0;
|
|
3191
|
+
}
|
|
3202
3192
|
return file_to_order[f1.file->fd.GetNumber()] <
|
|
3203
3193
|
file_to_order[f2.file->fd.GetNumber()];
|
|
3204
3194
|
});
|
|
@@ -3827,13 +3817,7 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
|
|
|
3827
3817
|
// No compaction from L1+ needs to be scheduled.
|
|
3828
3818
|
base_level_ = num_levels_ - 1;
|
|
3829
3819
|
} else {
|
|
3830
|
-
uint64_t
|
|
3831
|
-
for (const auto& f : files_[0]) {
|
|
3832
|
-
l0_size += f->fd.GetFileSize();
|
|
3833
|
-
}
|
|
3834
|
-
|
|
3835
|
-
uint64_t base_bytes_max =
|
|
3836
|
-
std::max(options.max_bytes_for_level_base, l0_size);
|
|
3820
|
+
uint64_t base_bytes_max = options.max_bytes_for_level_base;
|
|
3837
3821
|
uint64_t base_bytes_min = static_cast<uint64_t>(
|
|
3838
3822
|
base_bytes_max / options.max_bytes_for_level_multiplier);
|
|
3839
3823
|
|
|
@@ -3875,26 +3859,6 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
|
|
|
3875
3859
|
|
|
3876
3860
|
level_multiplier_ = options.max_bytes_for_level_multiplier;
|
|
3877
3861
|
assert(base_level_size > 0);
|
|
3878
|
-
if (l0_size > base_level_size &&
|
|
3879
|
-
(l0_size > options.max_bytes_for_level_base ||
|
|
3880
|
-
static_cast<int>(files_[0].size() / 2) >=
|
|
3881
|
-
options.level0_file_num_compaction_trigger)) {
|
|
3882
|
-
// We adjust the base level according to actual L0 size, and adjust
|
|
3883
|
-
// the level multiplier accordingly, when:
|
|
3884
|
-
// 1. the L0 size is larger than level size base, or
|
|
3885
|
-
// 2. number of L0 files reaches twice the L0->L1 compaction trigger
|
|
3886
|
-
// We don't do this otherwise to keep the LSM-tree structure stable
|
|
3887
|
-
// unless the L0 compaction is backlogged.
|
|
3888
|
-
base_level_size = l0_size;
|
|
3889
|
-
if (base_level_ == num_levels_ - 1) {
|
|
3890
|
-
level_multiplier_ = 1.0;
|
|
3891
|
-
} else {
|
|
3892
|
-
level_multiplier_ = std::pow(
|
|
3893
|
-
static_cast<double>(max_level_size) /
|
|
3894
|
-
static_cast<double>(base_level_size),
|
|
3895
|
-
1.0 / static_cast<double>(num_levels_ - base_level_ - 1));
|
|
3896
|
-
}
|
|
3897
|
-
}
|
|
3898
3862
|
|
|
3899
3863
|
uint64_t level_size = base_level_size;
|
|
3900
3864
|
for (int i = base_level_; i < num_levels_; i++) {
|
|
@@ -860,11 +860,11 @@ class Version {
|
|
|
860
860
|
FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
|
|
861
861
|
uint64_t* bytes_read) const;
|
|
862
862
|
|
|
863
|
-
using
|
|
863
|
+
using BlobReadContext =
|
|
864
864
|
std::pair<BlobIndex, std::reference_wrapper<const KeyContext>>;
|
|
865
|
-
using
|
|
865
|
+
using BlobReadContexts = std::vector<BlobReadContext>;
|
|
866
866
|
void MultiGetBlob(const ReadOptions& read_options, MultiGetRange& range,
|
|
867
|
-
std::unordered_map<uint64_t,
|
|
867
|
+
std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs);
|
|
868
868
|
|
|
869
869
|
// Loads some stats information from files (if update_stats is set) and
|
|
870
870
|
// populates derived data structures. Call without mutex held. It needs to be
|
|
@@ -989,7 +989,7 @@ class Version {
|
|
|
989
989
|
/* ret_type */ Status, /* func_name */ MultiGetFromSST,
|
|
990
990
|
const ReadOptions& read_options, MultiGetRange file_range,
|
|
991
991
|
int hit_file_level, bool is_hit_file_last_in_level, FdWithKeyRange* f,
|
|
992
|
-
std::unordered_map<uint64_t,
|
|
992
|
+
std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs,
|
|
993
993
|
uint64_t& num_filter_read, uint64_t& num_index_read,
|
|
994
994
|
uint64_t& num_sst_read);
|
|
995
995
|
|
|
@@ -14,7 +14,7 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
14
14
|
DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
|
|
15
15
|
(const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
|
|
16
16
|
bool is_hit_file_last_in_level, FdWithKeyRange* f,
|
|
17
|
-
std::unordered_map<uint64_t,
|
|
17
|
+
std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs,
|
|
18
18
|
uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_sst_read) {
|
|
19
19
|
bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
|
|
20
20
|
get_perf_context()->per_level_perf_context_enabled;
|
|
@@ -110,7 +110,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
|
|
|
110
110
|
Status tmp_s = blob_index.DecodeFrom(blob_index_slice);
|
|
111
111
|
if (tmp_s.ok()) {
|
|
112
112
|
const uint64_t blob_file_num = blob_index.file_number();
|
|
113
|
-
|
|
113
|
+
blob_ctxs[blob_file_num].emplace_back(
|
|
114
114
|
std::make_pair(blob_index, std::cref(*iter)));
|
|
115
115
|
} else {
|
|
116
116
|
*(iter->s) = tmp_s;
|
|
@@ -376,73 +376,80 @@ TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_1) {
|
|
|
376
376
|
ASSERT_EQ(2, vstorage_.base_level());
|
|
377
377
|
// level multiplier should be 5.0
|
|
378
378
|
ASSERT_EQ(vstorage_.level_multiplier(), 5.0);
|
|
379
|
-
// Level size should be around 30,000, 105,000, 367,500
|
|
380
379
|
ASSERT_EQ(40000U, vstorage_.MaxBytesForLevel(2));
|
|
381
380
|
ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3));
|
|
382
381
|
ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4));
|
|
382
|
+
|
|
383
|
+
vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_);
|
|
384
|
+
// Only L0 hits compaction.
|
|
385
|
+
ASSERT_EQ(vstorage_.CompactionScoreLevel(0), 0);
|
|
383
386
|
}
|
|
384
387
|
|
|
385
388
|
TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_2) {
|
|
386
389
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
387
390
|
mutable_cf_options_.max_bytes_for_level_base = 10000;
|
|
388
391
|
mutable_cf_options_.max_bytes_for_level_multiplier = 5;
|
|
389
|
-
mutable_cf_options_.level0_file_num_compaction_trigger =
|
|
392
|
+
mutable_cf_options_.level0_file_num_compaction_trigger = 4;
|
|
390
393
|
|
|
391
394
|
Add(0, 11U, "1", "2", 10000U);
|
|
392
395
|
Add(0, 12U, "1", "2", 10000U);
|
|
393
396
|
Add(0, 13U, "1", "2", 10000U);
|
|
394
397
|
|
|
398
|
+
// Level size should be around 10,000, 10,290, 51,450, 257,250
|
|
395
399
|
Add(5, 4U, "1", "2", 1286250U);
|
|
396
|
-
Add(4, 5U, "1", "2",
|
|
397
|
-
Add(3, 6U, "1", "2",
|
|
398
|
-
Add(2, 7U, "1", "2",
|
|
400
|
+
Add(4, 5U, "1", "2", 258000U); // unadjusted score 1.003
|
|
401
|
+
Add(3, 6U, "1", "2", 53000U); // unadjusted score 1.03
|
|
402
|
+
Add(2, 7U, "1", "2", 20000U); // unadjusted score 1.94
|
|
399
403
|
|
|
400
404
|
UpdateVersionStorageInfo();
|
|
401
405
|
|
|
402
406
|
ASSERT_EQ(0, logger_->log_count);
|
|
403
|
-
ASSERT_EQ(
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
407
|
+
ASSERT_EQ(1, vstorage_.base_level());
|
|
408
|
+
ASSERT_EQ(10000U, vstorage_.MaxBytesForLevel(1));
|
|
409
|
+
ASSERT_EQ(10290U, vstorage_.MaxBytesForLevel(2));
|
|
410
|
+
ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3));
|
|
411
|
+
ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4));
|
|
412
|
+
|
|
413
|
+
vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_);
|
|
414
|
+
// Although L2 and L3 have higher unadjusted compaction score, considering
|
|
415
|
+
// a relatively large L0 being compacted down soon, L4 is picked up for
|
|
416
|
+
// compaction.
|
|
417
|
+
// L0 is still picked up for oversizing.
|
|
418
|
+
ASSERT_EQ(0, vstorage_.CompactionScoreLevel(0));
|
|
419
|
+
ASSERT_EQ(4, vstorage_.CompactionScoreLevel(1));
|
|
413
420
|
}
|
|
414
421
|
|
|
415
422
|
TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_3) {
|
|
416
423
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
417
|
-
mutable_cf_options_.max_bytes_for_level_base =
|
|
424
|
+
mutable_cf_options_.max_bytes_for_level_base = 20000;
|
|
418
425
|
mutable_cf_options_.max_bytes_for_level_multiplier = 5;
|
|
419
|
-
mutable_cf_options_.level0_file_num_compaction_trigger =
|
|
426
|
+
mutable_cf_options_.level0_file_num_compaction_trigger = 5;
|
|
420
427
|
|
|
421
|
-
Add(0, 11U, "1", "2",
|
|
422
|
-
Add(0, 12U, "1", "2",
|
|
423
|
-
Add(0, 13U, "1", "2",
|
|
424
|
-
Add(0, 14U, "1", "2",
|
|
425
|
-
Add(0, 15U, "1", "2", 5000U);
|
|
426
|
-
Add(0, 16U, "1", "2", 5000U);
|
|
428
|
+
Add(0, 11U, "1", "2", 2500U);
|
|
429
|
+
Add(0, 12U, "1", "2", 2500U);
|
|
430
|
+
Add(0, 13U, "1", "2", 2500U);
|
|
431
|
+
Add(0, 14U, "1", "2", 2500U);
|
|
427
432
|
|
|
433
|
+
// Level size should be around 20,000, 53000, 258000
|
|
428
434
|
Add(5, 4U, "1", "2", 1286250U);
|
|
429
|
-
Add(4, 5U, "1", "2",
|
|
430
|
-
Add(3, 6U, "1", "2",
|
|
431
|
-
Add(2, 7U, "1", "2",
|
|
435
|
+
Add(4, 5U, "1", "2", 260000U); // Unadjusted score 1.01, adjusted about 4.3
|
|
436
|
+
Add(3, 6U, "1", "2", 85000U); // Unadjusted score 1.42, adjusted about 11.6
|
|
437
|
+
Add(2, 7U, "1", "2", 30000); // Unadjusted score 1.5, adjusted about 10.0
|
|
432
438
|
|
|
433
439
|
UpdateVersionStorageInfo();
|
|
434
440
|
|
|
435
441
|
ASSERT_EQ(0, logger_->log_count);
|
|
436
442
|
ASSERT_EQ(2, vstorage_.base_level());
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
//
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
443
|
+
ASSERT_EQ(20000U, vstorage_.MaxBytesForLevel(2));
|
|
444
|
+
|
|
445
|
+
vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_);
|
|
446
|
+
// Although L2 has higher unadjusted compaction score, considering
|
|
447
|
+
// a relatively large L0 being compacted down soon, L3 is picked up for
|
|
448
|
+
// compaction.
|
|
449
|
+
|
|
450
|
+
ASSERT_EQ(3, vstorage_.CompactionScoreLevel(0));
|
|
451
|
+
ASSERT_EQ(2, vstorage_.CompactionScoreLevel(1));
|
|
452
|
+
ASSERT_EQ(4, vstorage_.CompactionScoreLevel(2));
|
|
446
453
|
}
|
|
447
454
|
|
|
448
455
|
TEST_F(VersionStorageInfoTest, EstimateLiveDataSize) {
|
|
@@ -3063,7 +3063,8 @@ size_t WriteBatchInternal::AppendedByteSize(size_t leftByteSize,
|
|
|
3063
3063
|
}
|
|
3064
3064
|
|
|
3065
3065
|
Status WriteBatchInternal::UpdateProtectionInfo(WriteBatch* wb,
|
|
3066
|
-
size_t bytes_per_key
|
|
3066
|
+
size_t bytes_per_key,
|
|
3067
|
+
uint64_t* checksum) {
|
|
3067
3068
|
if (bytes_per_key == 0) {
|
|
3068
3069
|
if (wb->prot_info_ != nullptr) {
|
|
3069
3070
|
wb->prot_info_.reset();
|
|
@@ -3076,7 +3077,14 @@ Status WriteBatchInternal::UpdateProtectionInfo(WriteBatch* wb,
|
|
|
3076
3077
|
if (wb->prot_info_ == nullptr) {
|
|
3077
3078
|
wb->prot_info_.reset(new WriteBatch::ProtectionInfo());
|
|
3078
3079
|
ProtectionInfoUpdater prot_info_updater(wb->prot_info_.get());
|
|
3079
|
-
|
|
3080
|
+
Status s = wb->Iterate(&prot_info_updater);
|
|
3081
|
+
if (s.ok() && checksum != nullptr) {
|
|
3082
|
+
uint64_t expected_hash = XXH3_64bits(wb->rep_.data(), wb->rep_.size());
|
|
3083
|
+
if (expected_hash != *checksum) {
|
|
3084
|
+
return Status::Corruption("Write batch content corrupted.");
|
|
3085
|
+
}
|
|
3086
|
+
}
|
|
3087
|
+
return s;
|
|
3080
3088
|
} else {
|
|
3081
3089
|
// Already protected.
|
|
3082
3090
|
return Status::OK();
|
|
@@ -240,7 +240,10 @@ class WriteBatchInternal {
|
|
|
240
240
|
return wb.has_key_with_ts_;
|
|
241
241
|
}
|
|
242
242
|
|
|
243
|
-
|
|
243
|
+
// Update per-key value protection information on this write batch.
|
|
244
|
+
// If checksum is provided, the batch content is verified against the checksum.
|
|
245
|
+
static Status UpdateProtectionInfo(WriteBatch* wb, size_t bytes_per_key,
|
|
246
|
+
uint64_t* checksum = nullptr);
|
|
244
247
|
};
|
|
245
248
|
|
|
246
249
|
// LocalSavePoint is similar to a scope guard
|
|
@@ -148,7 +148,7 @@ void DbVerificationThread(void* v) {
|
|
|
148
148
|
}
|
|
149
149
|
}
|
|
150
150
|
|
|
151
|
-
void
|
|
151
|
+
void TimestampedSnapshotsThread(void* v) {
|
|
152
152
|
assert(FLAGS_create_timestamped_snapshot_one_in > 0);
|
|
153
153
|
auto* thread = reinterpret_cast<ThreadState*>(v);
|
|
154
154
|
assert(thread);
|
|
@@ -169,6 +169,14 @@ void SnapshotGcThread(void* v) {
|
|
|
169
169
|
}
|
|
170
170
|
|
|
171
171
|
uint64_t now = db_stress_env->NowNanos();
|
|
172
|
+
std::pair<Status, std::shared_ptr<const Snapshot>> res =
|
|
173
|
+
stress_test->CreateTimestampedSnapshot(now);
|
|
174
|
+
if (res.first.ok()) {
|
|
175
|
+
assert(res.second);
|
|
176
|
+
assert(res.second->GetTimestamp() == now);
|
|
177
|
+
} else {
|
|
178
|
+
assert(!res.second);
|
|
179
|
+
}
|
|
172
180
|
constexpr uint64_t time_diff = static_cast<uint64_t>(1000) * 1000 * 1000;
|
|
173
181
|
stress_test->ReleaseOldTimestampedSnapshots(now - time_diff);
|
|
174
182
|
|
|
@@ -267,15 +275,13 @@ uint32_t GetValueBase(Slice s) {
|
|
|
267
275
|
return res;
|
|
268
276
|
}
|
|
269
277
|
|
|
270
|
-
std::string
|
|
278
|
+
std::string GetNowNanos() {
|
|
271
279
|
uint64_t t = db_stress_env->NowNanos();
|
|
272
280
|
std::string ret;
|
|
273
281
|
PutFixed64(&ret, t);
|
|
274
282
|
return ret;
|
|
275
283
|
}
|
|
276
284
|
|
|
277
|
-
std::string GenerateTimestampForRead() { return NowNanosStr(); }
|
|
278
|
-
|
|
279
285
|
namespace {
|
|
280
286
|
|
|
281
287
|
class MyXXH64Checksum : public FileChecksumGenerator {
|
|
@@ -110,6 +110,7 @@ DECLARE_int32(open_files);
|
|
|
110
110
|
DECLARE_int64(compressed_cache_size);
|
|
111
111
|
DECLARE_int32(compressed_cache_numshardbits);
|
|
112
112
|
DECLARE_int32(compaction_style);
|
|
113
|
+
DECLARE_int32(compaction_pri);
|
|
113
114
|
DECLARE_int32(num_levels);
|
|
114
115
|
DECLARE_int32(level0_file_num_compaction_trigger);
|
|
115
116
|
DECLARE_int32(level0_slowdown_writes_trigger);
|
|
@@ -592,7 +593,7 @@ extern void PoolSizeChangeThread(void* v);
|
|
|
592
593
|
|
|
593
594
|
extern void DbVerificationThread(void* v);
|
|
594
595
|
|
|
595
|
-
extern void
|
|
596
|
+
extern void TimestampedSnapshotsThread(void* v);
|
|
596
597
|
|
|
597
598
|
extern void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz);
|
|
598
599
|
|
|
@@ -612,8 +613,7 @@ extern void CheckAndSetOptionsForMultiOpsTxnStressTest();
|
|
|
612
613
|
extern void InitializeHotKeyGenerator(double alpha);
|
|
613
614
|
extern int64_t GetOneHotKeyID(double rand_seed, int64_t max_key);
|
|
614
615
|
|
|
615
|
-
extern std::string
|
|
616
|
-
extern std::string NowNanosStr();
|
|
616
|
+
extern std::string GetNowNanos();
|
|
617
617
|
|
|
618
618
|
std::shared_ptr<FileChecksumGenFactory> GetFileChecksumImpl(
|
|
619
619
|
const std::string& name);
|
|
@@ -105,9 +105,10 @@ bool RunStressTest(StressTest* stress) {
|
|
|
105
105
|
&continuous_verification_thread);
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
-
ThreadState
|
|
108
|
+
ThreadState timestamped_snapshots_thread(0, &shared);
|
|
109
109
|
if (FLAGS_create_timestamped_snapshot_one_in > 0) {
|
|
110
|
-
db_stress_env->StartThread(
|
|
110
|
+
db_stress_env->StartThread(TimestampedSnapshotsThread,
|
|
111
|
+
&timestamped_snapshots_thread);
|
|
111
112
|
}
|
|
112
113
|
|
|
113
114
|
// Each thread goes through the following states:
|
|
@@ -200,6 +200,10 @@ DEFINE_int32(
|
|
|
200
200
|
DEFINE_int32(compaction_style, ROCKSDB_NAMESPACE::Options().compaction_style,
|
|
201
201
|
"");
|
|
202
202
|
|
|
203
|
+
DEFINE_int32(compaction_pri, ROCKSDB_NAMESPACE::Options().compaction_pri,
|
|
204
|
+
"Which file from a level should be picked to merge to the next "
|
|
205
|
+
"level in level-based compaction");
|
|
206
|
+
|
|
203
207
|
DEFINE_int32(num_levels, ROCKSDB_NAMESPACE::Options().num_levels,
|
|
204
208
|
"Number of levels in the DB");
|
|
205
209
|
|
|
@@ -75,7 +75,8 @@ class SharedState {
|
|
|
75
75
|
should_stop_test_(false),
|
|
76
76
|
no_overwrite_ids_(GenerateNoOverwriteIds()),
|
|
77
77
|
expected_state_manager_(nullptr),
|
|
78
|
-
printing_verification_results_(false)
|
|
78
|
+
printing_verification_results_(false),
|
|
79
|
+
start_timestamp_(Env::Default()->NowNanos()) {
|
|
79
80
|
Status status;
|
|
80
81
|
// TODO: We should introduce a way to explicitly disable verification
|
|
81
82
|
// during shutdown. When that is disabled and FLAGS_expected_values_dir
|
|
@@ -303,6 +304,8 @@ class SharedState {
|
|
|
303
304
|
printing_verification_results_.store(false, std::memory_order_relaxed);
|
|
304
305
|
}
|
|
305
306
|
|
|
307
|
+
uint64_t GetStartTimestamp() const { return start_timestamp_; }
|
|
308
|
+
|
|
306
309
|
private:
|
|
307
310
|
static void IgnoreReadErrorCallback(void*) {
|
|
308
311
|
ignore_read_error = true;
|
|
@@ -365,6 +368,7 @@ class SharedState {
|
|
|
365
368
|
// and storing it in the container may require copying depending on the impl.
|
|
366
369
|
std::vector<std::unique_ptr<port::Mutex[]>> key_locks_;
|
|
367
370
|
std::atomic<bool> printing_verification_results_;
|
|
371
|
+
const uint64_t start_timestamp_;
|
|
368
372
|
};
|
|
369
373
|
|
|
370
374
|
// Per-thread state for concurrent executions of the same benchmark.
|