@nxtedition/rocksdb 7.0.12 → 7.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/Makefile +3 -0
- package/deps/rocksdb/rocksdb/TARGETS +6 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
- package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
- package/deps/rocksdb/rocksdb/db/c.cc +159 -5
- package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
- package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
- package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
- package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
- package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
- package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
- package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
- package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
- package/deps/rocksdb/rocksdb/options/options.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
- package/deps/rocksdb/rocksdb/src.mk +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
- package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
- package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
- package/package.json +1 -1
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -1383,6 +1383,9 @@ db_blob_compaction_test: $(OBJ_DIR)/db/blob/db_blob_compaction_test.o $(TEST_LIB
|
|
|
1383
1383
|
db_readonly_with_timestamp_test: $(OBJ_DIR)/db/db_readonly_with_timestamp_test.o $(TEST_LIBRARY) $(LIBRARY)
|
|
1384
1384
|
$(AM_LINK)
|
|
1385
1385
|
|
|
1386
|
+
db_wide_basic_test: $(OBJ_DIR)/db/wide/db_wide_basic_test.o $(TEST_LIBRARY) $(LIBRARY)
|
|
1387
|
+
$(AM_LINK)
|
|
1388
|
+
|
|
1386
1389
|
db_with_timestamp_basic_test: $(OBJ_DIR)/db/db_with_timestamp_basic_test.o $(TEST_LIBRARY) $(LIBRARY)
|
|
1387
1390
|
$(AM_LINK)
|
|
1388
1391
|
|
|
@@ -5240,6 +5240,12 @@ cpp_unittest_wrapper(name="db_wal_test",
|
|
|
5240
5240
|
extra_compiler_flags=[])
|
|
5241
5241
|
|
|
5242
5242
|
|
|
5243
|
+
cpp_unittest_wrapper(name="db_wide_basic_test",
|
|
5244
|
+
srcs=["db/wide/db_wide_basic_test.cc"],
|
|
5245
|
+
deps=[":rocksdb_test_lib"],
|
|
5246
|
+
extra_compiler_flags=[])
|
|
5247
|
+
|
|
5248
|
+
|
|
5243
5249
|
cpp_unittest_wrapper(name="db_with_timestamp_basic_test",
|
|
5244
5250
|
srcs=["db/db_with_timestamp_basic_test.cc"],
|
|
5245
5251
|
deps=[":rocksdb_test_lib"],
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
4
|
// (found in the LICENSE.Apache file in the root directory).
|
|
5
5
|
|
|
6
|
+
#include "cache_key.h"
|
|
6
7
|
#ifdef GFLAGS
|
|
7
8
|
#include <cinttypes>
|
|
8
9
|
#include <cstddef>
|
|
@@ -214,7 +215,8 @@ struct KeyGen {
|
|
|
214
215
|
EncodeFixed64(key_data + 10, key);
|
|
215
216
|
key_data[18] = char{4};
|
|
216
217
|
EncodeFixed64(key_data + 19, key);
|
|
217
|
-
|
|
218
|
+
assert(27 >= kCacheKeySize);
|
|
219
|
+
return Slice(&key_data[off], kCacheKeySize);
|
|
218
220
|
}
|
|
219
221
|
};
|
|
220
222
|
|
|
@@ -321,8 +323,9 @@ class CacheBench {
|
|
|
321
323
|
Random64 rnd(1);
|
|
322
324
|
KeyGen keygen;
|
|
323
325
|
for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) {
|
|
324
|
-
cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_),
|
|
325
|
-
|
|
326
|
+
Status s = cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_),
|
|
327
|
+
createValue(rnd), &helper1, FLAGS_value_bytes);
|
|
328
|
+
assert(s.ok());
|
|
326
329
|
}
|
|
327
330
|
}
|
|
328
331
|
|
|
@@ -542,8 +545,9 @@ class CacheBench {
|
|
|
542
545
|
FLAGS_value_bytes);
|
|
543
546
|
} else {
|
|
544
547
|
// do insert
|
|
545
|
-
cache_->Insert(key, createValue(thread->rnd), &helper2,
|
|
546
|
-
|
|
548
|
+
Status s = cache_->Insert(key, createValue(thread->rnd), &helper2,
|
|
549
|
+
FLAGS_value_bytes, &handle);
|
|
550
|
+
assert(s.ok());
|
|
547
551
|
}
|
|
548
552
|
} else if (random_op < insert_threshold_) {
|
|
549
553
|
if (handle) {
|
|
@@ -551,8 +555,9 @@ class CacheBench {
|
|
|
551
555
|
handle = nullptr;
|
|
552
556
|
}
|
|
553
557
|
// do insert
|
|
554
|
-
cache_->Insert(key, createValue(thread->rnd), &helper3,
|
|
555
|
-
|
|
558
|
+
Status s = cache_->Insert(key, createValue(thread->rnd), &helper3,
|
|
559
|
+
FLAGS_value_bytes, &handle);
|
|
560
|
+
assert(s.ok());
|
|
556
561
|
} else if (random_op < lookup_threshold_) {
|
|
557
562
|
if (handle) {
|
|
558
563
|
cache_->Release(handle);
|
|
@@ -65,6 +65,8 @@ class CacheKey {
|
|
|
65
65
|
uint64_t offset_etc64_;
|
|
66
66
|
};
|
|
67
67
|
|
|
68
|
+
constexpr uint8_t kCacheKeySize = static_cast<uint8_t>(sizeof(CacheKey));
|
|
69
|
+
|
|
68
70
|
// A file-specific generator of cache keys, sometimes referred to as the
|
|
69
71
|
// "base" cache key for a file because all the cache keys for various offsets
|
|
70
72
|
// within the file are computed using simple arithmetic. The basis for the
|
|
@@ -192,8 +192,7 @@ LRUCacheShard::LRUCacheShard(size_t capacity, size_t estimated_value_size,
|
|
|
192
192
|
: capacity_(capacity),
|
|
193
193
|
strict_capacity_limit_(strict_capacity_limit),
|
|
194
194
|
table_(
|
|
195
|
-
CalcHashBits(capacity, estimated_value_size, metadata_charge_policy)
|
|
196
|
-
static_cast<uint8_t>(ceil(log2(1.0 / kLoadFactor)))),
|
|
195
|
+
CalcHashBits(capacity, estimated_value_size, metadata_charge_policy)),
|
|
197
196
|
usage_(0),
|
|
198
197
|
lru_usage_(0) {
|
|
199
198
|
set_metadata_charge_policy(metadata_charge_policy);
|
|
@@ -295,16 +294,29 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
|
|
|
295
294
|
}
|
|
296
295
|
}
|
|
297
296
|
|
|
298
|
-
|
|
299
|
-
size_t
|
|
297
|
+
size_t LRUCacheShard::CalcEstimatedHandleCharge(
|
|
298
|
+
size_t estimated_value_size,
|
|
300
299
|
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
301
300
|
LRUHandle h;
|
|
302
301
|
h.CalcTotalCharge(estimated_value_size, metadata_charge_policy);
|
|
303
|
-
|
|
302
|
+
return h.total_charge;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
uint8_t LRUCacheShard::CalcHashBits(
|
|
306
|
+
size_t capacity, size_t estimated_value_size,
|
|
307
|
+
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
308
|
+
size_t handle_charge =
|
|
309
|
+
CalcEstimatedHandleCharge(estimated_value_size, metadata_charge_policy);
|
|
310
|
+
size_t num_entries =
|
|
311
|
+
static_cast<size_t>(capacity / (kLoadFactor * handle_charge));
|
|
312
|
+
|
|
313
|
+
// Compute the ceiling of log2(num_entries). If num_entries == 0, return 0.
|
|
304
314
|
uint8_t num_hash_bits = 0;
|
|
305
|
-
|
|
315
|
+
size_t num_entries_copy = num_entries;
|
|
316
|
+
while (num_entries_copy >>= 1) {
|
|
306
317
|
++num_hash_bits;
|
|
307
318
|
}
|
|
319
|
+
num_hash_bits += size_t{1} << num_hash_bits < num_entries ? 1 : 0;
|
|
308
320
|
return num_hash_bits;
|
|
309
321
|
}
|
|
310
322
|
|
|
@@ -22,10 +22,14 @@
|
|
|
22
22
|
#include "util/distributed_mutex.h"
|
|
23
23
|
|
|
24
24
|
namespace ROCKSDB_NAMESPACE {
|
|
25
|
+
|
|
25
26
|
namespace fast_lru_cache {
|
|
26
27
|
|
|
27
|
-
//
|
|
28
|
+
// Forward declaration of friend class.
|
|
29
|
+
class FastLRUCacheTest;
|
|
28
30
|
|
|
31
|
+
// LRU cache implementation using an open-address hash table.
|
|
32
|
+
//
|
|
29
33
|
// Every slot in the hash table is an LRUHandle. Because handles can be
|
|
30
34
|
// referenced externally, we can't discard them immediately once they are
|
|
31
35
|
// deleted (via a delete or an LRU eviction) or replaced by a new version
|
|
@@ -51,7 +55,7 @@ namespace fast_lru_cache {
|
|
|
51
55
|
// - Not R --> R: When an unreferenced element becomes referenced. This can only
|
|
52
56
|
// happen if the element is V, since references to an element can only be
|
|
53
57
|
// created when it's visible.
|
|
54
|
-
|
|
58
|
+
//
|
|
55
59
|
// Internally, the cache uses an open-addressed hash table to index the handles.
|
|
56
60
|
// We use tombstone counters to keep track of displacements.
|
|
57
61
|
// Because of the tombstones and the two possible visibility states of an
|
|
@@ -70,9 +74,6 @@ namespace fast_lru_cache {
|
|
|
70
74
|
// slot. In any case, the slot becomes available. When a handle is inserted
|
|
71
75
|
// into that slot, it becomes a visible element again.
|
|
72
76
|
|
|
73
|
-
constexpr uint8_t kCacheKeySize =
|
|
74
|
-
static_cast<uint8_t>(sizeof(ROCKSDB_NAMESPACE::CacheKey));
|
|
75
|
-
|
|
76
77
|
// The load factor p is a real number in (0, 1) such that at all
|
|
77
78
|
// times at most a fraction p of all slots, without counting tombstones,
|
|
78
79
|
// are occupied by elements. This means that the probability that a
|
|
@@ -367,6 +368,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
367
368
|
|
|
368
369
|
private:
|
|
369
370
|
friend class LRUCache;
|
|
371
|
+
friend class FastLRUCacheTest;
|
|
372
|
+
|
|
370
373
|
void LRU_Remove(LRUHandle* e);
|
|
371
374
|
void LRU_Insert(LRUHandle* e);
|
|
372
375
|
|
|
@@ -376,6 +379,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
376
379
|
// holding the mutex_.
|
|
377
380
|
void EvictFromLRU(size_t charge, autovector<LRUHandle>* deleted);
|
|
378
381
|
|
|
382
|
+
// Returns the charge of a single handle.
|
|
383
|
+
static size_t CalcEstimatedHandleCharge(
|
|
384
|
+
size_t estimated_value_size,
|
|
385
|
+
CacheMetadataChargePolicy metadata_charge_policy);
|
|
386
|
+
|
|
379
387
|
// Returns the number of bits used to hash an element in the hash
|
|
380
388
|
// table.
|
|
381
389
|
static uint8_t CalcHashBits(size_t capacity, size_t estimated_value_size,
|
|
@@ -206,6 +206,7 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
|
|
|
206
206
|
ValidateLRUList({"e", "f", "g", "Z", "d"}, 2);
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
+
namespace fast_lru_cache {
|
|
209
210
|
// TODO(guido) Consolidate the following FastLRUCache tests with
|
|
210
211
|
// that of LRUCache.
|
|
211
212
|
class FastLRUCacheTest : public testing::Test {
|
|
@@ -238,6 +239,38 @@ class FastLRUCacheTest : public testing::Test {
|
|
|
238
239
|
|
|
239
240
|
Status Insert(char key, size_t len) { return Insert(std::string(len, key)); }
|
|
240
241
|
|
|
242
|
+
size_t CalcEstimatedHandleChargeWrapper(
|
|
243
|
+
size_t estimated_value_size,
|
|
244
|
+
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
245
|
+
return fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge(
|
|
246
|
+
estimated_value_size, metadata_charge_policy);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
uint8_t CalcHashBitsWrapper(
|
|
250
|
+
size_t capacity, size_t estimated_value_size,
|
|
251
|
+
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
252
|
+
return fast_lru_cache::LRUCacheShard::CalcHashBits(
|
|
253
|
+
capacity, estimated_value_size, metadata_charge_policy);
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
// Maximum number of items that a shard can hold.
|
|
257
|
+
double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size,
|
|
258
|
+
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
259
|
+
size_t handle_charge =
|
|
260
|
+
fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge(
|
|
261
|
+
estimated_value_size, metadata_charge_policy);
|
|
262
|
+
return capacity / (fast_lru_cache::kLoadFactor * handle_charge);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
bool TableSizeIsAppropriate(uint8_t hash_bits, double max_occupancy) {
|
|
266
|
+
if (hash_bits == 0) {
|
|
267
|
+
return max_occupancy <= 1;
|
|
268
|
+
} else {
|
|
269
|
+
return (1 << hash_bits >= max_occupancy) &&
|
|
270
|
+
(1 << (hash_bits - 1) <= max_occupancy);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
241
274
|
private:
|
|
242
275
|
fast_lru_cache::LRUCacheShard* cache_ = nullptr;
|
|
243
276
|
};
|
|
@@ -253,6 +286,62 @@ TEST_F(FastLRUCacheTest, ValidateKeySize) {
|
|
|
253
286
|
EXPECT_NOK(Insert('f', 0));
|
|
254
287
|
}
|
|
255
288
|
|
|
289
|
+
TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
|
|
290
|
+
size_t capacity = 1024;
|
|
291
|
+
size_t estimated_value_size = 1;
|
|
292
|
+
CacheMetadataChargePolicy metadata_charge_policy = kDontChargeCacheMetadata;
|
|
293
|
+
double max_occupancy =
|
|
294
|
+
CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
|
|
295
|
+
uint8_t hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
296
|
+
metadata_charge_policy);
|
|
297
|
+
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
298
|
+
|
|
299
|
+
capacity = 1024;
|
|
300
|
+
estimated_value_size = 1;
|
|
301
|
+
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
302
|
+
max_occupancy =
|
|
303
|
+
CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
|
|
304
|
+
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
305
|
+
metadata_charge_policy);
|
|
306
|
+
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
307
|
+
|
|
308
|
+
// No elements fit in cache.
|
|
309
|
+
capacity = 0;
|
|
310
|
+
estimated_value_size = 1;
|
|
311
|
+
metadata_charge_policy = kDontChargeCacheMetadata;
|
|
312
|
+
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
313
|
+
metadata_charge_policy);
|
|
314
|
+
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
|
|
315
|
+
|
|
316
|
+
// Set the capacity just below a single handle. Because the load factor is <
|
|
317
|
+
// 100% at least one handle will fit in the table.
|
|
318
|
+
estimated_value_size = 1;
|
|
319
|
+
size_t handle_charge = CalcEstimatedHandleChargeWrapper(
|
|
320
|
+
8192 /* estimated_value_size */, kDontChargeCacheMetadata);
|
|
321
|
+
capacity = handle_charge - 1;
|
|
322
|
+
// The load factor should be bounded away from 100%.
|
|
323
|
+
assert(static_cast<size_t>(capacity / fast_lru_cache::kLoadFactor) >
|
|
324
|
+
handle_charge);
|
|
325
|
+
metadata_charge_policy = kDontChargeCacheMetadata;
|
|
326
|
+
max_occupancy =
|
|
327
|
+
CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
|
|
328
|
+
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
329
|
+
metadata_charge_policy);
|
|
330
|
+
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
331
|
+
|
|
332
|
+
// Large capacity.
|
|
333
|
+
capacity = 31924172;
|
|
334
|
+
estimated_value_size = 321;
|
|
335
|
+
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
336
|
+
max_occupancy =
|
|
337
|
+
CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
|
|
338
|
+
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
339
|
+
metadata_charge_policy);
|
|
340
|
+
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
} // namespace fast_lru_cache
|
|
344
|
+
|
|
256
345
|
class TestSecondaryCache : public SecondaryCache {
|
|
257
346
|
public:
|
|
258
347
|
// Specifies what action to take on a lookup for a particular key
|
|
@@ -443,50 +443,40 @@ void BlobFileReader::MultiGetBlob(
|
|
|
443
443
|
}
|
|
444
444
|
|
|
445
445
|
assert(s.ok());
|
|
446
|
+
|
|
447
|
+
uint64_t total_bytes = 0;
|
|
446
448
|
for (size_t i = 0; i < num_blobs; ++i) {
|
|
447
449
|
auto& req = read_reqs[i];
|
|
450
|
+
const auto& record_slice = req.result;
|
|
451
|
+
|
|
448
452
|
assert(statuses[i]);
|
|
449
|
-
if (req.status.ok() &&
|
|
453
|
+
if (req.status.ok() && record_slice.size() != req.len) {
|
|
450
454
|
req.status = IOStatus::Corruption("Failed to read data from blob file");
|
|
451
455
|
}
|
|
456
|
+
|
|
452
457
|
*statuses[i] = req.status;
|
|
453
|
-
|
|
458
|
+
if (!statuses[i]->ok()) {
|
|
459
|
+
continue;
|
|
460
|
+
}
|
|
454
461
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
462
|
+
// Verify checksums if enabled
|
|
463
|
+
if (read_options.verify_checksums) {
|
|
464
|
+
*statuses[i] = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
|
|
458
465
|
if (!statuses[i]->ok()) {
|
|
459
466
|
continue;
|
|
460
467
|
}
|
|
461
|
-
const Slice& record_slice = read_reqs[i].result;
|
|
462
|
-
s = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
|
|
463
|
-
if (!s.ok()) {
|
|
464
|
-
assert(statuses[i]);
|
|
465
|
-
*statuses[i] = s;
|
|
466
|
-
}
|
|
467
468
|
}
|
|
468
|
-
}
|
|
469
469
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
const Slice value_slice(record_slice.data() + adjustments[i],
|
|
477
|
-
value_sizes[i]);
|
|
478
|
-
s = UncompressBlobIfNeeded(value_slice, compression_type_, clock_,
|
|
479
|
-
statistics_, values[i]);
|
|
480
|
-
if (!s.ok()) {
|
|
481
|
-
*statuses[i] = s;
|
|
470
|
+
// Uncompress blob if needed
|
|
471
|
+
Slice value_slice(record_slice.data() + adjustments[i], value_sizes[i]);
|
|
472
|
+
*statuses[i] = UncompressBlobIfNeeded(value_slice, compression_type_,
|
|
473
|
+
clock_, statistics_, values[i]);
|
|
474
|
+
if (statuses[i]->ok()) {
|
|
475
|
+
total_bytes += record_slice.size();
|
|
482
476
|
}
|
|
483
477
|
}
|
|
484
478
|
|
|
485
479
|
if (bytes_read) {
|
|
486
|
-
uint64_t total_bytes = 0;
|
|
487
|
-
for (const auto& req : read_reqs) {
|
|
488
|
-
total_bytes += req.result.size();
|
|
489
|
-
}
|
|
490
480
|
*bytes_read = total_bytes;
|
|
491
481
|
}
|
|
492
482
|
}
|
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
#include <string>
|
|
10
10
|
|
|
11
11
|
#include "db/blob/blob_file_reader.h"
|
|
12
|
+
#include "db/blob/blob_log_format.h"
|
|
12
13
|
#include "options/cf_options.h"
|
|
14
|
+
#include "table/multiget_context.h"
|
|
13
15
|
|
|
14
16
|
namespace ROCKSDB_NAMESPACE {
|
|
15
17
|
|
|
@@ -98,9 +100,16 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
|
|
|
98
100
|
Slice key = cache_key.AsSlice();
|
|
99
101
|
s = GetBlobFromCache(key, &blob_entry);
|
|
100
102
|
if (s.ok() && blob_entry.GetValue()) {
|
|
101
|
-
|
|
103
|
+
// For consistency, the size of on-disk (possibly compressed) blob record
|
|
104
|
+
// is assigned to bytes_read.
|
|
102
105
|
if (bytes_read) {
|
|
103
|
-
|
|
106
|
+
uint64_t adjustment =
|
|
107
|
+
read_options.verify_checksums
|
|
108
|
+
? BlobLogRecord::CalculateAdjustmentForRecordHeader(
|
|
109
|
+
user_key.size())
|
|
110
|
+
: 0;
|
|
111
|
+
assert(offset >= adjustment);
|
|
112
|
+
*bytes_read = value_size + adjustment;
|
|
104
113
|
}
|
|
105
114
|
value->PinSelf(*blob_entry.GetValue());
|
|
106
115
|
return s;
|
|
@@ -152,6 +161,142 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
|
|
|
152
161
|
return s;
|
|
153
162
|
}
|
|
154
163
|
|
|
164
|
+
void BlobSource::MultiGetBlob(
|
|
165
|
+
const ReadOptions& read_options,
|
|
166
|
+
const autovector<std::reference_wrapper<const Slice>>& user_keys,
|
|
167
|
+
uint64_t file_number, uint64_t file_size,
|
|
168
|
+
const autovector<uint64_t>& offsets,
|
|
169
|
+
const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
|
|
170
|
+
autovector<PinnableSlice*>& blobs, uint64_t* bytes_read) {
|
|
171
|
+
size_t num_blobs = user_keys.size();
|
|
172
|
+
assert(num_blobs > 0);
|
|
173
|
+
assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
|
|
174
|
+
assert(num_blobs == offsets.size());
|
|
175
|
+
assert(num_blobs == value_sizes.size());
|
|
176
|
+
assert(num_blobs == statuses.size());
|
|
177
|
+
assert(num_blobs == blobs.size());
|
|
178
|
+
|
|
179
|
+
#ifndef NDEBUG
|
|
180
|
+
for (size_t i = 0; i < offsets.size() - 1; ++i) {
|
|
181
|
+
assert(offsets[i] <= offsets[i + 1]);
|
|
182
|
+
}
|
|
183
|
+
#endif // !NDEBUG
|
|
184
|
+
|
|
185
|
+
using Mask = uint64_t;
|
|
186
|
+
Mask cache_hit_mask = 0;
|
|
187
|
+
|
|
188
|
+
Status s;
|
|
189
|
+
uint64_t total_bytes = 0;
|
|
190
|
+
const OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number,
|
|
191
|
+
file_size);
|
|
192
|
+
|
|
193
|
+
if (blob_cache_) {
|
|
194
|
+
size_t cached_blob_count = 0;
|
|
195
|
+
for (size_t i = 0; i < num_blobs; ++i) {
|
|
196
|
+
CachableEntry<std::string> blob_entry;
|
|
197
|
+
const CacheKey cache_key = base_cache_key.WithOffset(offsets[i]);
|
|
198
|
+
const Slice key = cache_key.AsSlice();
|
|
199
|
+
|
|
200
|
+
s = GetBlobFromCache(key, &blob_entry);
|
|
201
|
+
if (s.ok() && blob_entry.GetValue()) {
|
|
202
|
+
assert(statuses[i]);
|
|
203
|
+
*statuses[i] = s;
|
|
204
|
+
blobs[i]->PinSelf(*blob_entry.GetValue());
|
|
205
|
+
|
|
206
|
+
// Update the counter for the number of valid blobs read from the cache.
|
|
207
|
+
++cached_blob_count;
|
|
208
|
+
// For consistency, the size of each on-disk (possibly compressed) blob
|
|
209
|
+
// record is accumulated to total_bytes.
|
|
210
|
+
uint64_t adjustment =
|
|
211
|
+
read_options.verify_checksums
|
|
212
|
+
? BlobLogRecord::CalculateAdjustmentForRecordHeader(
|
|
213
|
+
user_keys[i].get().size())
|
|
214
|
+
: 0;
|
|
215
|
+
assert(offsets[i] >= adjustment);
|
|
216
|
+
total_bytes += value_sizes[i] + adjustment;
|
|
217
|
+
cache_hit_mask |= (Mask{1} << i); // cache hit
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
// All blobs were read from the cache.
|
|
222
|
+
if (cached_blob_count == num_blobs) {
|
|
223
|
+
if (bytes_read) {
|
|
224
|
+
*bytes_read = total_bytes;
|
|
225
|
+
}
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
|
231
|
+
if (no_io) {
|
|
232
|
+
for (size_t i = 0; i < num_blobs; ++i) {
|
|
233
|
+
if (!(cache_hit_mask & (Mask{1} << i))) {
|
|
234
|
+
assert(statuses[i]);
|
|
235
|
+
*statuses[i] =
|
|
236
|
+
Status::Incomplete("Cannot read blob(s): no disk I/O allowed");
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
return;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
{
|
|
243
|
+
// Find the rest of blobs from the file since I/O is allowed.
|
|
244
|
+
autovector<std::reference_wrapper<const Slice>> _user_keys;
|
|
245
|
+
autovector<uint64_t> _offsets;
|
|
246
|
+
autovector<uint64_t> _value_sizes;
|
|
247
|
+
autovector<Status*> _statuses;
|
|
248
|
+
autovector<PinnableSlice*> _blobs;
|
|
249
|
+
uint64_t _bytes_read = 0;
|
|
250
|
+
|
|
251
|
+
for (size_t i = 0; i < num_blobs; ++i) {
|
|
252
|
+
if (!(cache_hit_mask & (Mask{1} << i))) {
|
|
253
|
+
_user_keys.emplace_back(user_keys[i]);
|
|
254
|
+
_offsets.push_back(offsets[i]);
|
|
255
|
+
_value_sizes.push_back(value_sizes[i]);
|
|
256
|
+
_statuses.push_back(statuses[i]);
|
|
257
|
+
_blobs.push_back(blobs[i]);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
CacheHandleGuard<BlobFileReader> blob_file_reader;
|
|
262
|
+
s = blob_file_cache_->GetBlobFileReader(file_number, &blob_file_reader);
|
|
263
|
+
if (!s.ok()) {
|
|
264
|
+
for (size_t i = 0; i < _blobs.size(); ++i) {
|
|
265
|
+
assert(_statuses[i]);
|
|
266
|
+
*_statuses[i] = s;
|
|
267
|
+
}
|
|
268
|
+
return;
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
assert(blob_file_reader.GetValue());
|
|
272
|
+
|
|
273
|
+
blob_file_reader.GetValue()->MultiGetBlob(read_options, _user_keys,
|
|
274
|
+
_offsets, _value_sizes, _statuses,
|
|
275
|
+
_blobs, &_bytes_read);
|
|
276
|
+
|
|
277
|
+
if (read_options.fill_cache) {
|
|
278
|
+
// If filling cache is allowed and a cache is configured, try to put
|
|
279
|
+
// the blob(s) to the cache.
|
|
280
|
+
for (size_t i = 0; i < _blobs.size(); ++i) {
|
|
281
|
+
if (_statuses[i]->ok()) {
|
|
282
|
+
CachableEntry<std::string> blob_entry;
|
|
283
|
+
const CacheKey cache_key = base_cache_key.WithOffset(_offsets[i]);
|
|
284
|
+
const Slice key = cache_key.AsSlice();
|
|
285
|
+
s = PutBlobIntoCache(key, &blob_entry, _blobs[i]);
|
|
286
|
+
if (!s.ok()) {
|
|
287
|
+
*_statuses[i] = s;
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
total_bytes += _bytes_read;
|
|
294
|
+
if (bytes_read) {
|
|
295
|
+
*bytes_read = total_bytes;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
155
300
|
bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
|
|
156
301
|
uint64_t offset) const {
|
|
157
302
|
const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
#include "rocksdb/cache.h"
|
|
14
14
|
#include "rocksdb/rocksdb_namespace.h"
|
|
15
15
|
#include "table/block_based/cachable_entry.h"
|
|
16
|
+
#include "util/autovector.h"
|
|
16
17
|
|
|
17
18
|
namespace ROCKSDB_NAMESPACE {
|
|
18
19
|
|
|
@@ -36,12 +37,41 @@ class BlobSource {
|
|
|
36
37
|
|
|
37
38
|
~BlobSource();
|
|
38
39
|
|
|
40
|
+
// Read a blob from the underlying cache or storage.
|
|
41
|
+
//
|
|
42
|
+
// If successful, returns ok and sets "*value" to the newly retrieved
|
|
43
|
+
// uncompressed blob. If there was an error while fetching the blob, sets
|
|
44
|
+
// "*value" to empty and returns a non-ok status.
|
|
45
|
+
//
|
|
46
|
+
// Note: For consistency, whether the blob is found in the cache or on disk,
|
|
47
|
+
// sets "*bytes_read" to the size of on-disk (possibly compressed) blob
|
|
48
|
+
// record.
|
|
39
49
|
Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
|
|
40
50
|
uint64_t file_number, uint64_t offset, uint64_t file_size,
|
|
41
51
|
uint64_t value_size, CompressionType compression_type,
|
|
42
52
|
FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
|
|
43
53
|
uint64_t* bytes_read);
|
|
44
54
|
|
|
55
|
+
// Read multiple blobs from the underlying cache or storage.
|
|
56
|
+
//
|
|
57
|
+
// If successful, returns ok and sets the elements of blobs to the newly
|
|
58
|
+
// retrieved uncompressed blobs. If there was an error while fetching one of
|
|
59
|
+
// blobs, sets its corresponding "blobs[i]" to empty and sets "statuses[i]" to
|
|
60
|
+
// a non-ok status.
|
|
61
|
+
//
|
|
62
|
+
// Note:
|
|
63
|
+
// - Offsets must be sorted in ascending order by caller.
|
|
64
|
+
// - For consistency, whether the blob is found in the cache or on disk, sets
|
|
65
|
+
// "*bytes_read" to the total size of on-disk (possibly compressed) blob
|
|
66
|
+
// records.
|
|
67
|
+
void MultiGetBlob(
|
|
68
|
+
const ReadOptions& read_options,
|
|
69
|
+
const autovector<std::reference_wrapper<const Slice>>& user_keys,
|
|
70
|
+
uint64_t file_number, uint64_t file_size,
|
|
71
|
+
const autovector<uint64_t>& offsets,
|
|
72
|
+
const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
|
|
73
|
+
autovector<PinnableSlice*>& blobs, uint64_t* bytes_read);
|
|
74
|
+
|
|
45
75
|
inline Status GetBlobFileReader(
|
|
46
76
|
uint64_t blob_file_number,
|
|
47
77
|
CacheHandleGuard<BlobFileReader>* blob_file_reader) {
|