@nxtedition/rocksdb 7.0.11 → 7.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/binding.cc +0 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
  3. package/deps/rocksdb/rocksdb/Makefile +3 -0
  4. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
  6. package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
  7. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
  8. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
  9. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
  10. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
  11. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
  12. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
  13. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
  14. package/deps/rocksdb/rocksdb/db/c.cc +159 -5
  15. package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
  16. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
  17. package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
  24. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
  25. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
  26. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
  27. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  35. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
  36. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
  37. package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
  38. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
  39. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
  40. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
  41. package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
  42. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
  43. package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
  44. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  45. package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
  46. package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
  47. package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
  48. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  49. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
  50. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  51. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
  52. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
  53. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
  54. package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
  55. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  56. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
  60. package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
  61. package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
  62. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  63. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  66. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  68. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
  69. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  70. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
  72. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
  73. package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
  74. package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
  75. package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
  76. package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
  77. package/deps/rocksdb/rocksdb/options/options.cc +4 -1
  78. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
  79. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  80. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
  81. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
  82. package/deps/rocksdb/rocksdb/src.mk +1 -0
  83. package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
  87. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
  88. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
  90. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
  91. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
  92. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
  93. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
  94. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
  95. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
  96. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
  97. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
  98. package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
  99. package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
  100. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
  101. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
  102. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
  103. package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
  104. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  105. package/index.js +2 -2
  106. package/iterator.js +1 -1
  107. package/package.json +1 -1
  108. package/prebuilds/linux-x64/node.napi.node +0 -0
package/binding.cc CHANGED
@@ -1395,7 +1395,6 @@ NAPI_METHOD(db_get_property) {
1395
1395
  return result;
1396
1396
  }
1397
1397
 
1398
-
1399
1398
  NAPI_METHOD(db_get_latest_sequence) {
1400
1399
  NAPI_ARGV(1);
1401
1400
 
@@ -1301,6 +1301,7 @@ if(WITH_TESTS)
1301
1301
  db/version_set_test.cc
1302
1302
  db/wal_manager_test.cc
1303
1303
  db/wal_edit_test.cc
1304
+ db/wide/db_wide_basic_test.cc
1304
1305
  db/wide/wide_column_serialization_test.cc
1305
1306
  db/write_batch_test.cc
1306
1307
  db/write_callback_test.cc
@@ -1383,6 +1383,9 @@ db_blob_compaction_test: $(OBJ_DIR)/db/blob/db_blob_compaction_test.o $(TEST_LIB
1383
1383
  db_readonly_with_timestamp_test: $(OBJ_DIR)/db/db_readonly_with_timestamp_test.o $(TEST_LIBRARY) $(LIBRARY)
1384
1384
  $(AM_LINK)
1385
1385
 
1386
+ db_wide_basic_test: $(OBJ_DIR)/db/wide/db_wide_basic_test.o $(TEST_LIBRARY) $(LIBRARY)
1387
+ $(AM_LINK)
1388
+
1386
1389
  db_with_timestamp_basic_test: $(OBJ_DIR)/db/db_with_timestamp_basic_test.o $(TEST_LIBRARY) $(LIBRARY)
1387
1390
  $(AM_LINK)
1388
1391
 
@@ -5240,6 +5240,12 @@ cpp_unittest_wrapper(name="db_wal_test",
5240
5240
  extra_compiler_flags=[])
5241
5241
 
5242
5242
 
5243
+ cpp_unittest_wrapper(name="db_wide_basic_test",
5244
+ srcs=["db/wide/db_wide_basic_test.cc"],
5245
+ deps=[":rocksdb_test_lib"],
5246
+ extra_compiler_flags=[])
5247
+
5248
+
5243
5249
  cpp_unittest_wrapper(name="db_with_timestamp_basic_test",
5244
5250
  srcs=["db/db_with_timestamp_basic_test.cc"],
5245
5251
  deps=[":rocksdb_test_lib"],
@@ -3,6 +3,7 @@
3
3
  // COPYING file in the root directory) and Apache 2.0 License
4
4
  // (found in the LICENSE.Apache file in the root directory).
5
5
 
6
+ #include "cache_key.h"
6
7
  #ifdef GFLAGS
7
8
  #include <cinttypes>
8
9
  #include <cstddef>
@@ -214,7 +215,8 @@ struct KeyGen {
214
215
  EncodeFixed64(key_data + 10, key);
215
216
  key_data[18] = char{4};
216
217
  EncodeFixed64(key_data + 19, key);
217
- return Slice(&key_data[off], sizeof(key_data) - off);
218
+ assert(27 >= kCacheKeySize);
219
+ return Slice(&key_data[off], kCacheKeySize);
218
220
  }
219
221
  };
220
222
 
@@ -321,8 +323,9 @@ class CacheBench {
321
323
  Random64 rnd(1);
322
324
  KeyGen keygen;
323
325
  for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) {
324
- cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_), createValue(rnd),
325
- &helper1, FLAGS_value_bytes);
326
+ Status s = cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_),
327
+ createValue(rnd), &helper1, FLAGS_value_bytes);
328
+ assert(s.ok());
326
329
  }
327
330
  }
328
331
 
@@ -542,8 +545,9 @@ class CacheBench {
542
545
  FLAGS_value_bytes);
543
546
  } else {
544
547
  // do insert
545
- cache_->Insert(key, createValue(thread->rnd), &helper2,
546
- FLAGS_value_bytes, &handle);
548
+ Status s = cache_->Insert(key, createValue(thread->rnd), &helper2,
549
+ FLAGS_value_bytes, &handle);
550
+ assert(s.ok());
547
551
  }
548
552
  } else if (random_op < insert_threshold_) {
549
553
  if (handle) {
@@ -551,8 +555,9 @@ class CacheBench {
551
555
  handle = nullptr;
552
556
  }
553
557
  // do insert
554
- cache_->Insert(key, createValue(thread->rnd), &helper3,
555
- FLAGS_value_bytes, &handle);
558
+ Status s = cache_->Insert(key, createValue(thread->rnd), &helper3,
559
+ FLAGS_value_bytes, &handle);
560
+ assert(s.ok());
556
561
  } else if (random_op < lookup_threshold_) {
557
562
  if (handle) {
558
563
  cache_->Release(handle);
@@ -65,6 +65,8 @@ class CacheKey {
65
65
  uint64_t offset_etc64_;
66
66
  };
67
67
 
68
+ constexpr uint8_t kCacheKeySize = static_cast<uint8_t>(sizeof(CacheKey));
69
+
68
70
  // A file-specific generator of cache keys, sometimes referred to as the
69
71
  // "base" cache key for a file because all the cache keys for various offsets
70
72
  // within the file are computed using simple arithmetic. The basis for the
@@ -192,8 +192,7 @@ LRUCacheShard::LRUCacheShard(size_t capacity, size_t estimated_value_size,
192
192
  : capacity_(capacity),
193
193
  strict_capacity_limit_(strict_capacity_limit),
194
194
  table_(
195
- CalcHashBits(capacity, estimated_value_size, metadata_charge_policy) +
196
- static_cast<uint8_t>(ceil(log2(1.0 / kLoadFactor)))),
195
+ CalcHashBits(capacity, estimated_value_size, metadata_charge_policy)),
197
196
  usage_(0),
198
197
  lru_usage_(0) {
199
198
  set_metadata_charge_policy(metadata_charge_policy);
@@ -295,16 +294,29 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
295
294
  }
296
295
  }
297
296
 
298
- uint8_t LRUCacheShard::CalcHashBits(
299
- size_t capacity, size_t estimated_value_size,
297
+ size_t LRUCacheShard::CalcEstimatedHandleCharge(
298
+ size_t estimated_value_size,
300
299
  CacheMetadataChargePolicy metadata_charge_policy) {
301
300
  LRUHandle h;
302
301
  h.CalcTotalCharge(estimated_value_size, metadata_charge_policy);
303
- size_t num_entries = capacity / h.total_charge;
302
+ return h.total_charge;
303
+ }
304
+
305
+ uint8_t LRUCacheShard::CalcHashBits(
306
+ size_t capacity, size_t estimated_value_size,
307
+ CacheMetadataChargePolicy metadata_charge_policy) {
308
+ size_t handle_charge =
309
+ CalcEstimatedHandleCharge(estimated_value_size, metadata_charge_policy);
310
+ size_t num_entries =
311
+ static_cast<size_t>(capacity / (kLoadFactor * handle_charge));
312
+
313
+ // Compute the ceiling of log2(num_entries). If num_entries == 0, return 0.
304
314
  uint8_t num_hash_bits = 0;
305
- while (num_entries >>= 1) {
315
+ size_t num_entries_copy = num_entries;
316
+ while (num_entries_copy >>= 1) {
306
317
  ++num_hash_bits;
307
318
  }
319
+ num_hash_bits += size_t{1} << num_hash_bits < num_entries ? 1 : 0;
308
320
  return num_hash_bits;
309
321
  }
310
322
 
@@ -22,10 +22,14 @@
22
22
  #include "util/distributed_mutex.h"
23
23
 
24
24
  namespace ROCKSDB_NAMESPACE {
25
+
25
26
  namespace fast_lru_cache {
26
27
 
27
- // LRU cache implementation using an open-address hash table.
28
+ // Forward declaration of friend class.
29
+ class FastLRUCacheTest;
28
30
 
31
+ // LRU cache implementation using an open-address hash table.
32
+ //
29
33
  // Every slot in the hash table is an LRUHandle. Because handles can be
30
34
  // referenced externally, we can't discard them immediately once they are
31
35
  // deleted (via a delete or an LRU eviction) or replaced by a new version
@@ -51,7 +55,7 @@ namespace fast_lru_cache {
51
55
  // - Not R --> R: When an unreferenced element becomes referenced. This can only
52
56
  // happen if the element is V, since references to an element can only be
53
57
  // created when it's visible.
54
-
58
+ //
55
59
  // Internally, the cache uses an open-addressed hash table to index the handles.
56
60
  // We use tombstone counters to keep track of displacements.
57
61
  // Because of the tombstones and the two possible visibility states of an
@@ -70,9 +74,6 @@ namespace fast_lru_cache {
70
74
  // slot. In any case, the slot becomes available. When a handle is inserted
71
75
  // into that slot, it becomes a visible element again.
72
76
 
73
- constexpr uint8_t kCacheKeySize =
74
- static_cast<uint8_t>(sizeof(ROCKSDB_NAMESPACE::CacheKey));
75
-
76
77
  // The load factor p is a real number in (0, 1) such that at all
77
78
  // times at most a fraction p of all slots, without counting tombstones,
78
79
  // are occupied by elements. This means that the probability that a
@@ -367,6 +368,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
367
368
 
368
369
  private:
369
370
  friend class LRUCache;
371
+ friend class FastLRUCacheTest;
372
+
370
373
  void LRU_Remove(LRUHandle* e);
371
374
  void LRU_Insert(LRUHandle* e);
372
375
 
@@ -376,6 +379,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
376
379
  // holding the mutex_.
377
380
  void EvictFromLRU(size_t charge, autovector<LRUHandle>* deleted);
378
381
 
382
+ // Returns the charge of a single handle.
383
+ static size_t CalcEstimatedHandleCharge(
384
+ size_t estimated_value_size,
385
+ CacheMetadataChargePolicy metadata_charge_policy);
386
+
379
387
  // Returns the number of bits used to hash an element in the hash
380
388
  // table.
381
389
  static uint8_t CalcHashBits(size_t capacity, size_t estimated_value_size,
@@ -206,6 +206,7 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
206
206
  ValidateLRUList({"e", "f", "g", "Z", "d"}, 2);
207
207
  }
208
208
 
209
+ namespace fast_lru_cache {
209
210
  // TODO(guido) Consolidate the following FastLRUCache tests with
210
211
  // that of LRUCache.
211
212
  class FastLRUCacheTest : public testing::Test {
@@ -238,6 +239,38 @@ class FastLRUCacheTest : public testing::Test {
238
239
 
239
240
  Status Insert(char key, size_t len) { return Insert(std::string(len, key)); }
240
241
 
242
+ size_t CalcEstimatedHandleChargeWrapper(
243
+ size_t estimated_value_size,
244
+ CacheMetadataChargePolicy metadata_charge_policy) {
245
+ return fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge(
246
+ estimated_value_size, metadata_charge_policy);
247
+ }
248
+
249
+ uint8_t CalcHashBitsWrapper(
250
+ size_t capacity, size_t estimated_value_size,
251
+ CacheMetadataChargePolicy metadata_charge_policy) {
252
+ return fast_lru_cache::LRUCacheShard::CalcHashBits(
253
+ capacity, estimated_value_size, metadata_charge_policy);
254
+ }
255
+
256
+ // Maximum number of items that a shard can hold.
257
+ double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size,
258
+ CacheMetadataChargePolicy metadata_charge_policy) {
259
+ size_t handle_charge =
260
+ fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge(
261
+ estimated_value_size, metadata_charge_policy);
262
+ return capacity / (fast_lru_cache::kLoadFactor * handle_charge);
263
+ }
264
+
265
+ bool TableSizeIsAppropriate(uint8_t hash_bits, double max_occupancy) {
266
+ if (hash_bits == 0) {
267
+ return max_occupancy <= 1;
268
+ } else {
269
+ return (1 << hash_bits >= max_occupancy) &&
270
+ (1 << (hash_bits - 1) <= max_occupancy);
271
+ }
272
+ }
273
+
241
274
  private:
242
275
  fast_lru_cache::LRUCacheShard* cache_ = nullptr;
243
276
  };
@@ -253,6 +286,62 @@ TEST_F(FastLRUCacheTest, ValidateKeySize) {
253
286
  EXPECT_NOK(Insert('f', 0));
254
287
  }
255
288
 
289
+ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
290
+ size_t capacity = 1024;
291
+ size_t estimated_value_size = 1;
292
+ CacheMetadataChargePolicy metadata_charge_policy = kDontChargeCacheMetadata;
293
+ double max_occupancy =
294
+ CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
295
+ uint8_t hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
296
+ metadata_charge_policy);
297
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
298
+
299
+ capacity = 1024;
300
+ estimated_value_size = 1;
301
+ metadata_charge_policy = kFullChargeCacheMetadata;
302
+ max_occupancy =
303
+ CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
304
+ hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
305
+ metadata_charge_policy);
306
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
307
+
308
+ // No elements fit in cache.
309
+ capacity = 0;
310
+ estimated_value_size = 1;
311
+ metadata_charge_policy = kDontChargeCacheMetadata;
312
+ hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
313
+ metadata_charge_policy);
314
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
315
+
316
+ // Set the capacity just below a single handle. Because the load factor is <
317
+ // 100% at least one handle will fit in the table.
318
+ estimated_value_size = 1;
319
+ size_t handle_charge = CalcEstimatedHandleChargeWrapper(
320
+ 8192 /* estimated_value_size */, kDontChargeCacheMetadata);
321
+ capacity = handle_charge - 1;
322
+ // The load factor should be bounded away from 100%.
323
+ assert(static_cast<size_t>(capacity / fast_lru_cache::kLoadFactor) >
324
+ handle_charge);
325
+ metadata_charge_policy = kDontChargeCacheMetadata;
326
+ max_occupancy =
327
+ CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
328
+ hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
329
+ metadata_charge_policy);
330
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
331
+
332
+ // Large capacity.
333
+ capacity = 31924172;
334
+ estimated_value_size = 321;
335
+ metadata_charge_policy = kFullChargeCacheMetadata;
336
+ max_occupancy =
337
+ CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
338
+ hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
339
+ metadata_charge_policy);
340
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
341
+ }
342
+
343
+ } // namespace fast_lru_cache
344
+
256
345
  class TestSecondaryCache : public SecondaryCache {
257
346
  public:
258
347
  // Specifies what action to take on a lookup for a particular key
@@ -443,50 +443,40 @@ void BlobFileReader::MultiGetBlob(
443
443
  }
444
444
 
445
445
  assert(s.ok());
446
+
447
+ uint64_t total_bytes = 0;
446
448
  for (size_t i = 0; i < num_blobs; ++i) {
447
449
  auto& req = read_reqs[i];
450
+ const auto& record_slice = req.result;
451
+
448
452
  assert(statuses[i]);
449
- if (req.status.ok() && req.result.size() != req.len) {
453
+ if (req.status.ok() && record_slice.size() != req.len) {
450
454
  req.status = IOStatus::Corruption("Failed to read data from blob file");
451
455
  }
456
+
452
457
  *statuses[i] = req.status;
453
- }
458
+ if (!statuses[i]->ok()) {
459
+ continue;
460
+ }
454
461
 
455
- if (read_options.verify_checksums) {
456
- for (size_t i = 0; i < num_blobs; ++i) {
457
- assert(statuses[i]);
462
+ // Verify checksums if enabled
463
+ if (read_options.verify_checksums) {
464
+ *statuses[i] = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
458
465
  if (!statuses[i]->ok()) {
459
466
  continue;
460
467
  }
461
- const Slice& record_slice = read_reqs[i].result;
462
- s = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
463
- if (!s.ok()) {
464
- assert(statuses[i]);
465
- *statuses[i] = s;
466
- }
467
468
  }
468
- }
469
469
 
470
- for (size_t i = 0; i < num_blobs; ++i) {
471
- assert(statuses[i]);
472
- if (!statuses[i]->ok()) {
473
- continue;
474
- }
475
- const Slice& record_slice = read_reqs[i].result;
476
- const Slice value_slice(record_slice.data() + adjustments[i],
477
- value_sizes[i]);
478
- s = UncompressBlobIfNeeded(value_slice, compression_type_, clock_,
479
- statistics_, values[i]);
480
- if (!s.ok()) {
481
- *statuses[i] = s;
470
+ // Uncompress blob if needed
471
+ Slice value_slice(record_slice.data() + adjustments[i], value_sizes[i]);
472
+ *statuses[i] = UncompressBlobIfNeeded(value_slice, compression_type_,
473
+ clock_, statistics_, values[i]);
474
+ if (statuses[i]->ok()) {
475
+ total_bytes += record_slice.size();
482
476
  }
483
477
  }
484
478
 
485
479
  if (bytes_read) {
486
- uint64_t total_bytes = 0;
487
- for (const auto& req : read_reqs) {
488
- total_bytes += req.result.size();
489
- }
490
480
  *bytes_read = total_bytes;
491
481
  }
492
482
  }
@@ -9,7 +9,9 @@
9
9
  #include <string>
10
10
 
11
11
  #include "db/blob/blob_file_reader.h"
12
+ #include "db/blob/blob_log_format.h"
12
13
  #include "options/cf_options.h"
14
+ #include "table/multiget_context.h"
13
15
 
14
16
  namespace ROCKSDB_NAMESPACE {
15
17
 
@@ -98,9 +100,16 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
98
100
  Slice key = cache_key.AsSlice();
99
101
  s = GetBlobFromCache(key, &blob_entry);
100
102
  if (s.ok() && blob_entry.GetValue()) {
101
- assert(blob_entry.GetValue()->size() == value_size);
103
+ // For consistency, the size of on-disk (possibly compressed) blob record
104
+ // is assigned to bytes_read.
102
105
  if (bytes_read) {
103
- *bytes_read = value_size;
106
+ uint64_t adjustment =
107
+ read_options.verify_checksums
108
+ ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
109
+ user_key.size())
110
+ : 0;
111
+ assert(offset >= adjustment);
112
+ *bytes_read = value_size + adjustment;
104
113
  }
105
114
  value->PinSelf(*blob_entry.GetValue());
106
115
  return s;
@@ -152,6 +161,142 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
152
161
  return s;
153
162
  }
154
163
 
164
+ void BlobSource::MultiGetBlob(
165
+ const ReadOptions& read_options,
166
+ const autovector<std::reference_wrapper<const Slice>>& user_keys,
167
+ uint64_t file_number, uint64_t file_size,
168
+ const autovector<uint64_t>& offsets,
169
+ const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
170
+ autovector<PinnableSlice*>& blobs, uint64_t* bytes_read) {
171
+ size_t num_blobs = user_keys.size();
172
+ assert(num_blobs > 0);
173
+ assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
174
+ assert(num_blobs == offsets.size());
175
+ assert(num_blobs == value_sizes.size());
176
+ assert(num_blobs == statuses.size());
177
+ assert(num_blobs == blobs.size());
178
+
179
+ #ifndef NDEBUG
180
+ for (size_t i = 0; i < offsets.size() - 1; ++i) {
181
+ assert(offsets[i] <= offsets[i + 1]);
182
+ }
183
+ #endif // !NDEBUG
184
+
185
+ using Mask = uint64_t;
186
+ Mask cache_hit_mask = 0;
187
+
188
+ Status s;
189
+ uint64_t total_bytes = 0;
190
+ const OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number,
191
+ file_size);
192
+
193
+ if (blob_cache_) {
194
+ size_t cached_blob_count = 0;
195
+ for (size_t i = 0; i < num_blobs; ++i) {
196
+ CachableEntry<std::string> blob_entry;
197
+ const CacheKey cache_key = base_cache_key.WithOffset(offsets[i]);
198
+ const Slice key = cache_key.AsSlice();
199
+
200
+ s = GetBlobFromCache(key, &blob_entry);
201
+ if (s.ok() && blob_entry.GetValue()) {
202
+ assert(statuses[i]);
203
+ *statuses[i] = s;
204
+ blobs[i]->PinSelf(*blob_entry.GetValue());
205
+
206
+ // Update the counter for the number of valid blobs read from the cache.
207
+ ++cached_blob_count;
208
+ // For consistency, the size of each on-disk (possibly compressed) blob
209
+ // record is accumulated to total_bytes.
210
+ uint64_t adjustment =
211
+ read_options.verify_checksums
212
+ ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
213
+ user_keys[i].get().size())
214
+ : 0;
215
+ assert(offsets[i] >= adjustment);
216
+ total_bytes += value_sizes[i] + adjustment;
217
+ cache_hit_mask |= (Mask{1} << i); // cache hit
218
+ }
219
+ }
220
+
221
+ // All blobs were read from the cache.
222
+ if (cached_blob_count == num_blobs) {
223
+ if (bytes_read) {
224
+ *bytes_read = total_bytes;
225
+ }
226
+ return;
227
+ }
228
+ }
229
+
230
+ const bool no_io = read_options.read_tier == kBlockCacheTier;
231
+ if (no_io) {
232
+ for (size_t i = 0; i < num_blobs; ++i) {
233
+ if (!(cache_hit_mask & (Mask{1} << i))) {
234
+ assert(statuses[i]);
235
+ *statuses[i] =
236
+ Status::Incomplete("Cannot read blob(s): no disk I/O allowed");
237
+ }
238
+ }
239
+ return;
240
+ }
241
+
242
+ {
243
+ // Find the rest of blobs from the file since I/O is allowed.
244
+ autovector<std::reference_wrapper<const Slice>> _user_keys;
245
+ autovector<uint64_t> _offsets;
246
+ autovector<uint64_t> _value_sizes;
247
+ autovector<Status*> _statuses;
248
+ autovector<PinnableSlice*> _blobs;
249
+ uint64_t _bytes_read = 0;
250
+
251
+ for (size_t i = 0; i < num_blobs; ++i) {
252
+ if (!(cache_hit_mask & (Mask{1} << i))) {
253
+ _user_keys.emplace_back(user_keys[i]);
254
+ _offsets.push_back(offsets[i]);
255
+ _value_sizes.push_back(value_sizes[i]);
256
+ _statuses.push_back(statuses[i]);
257
+ _blobs.push_back(blobs[i]);
258
+ }
259
+ }
260
+
261
+ CacheHandleGuard<BlobFileReader> blob_file_reader;
262
+ s = blob_file_cache_->GetBlobFileReader(file_number, &blob_file_reader);
263
+ if (!s.ok()) {
264
+ for (size_t i = 0; i < _blobs.size(); ++i) {
265
+ assert(_statuses[i]);
266
+ *_statuses[i] = s;
267
+ }
268
+ return;
269
+ }
270
+
271
+ assert(blob_file_reader.GetValue());
272
+
273
+ blob_file_reader.GetValue()->MultiGetBlob(read_options, _user_keys,
274
+ _offsets, _value_sizes, _statuses,
275
+ _blobs, &_bytes_read);
276
+
277
+ if (read_options.fill_cache) {
278
+ // If filling cache is allowed and a cache is configured, try to put
279
+ // the blob(s) to the cache.
280
+ for (size_t i = 0; i < _blobs.size(); ++i) {
281
+ if (_statuses[i]->ok()) {
282
+ CachableEntry<std::string> blob_entry;
283
+ const CacheKey cache_key = base_cache_key.WithOffset(_offsets[i]);
284
+ const Slice key = cache_key.AsSlice();
285
+ s = PutBlobIntoCache(key, &blob_entry, _blobs[i]);
286
+ if (!s.ok()) {
287
+ *_statuses[i] = s;
288
+ }
289
+ }
290
+ }
291
+ }
292
+
293
+ total_bytes += _bytes_read;
294
+ if (bytes_read) {
295
+ *bytes_read = total_bytes;
296
+ }
297
+ }
298
+ }
299
+
155
300
  bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
156
301
  uint64_t offset) const {
157
302
  const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
@@ -13,6 +13,7 @@
13
13
  #include "rocksdb/cache.h"
14
14
  #include "rocksdb/rocksdb_namespace.h"
15
15
  #include "table/block_based/cachable_entry.h"
16
+ #include "util/autovector.h"
16
17
 
17
18
  namespace ROCKSDB_NAMESPACE {
18
19
 
@@ -36,12 +37,41 @@ class BlobSource {
36
37
 
37
38
  ~BlobSource();
38
39
 
40
+ // Read a blob from the underlying cache or storage.
41
+ //
42
+ // If successful, returns ok and sets "*value" to the newly retrieved
43
+ // uncompressed blob. If there was an error while fetching the blob, sets
44
+ // "*value" to empty and returns a non-ok status.
45
+ //
46
+ // Note: For consistency, whether the blob is found in the cache or on disk,
47
+ // sets "*bytes_read" to the size of on-disk (possibly compressed) blob
48
+ // record.
39
49
  Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
40
50
  uint64_t file_number, uint64_t offset, uint64_t file_size,
41
51
  uint64_t value_size, CompressionType compression_type,
42
52
  FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
43
53
  uint64_t* bytes_read);
44
54
 
55
+ // Read multiple blobs from the underlying cache or storage.
56
+ //
57
+ // If successful, returns ok and sets the elements of blobs to the newly
58
+ // retrieved uncompressed blobs. If there was an error while fetching one of
59
+ // blobs, sets its corresponding "blobs[i]" to empty and sets "statuses[i]" to
60
+ // a non-ok status.
61
+ //
62
+ // Note:
63
+ // - Offsets must be sorted in ascending order by caller.
64
+ // - For consistency, whether the blob is found in the cache or on disk, sets
65
+ // "*bytes_read" to the total size of on-disk (possibly compressed) blob
66
+ // records.
67
+ void MultiGetBlob(
68
+ const ReadOptions& read_options,
69
+ const autovector<std::reference_wrapper<const Slice>>& user_keys,
70
+ uint64_t file_number, uint64_t file_size,
71
+ const autovector<uint64_t>& offsets,
72
+ const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
73
+ autovector<PinnableSlice*>& blobs, uint64_t* bytes_read);
74
+
45
75
  inline Status GetBlobFileReader(
46
76
  uint64_t blob_file_number,
47
77
  CacheHandleGuard<BlobFileReader>* blob_file_reader) {