@nxtedition/rocksdb 7.0.23 → 7.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/binding.cc +3 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
  3. package/deps/rocksdb/rocksdb/Makefile +6 -2
  4. package/deps/rocksdb/rocksdb/TARGETS +14 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
  6. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
  21. package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
  25. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
  26. package/deps/rocksdb/rocksdb/db/c.cc +68 -0
  27. package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
  51. package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
  52. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
  55. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
  61. package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
  62. package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
  63. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
  64. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
  65. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
  66. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  67. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
  68. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
  69. package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
  70. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
  71. package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
  72. package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
  73. package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
  74. package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
  75. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
  78. package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
  79. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  80. package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
  82. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
  93. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
  94. package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
  95. package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
  96. package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
  97. package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
  98. package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
  99. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
  100. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
  101. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
  102. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
  103. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
  104. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
  107. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
  108. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
  111. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
  113. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
  115. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
  116. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
  117. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
  118. package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
  119. package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
  120. package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
  121. package/deps/rocksdb/rocksdb/src.mk +5 -0
  122. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
  127. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  131. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
  133. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
  134. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
  135. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  136. package/deps/rocksdb/rocksdb/util/compression.h +2 -0
  137. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
  138. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
  139. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
  140. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
  143. package/deps/rocksdb/rocksdb.gyp +5 -1
  144. package/package.json +1 -1
  145. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  146. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -78,8 +78,9 @@ class FastLRUCacheTest;
78
78
  // times at most a fraction p of all slots, without counting tombstones,
79
79
  // are occupied by elements. This means that the probability that a
80
80
  // random probe hits an empty slot is at most p, and thus at most 1/p probes
81
- // are required on average. We use p = 70%, so between 1 and 2 probes are
82
- // needed on average.
81
+ // are required on average. For example, p = 70% implies that between 1 and 2
82
+ // probes are needed on average (bear in mind that this reasoning doesn't
83
+ // consider the effects of clustering over time).
83
84
  // Because the size of the hash table is always rounded up to the next
84
85
  // power of 2, p is really an upper bound on the actual load factor---the
85
86
  // actual load factor is anywhere between p/2 and p. This is a bit wasteful,
@@ -87,7 +88,12 @@ class FastLRUCacheTest;
87
88
  // Since space cost is dominated by the values (the LSM blocks),
88
89
  // overprovisioning the table with metadata only increases the total cache space
89
90
  // usage by a tiny fraction.
90
- constexpr double kLoadFactor = 0.7;
91
+ constexpr double kLoadFactor = 0.35;
92
+
93
+ // The user can exceed kLoadFactor if the sizes of the inserted values don't
94
+ // match estimated_value_size, or if strict_capacity_limit == false. To
95
+ // avoid performance to plunge, we set a strict upper bound on the load factor.
96
+ constexpr double kStrictLoadFactor = 0.7;
91
97
 
92
98
  // Arbitrary seeds.
93
99
  constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
@@ -103,7 +109,7 @@ struct LRUHandle {
103
109
  size_t total_charge; // TODO(opt): Only allow uint32_t?
104
110
  // The hash of key(). Used for fast sharding and comparisons.
105
111
  uint32_t hash;
106
- // The number of external refs to this entry. The cache itself is not counted.
112
+ // The number of external refs to this entry.
107
113
  uint32_t refs;
108
114
 
109
115
  enum Flags : uint8_t {
@@ -226,16 +232,10 @@ struct LRUHandle {
226
232
  }
227
233
  };
228
234
 
229
- // TODO(Guido) Update the following comment.
230
235
 
231
- // We provide our own simple hash table since it removes a whole bunch
232
- // of porting hacks and is also faster than some of the built-in hash
233
- // table implementations in some of the compiler/runtime combinations
234
- // we have tested. E.g., readrandom speeds up by ~5% over the g++
235
- // 4.4.3's builtin hashtable.
236
236
  class LRUHandleTable {
237
237
  public:
238
- explicit LRUHandleTable(uint8_t hash_bits);
238
+ explicit LRUHandleTable(int hash_bits);
239
239
  ~LRUHandleTable();
240
240
 
241
241
  // Returns a pointer to a visible element matching the key/hash, or
@@ -269,10 +269,17 @@ class LRUHandleTable {
269
269
  }
270
270
  }
271
271
 
272
- uint8_t GetLengthBits() const { return length_bits_; }
272
+ uint32_t GetTableSize() const { return uint32_t{1} << length_bits_; }
273
+
274
+ int GetLengthBits() const { return length_bits_; }
275
+
276
+ uint32_t GetOccupancyLimit() const { return occupancy_limit_; }
273
277
 
274
278
  uint32_t GetOccupancy() const { return occupancy_; }
275
279
 
280
+ // Returns x mod 2^{length_bits_}.
281
+ uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
282
+
276
283
  private:
277
284
  int FindVisibleElement(const Slice& key, uint32_t hash, int& probe,
278
285
  int displacement);
@@ -295,11 +302,16 @@ class LRUHandleTable {
295
302
 
296
303
  // Number of hash bits used for table index.
297
304
  // The size of the table is 1 << length_bits_.
298
- uint8_t length_bits_;
305
+ int length_bits_;
306
+
307
+ const uint32_t length_bits_mask_;
299
308
 
300
309
  // Number of elements in the table.
301
310
  uint32_t occupancy_;
302
311
 
312
+ // Maximum number of elements the user can store in the table.
313
+ uint32_t occupancy_limit_;
314
+
303
315
  std::unique_ptr<LRUHandle[]> array_;
304
316
  };
305
317
 
@@ -374,7 +386,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
374
386
  void LRU_Insert(LRUHandle* e);
375
387
 
376
388
  // Free some space following strict LRU policy until enough space
377
- // to hold (usage_ + charge) is freed or the lru list is empty
389
+ // to hold (usage_ + charge) is freed or the LRU list is empty
378
390
  // This function is not thread safe - it needs to be executed while
379
391
  // holding the mutex_.
380
392
  void EvictFromLRU(size_t charge, autovector<LRUHandle>* deleted);
@@ -386,8 +398,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
386
398
 
387
399
  // Returns the number of bits used to hash an element in the hash
388
400
  // table.
389
- static uint8_t CalcHashBits(size_t capacity, size_t estimated_value_size,
390
- CacheMetadataChargePolicy metadata_charge_policy);
401
+ static int CalcHashBits(size_t capacity, size_t estimated_value_size,
402
+ CacheMetadataChargePolicy metadata_charge_policy);
391
403
 
392
404
  // Initialized before use.
393
405
  size_t capacity_;
@@ -332,7 +332,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
332
332
  delete[] reinterpret_cast<char*>(e);
333
333
  *handle = nullptr;
334
334
  }
335
- s = Status::Incomplete("Insert failed due to LRU cache being full.");
335
+ s = Status::MemoryLimit("Insert failed due to LRU cache being full.");
336
336
  }
337
337
  } else {
338
338
  // Insert into the cache. Note that the cache might get larger than its
@@ -757,6 +757,17 @@ void LRUCache::WaitAll(std::vector<Handle*>& handles) {
757
757
  }
758
758
  }
759
759
 
760
+ std::string LRUCache::GetPrintableOptions() const {
761
+ std::string ret;
762
+ ret.reserve(20000);
763
+ ret.append(ShardedCache::GetPrintableOptions());
764
+ if (secondary_cache_) {
765
+ ret.append(" secondary_cache:\n");
766
+ ret.append(secondary_cache_->GetPrintableOptions());
767
+ }
768
+ return ret;
769
+ }
770
+
760
771
  } // namespace lru_cache
761
772
 
762
773
  std::shared_ptr<Cache> NewLRUCache(
@@ -482,6 +482,7 @@ class LRUCache
482
482
  virtual DeleterFn GetDeleter(Handle* handle) const override;
483
483
  virtual void DisownData() override;
484
484
  virtual void WaitAll(std::vector<Handle*>& handles) override;
485
+ std::string GetPrintableOptions() const override;
485
486
 
486
487
  // Retrieves number of elements in LRU, for unit test purpose only.
487
488
  size_t TEST_GetLRUSize();
@@ -9,6 +9,7 @@
9
9
  #include <vector>
10
10
 
11
11
  #include "cache/cache_key.h"
12
+ #include "cache/clock_cache.h"
12
13
  #include "cache/fast_lru_cache.h"
13
14
  #include "db/db_test_util.h"
14
15
  #include "file/sst_file_manager_impl.h"
@@ -207,8 +208,8 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
207
208
  }
208
209
 
209
210
  namespace fast_lru_cache {
210
- // TODO(guido) Consolidate the following FastLRUCache tests with
211
- // that of LRUCache.
211
+
212
+ // TODO(guido) Replicate LRU policy tests from LRUCache here.
212
213
  class FastLRUCacheTest : public testing::Test {
213
214
  public:
214
215
  FastLRUCacheTest() {}
@@ -246,9 +247,8 @@ class FastLRUCacheTest : public testing::Test {
246
247
  estimated_value_size, metadata_charge_policy);
247
248
  }
248
249
 
249
- uint8_t CalcHashBitsWrapper(
250
- size_t capacity, size_t estimated_value_size,
251
- CacheMetadataChargePolicy metadata_charge_policy) {
250
+ int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size,
251
+ CacheMetadataChargePolicy metadata_charge_policy) {
252
252
  return fast_lru_cache::LRUCacheShard::CalcHashBits(
253
253
  capacity, estimated_value_size, metadata_charge_policy);
254
254
  }
@@ -262,7 +262,7 @@ class FastLRUCacheTest : public testing::Test {
262
262
  return capacity / (fast_lru_cache::kLoadFactor * handle_charge);
263
263
  }
264
264
 
265
- bool TableSizeIsAppropriate(uint8_t hash_bits, double max_occupancy) {
265
+ bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) {
266
266
  if (hash_bits == 0) {
267
267
  return max_occupancy <= 1;
268
268
  } else {
@@ -287,51 +287,63 @@ TEST_F(FastLRUCacheTest, ValidateKeySize) {
287
287
  }
288
288
 
289
289
  TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
290
- size_t capacity = 1024;
291
- size_t estimated_value_size = 1;
292
- CacheMetadataChargePolicy metadata_charge_policy = kDontChargeCacheMetadata;
293
- double max_occupancy =
294
- CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
295
- uint8_t hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
296
- metadata_charge_policy);
297
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
290
+ size_t capacity;
291
+ size_t estimated_value_size;
292
+ double max_occupancy;
293
+ int hash_bits;
294
+ CacheMetadataChargePolicy metadata_charge_policy;
295
+
296
+ // Vary the cache capacity, fix the element charge.
297
+ for (int i = 0; i < 2048; i++) {
298
+ capacity = i;
299
+ estimated_value_size = 0;
300
+ metadata_charge_policy = kFullChargeCacheMetadata;
301
+ max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size,
302
+ metadata_charge_policy);
303
+ hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
304
+ metadata_charge_policy);
305
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
306
+ }
298
307
 
299
- capacity = 1024;
308
+ // Fix the cache capacity, vary the element charge.
309
+ for (int i = 0; i < 1024; i++) {
310
+ capacity = 1024;
311
+ estimated_value_size = i;
312
+ metadata_charge_policy = kFullChargeCacheMetadata;
313
+ max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size,
314
+ metadata_charge_policy);
315
+ hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
316
+ metadata_charge_policy);
317
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
318
+ }
319
+
320
+ // Zero-capacity cache, and only values have charge.
321
+ capacity = 0;
300
322
  estimated_value_size = 1;
301
- metadata_charge_policy = kFullChargeCacheMetadata;
302
- max_occupancy =
303
- CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
323
+ metadata_charge_policy = kDontChargeCacheMetadata;
304
324
  hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
305
325
  metadata_charge_policy);
306
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
326
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
307
327
 
308
- // No elements fit in cache.
328
+ // Zero-capacity cache, and only metadata has charge.
309
329
  capacity = 0;
310
- estimated_value_size = 1;
311
- metadata_charge_policy = kDontChargeCacheMetadata;
330
+ estimated_value_size = 0;
331
+ metadata_charge_policy = kFullChargeCacheMetadata;
312
332
  hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
313
333
  metadata_charge_policy);
314
334
  EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
315
335
 
316
- // Set the capacity just below a single handle. Because the load factor is <
317
- // 100% at least one handle will fit in the table.
318
- estimated_value_size = 1;
319
- size_t handle_charge = CalcEstimatedHandleChargeWrapper(
320
- 8192 /* estimated_value_size */, kDontChargeCacheMetadata);
321
- capacity = handle_charge - 1;
322
- // The load factor should be bounded away from 100%.
323
- assert(static_cast<size_t>(capacity / fast_lru_cache::kLoadFactor) >
324
- handle_charge);
325
- metadata_charge_policy = kDontChargeCacheMetadata;
326
- max_occupancy =
327
- CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
336
+ // Small cache, large elements.
337
+ capacity = 1024;
338
+ estimated_value_size = 8192;
339
+ metadata_charge_policy = kFullChargeCacheMetadata;
328
340
  hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
329
341
  metadata_charge_policy);
330
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
342
+ EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
331
343
 
332
344
  // Large capacity.
333
345
  capacity = 31924172;
334
- estimated_value_size = 321;
346
+ estimated_value_size = 8192;
335
347
  metadata_charge_policy = kFullChargeCacheMetadata;
336
348
  max_occupancy =
337
349
  CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
@@ -342,6 +354,128 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
342
354
 
343
355
  } // namespace fast_lru_cache
344
356
 
357
+ namespace clock_cache {
358
+
359
+ class ClockCacheTest : public testing::Test {
360
+ public:
361
+ ClockCacheTest() {}
362
+ ~ClockCacheTest() override { DeleteShard(); }
363
+
364
+ void DeleteShard() {
365
+ if (shard_ != nullptr) {
366
+ shard_->~ClockCacheShard();
367
+ port::cacheline_aligned_free(shard_);
368
+ shard_ = nullptr;
369
+ }
370
+ }
371
+
372
+ void NewShard(size_t capacity) {
373
+ DeleteShard();
374
+ shard_ = reinterpret_cast<ClockCacheShard*>(
375
+ port::cacheline_aligned_alloc(sizeof(ClockCacheShard)));
376
+ new (shard_) ClockCacheShard(capacity, 1, true /*strict_capacity_limit*/,
377
+ kDontChargeCacheMetadata);
378
+ }
379
+
380
+ Status Insert(const std::string& key,
381
+ Cache::Priority priority = Cache::Priority::LOW) {
382
+ return shard_->Insert(key, 0 /*hash*/, nullptr /*value*/, 1 /*charge*/,
383
+ nullptr /*deleter*/, nullptr /*handle*/, priority);
384
+ }
385
+
386
+ Status Insert(char key, Cache::Priority priority = Cache::Priority::LOW) {
387
+ return Insert(std::string(kCacheKeySize, key), priority);
388
+ }
389
+
390
+ Status Insert(char key, size_t len) { return Insert(std::string(len, key)); }
391
+
392
+ bool Lookup(const std::string& key) {
393
+ auto handle = shard_->Lookup(key, 0 /*hash*/);
394
+ if (handle) {
395
+ shard_->Release(handle);
396
+ return true;
397
+ }
398
+ return false;
399
+ }
400
+
401
+ bool Lookup(char key) { return Lookup(std::string(kCacheKeySize, key)); }
402
+
403
+ void Erase(const std::string& key) { shard_->Erase(key, 0 /*hash*/); }
404
+
405
+ // void ValidateLRUList(std::vector<std::string> keys,
406
+ // size_t num_high_pri_pool_keys = 0) {
407
+ // LRUHandle* lru;
408
+ // LRUHandle* lru_low_pri;
409
+ // cache_->TEST_GetLRUList(&lru, &lru_low_pri);
410
+ // LRUHandle* iter = lru;
411
+ // bool in_high_pri_pool = false;
412
+ // size_t high_pri_pool_keys = 0;
413
+ // if (iter == lru_low_pri) {
414
+ // in_high_pri_pool = true;
415
+ // }
416
+ // for (const auto& key : keys) {
417
+ // iter = iter->next;
418
+ // ASSERT_NE(lru, iter);
419
+ // ASSERT_EQ(key, iter->key().ToString());
420
+ // ASSERT_EQ(in_high_pri_pool, iter->InHighPriPool());
421
+ // if (in_high_pri_pool) {
422
+ // high_pri_pool_keys++;
423
+ // }
424
+ // if (iter == lru_low_pri) {
425
+ // ASSERT_FALSE(in_high_pri_pool);
426
+ // in_high_pri_pool = true;
427
+ // }
428
+ // }
429
+ // ASSERT_EQ(lru, iter->next);
430
+ // ASSERT_TRUE(in_high_pri_pool);
431
+ // ASSERT_EQ(num_high_pri_pool_keys, high_pri_pool_keys);
432
+ // }
433
+
434
+ private:
435
+ clock_cache::ClockCacheShard* shard_ = nullptr;
436
+ };
437
+
438
+ TEST_F(ClockCacheTest, Validate) {
439
+ NewShard(3);
440
+ EXPECT_OK(Insert('a', 16));
441
+ EXPECT_NOK(Insert('b', 15));
442
+ EXPECT_OK(Insert('b', 16));
443
+ EXPECT_NOK(Insert('c', 17));
444
+ EXPECT_NOK(Insert('d', 1000));
445
+ EXPECT_NOK(Insert('e', 11));
446
+ EXPECT_NOK(Insert('f', 0));
447
+ }
448
+
449
+ TEST_F(ClockCacheTest, ClockPriorityTest) {
450
+ clock_cache::ClockHandle handle;
451
+ EXPECT_EQ(handle.GetClockPriority(),
452
+ clock_cache::ClockHandle::ClockPriority::NONE);
453
+ handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::HIGH);
454
+ EXPECT_EQ(handle.GetClockPriority(),
455
+ clock_cache::ClockHandle::ClockPriority::HIGH);
456
+ handle.DecreaseClockPriority();
457
+ EXPECT_EQ(handle.GetClockPriority(),
458
+ clock_cache::ClockHandle::ClockPriority::MEDIUM);
459
+ handle.DecreaseClockPriority();
460
+ EXPECT_EQ(handle.GetClockPriority(),
461
+ clock_cache::ClockHandle::ClockPriority::LOW);
462
+ handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::MEDIUM);
463
+ EXPECT_EQ(handle.GetClockPriority(),
464
+ clock_cache::ClockHandle::ClockPriority::MEDIUM);
465
+ handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::NONE);
466
+ EXPECT_EQ(handle.GetClockPriority(),
467
+ clock_cache::ClockHandle::ClockPriority::NONE);
468
+ handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::MEDIUM);
469
+ EXPECT_EQ(handle.GetClockPriority(),
470
+ clock_cache::ClockHandle::ClockPriority::MEDIUM);
471
+ handle.DecreaseClockPriority();
472
+ handle.DecreaseClockPriority();
473
+ EXPECT_EQ(handle.GetClockPriority(),
474
+ clock_cache::ClockHandle::ClockPriority::NONE);
475
+ }
476
+
477
+ } // namespace clock_cache
478
+
345
479
  class TestSecondaryCache : public SecondaryCache {
346
480
  public:
347
481
  // Specifies what action to take on a lookup for a particular key
@@ -254,7 +254,7 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader_CacheFull) {
254
254
  CacheHandleGuard<BlobFileReader> reader;
255
255
 
256
256
  ASSERT_TRUE(blob_file_cache.GetBlobFileReader(blob_file_number, &reader)
257
- .IsIncomplete());
257
+ .IsMemoryLimit());
258
258
  ASSERT_EQ(reader.GetValue(), nullptr);
259
259
  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
260
260
  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);
@@ -16,6 +16,7 @@
16
16
  #include "rocksdb/file_system.h"
17
17
  #include "rocksdb/slice.h"
18
18
  #include "rocksdb/status.h"
19
+ #include "table/multiget_context.h"
19
20
  #include "test_util/sync_point.h"
20
21
  #include "util/compression.h"
21
22
  #include "util/crc32c.h"
@@ -335,7 +336,9 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
335
336
 
336
337
  if (!prefetched) {
337
338
  TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
338
-
339
+ PERF_COUNTER_ADD(blob_read_count, 1);
340
+ PERF_COUNTER_ADD(blob_read_byte, record_size);
341
+ PERF_TIMER_GUARD(blob_read_time);
339
342
  const Status s = ReadFromFile(file_reader_.get(), record_offset,
340
343
  static_cast<size_t>(record_size), statistics_,
341
344
  &record_slice, &buf, &aligned_buf,
@@ -372,40 +375,50 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
372
375
  return Status::OK();
373
376
  }
374
377
 
375
- void BlobFileReader::MultiGetBlob(
376
- const ReadOptions& read_options,
377
- const autovector<std::reference_wrapper<const Slice>>& user_keys,
378
- const autovector<uint64_t>& offsets,
379
- const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
380
- autovector<PinnableSlice*>& values, uint64_t* bytes_read) const {
381
- const size_t num_blobs = user_keys.size();
378
+ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
379
+ autovector<BlobReadRequest*>& blob_reqs,
380
+ uint64_t* bytes_read) const {
381
+ const size_t num_blobs = blob_reqs.size();
382
382
  assert(num_blobs > 0);
383
- assert(num_blobs == offsets.size());
384
- assert(num_blobs == value_sizes.size());
385
- assert(num_blobs == statuses.size());
386
- assert(num_blobs == values.size());
383
+ assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
387
384
 
388
385
  #ifndef NDEBUG
389
- for (size_t i = 0; i < offsets.size() - 1; ++i) {
390
- assert(offsets[i] <= offsets[i + 1]);
386
+ for (size_t i = 0; i < num_blobs - 1; ++i) {
387
+ assert(blob_reqs[i]->offset <= blob_reqs[i + 1]->offset);
391
388
  }
392
389
  #endif // !NDEBUG
393
390
 
394
- std::vector<FSReadRequest> read_reqs(num_blobs);
391
+ std::vector<FSReadRequest> read_reqs;
395
392
  autovector<uint64_t> adjustments;
396
393
  uint64_t total_len = 0;
394
+ read_reqs.reserve(num_blobs);
397
395
  for (size_t i = 0; i < num_blobs; ++i) {
398
- const size_t key_size = user_keys[i].get().size();
399
- assert(IsValidBlobOffset(offsets[i], key_size, value_sizes[i], file_size_));
396
+ const size_t key_size = blob_reqs[i]->user_key->size();
397
+ const uint64_t offset = blob_reqs[i]->offset;
398
+ const uint64_t value_size = blob_reqs[i]->len;
399
+
400
+ if (!IsValidBlobOffset(offset, key_size, value_size, file_size_)) {
401
+ *blob_reqs[i]->status = Status::Corruption("Invalid blob offset");
402
+ continue;
403
+ }
404
+ if (blob_reqs[i]->compression != compression_type_) {
405
+ *blob_reqs[i]->status =
406
+ Status::Corruption("Compression type mismatch when reading a blob");
407
+ continue;
408
+ }
409
+
400
410
  const uint64_t adjustment =
401
411
  read_options.verify_checksums
402
412
  ? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size)
403
413
  : 0;
404
- assert(offsets[i] >= adjustment);
414
+ assert(blob_reqs[i]->offset >= adjustment);
405
415
  adjustments.push_back(adjustment);
406
- read_reqs[i].offset = offsets[i] - adjustment;
407
- read_reqs[i].len = value_sizes[i] + adjustment;
408
- total_len += read_reqs[i].len;
416
+
417
+ FSReadRequest read_req;
418
+ read_req.offset = blob_reqs[i]->offset - adjustment;
419
+ read_req.len = blob_reqs[i]->len + adjustment;
420
+ read_reqs.emplace_back(read_req);
421
+ total_len += read_req.len;
409
422
  }
410
423
 
411
424
  RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, total_len);
@@ -428,6 +441,8 @@ void BlobFileReader::MultiGetBlob(
428
441
  }
429
442
  }
430
443
  TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile");
444
+ PERF_COUNTER_ADD(blob_read_count, num_blobs);
445
+ PERF_COUNTER_ADD(blob_read_byte, total_len);
431
446
  s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(),
432
447
  direct_io ? &aligned_buf : nullptr,
433
448
  read_options.rate_limiter_priority);
@@ -435,9 +450,12 @@ void BlobFileReader::MultiGetBlob(
435
450
  for (auto& req : read_reqs) {
436
451
  req.status.PermitUncheckedError();
437
452
  }
438
- for (size_t i = 0; i < num_blobs; ++i) {
439
- assert(statuses[i]);
440
- *statuses[i] = s;
453
+ for (auto& req : blob_reqs) {
454
+ assert(req->status);
455
+ if (!req->status->IsCorruption()) {
456
+ // Avoid overwriting corruption status.
457
+ *req->status = s;
458
+ }
441
459
  }
442
460
  return;
443
461
  }
@@ -445,33 +463,39 @@ void BlobFileReader::MultiGetBlob(
445
463
  assert(s.ok());
446
464
 
447
465
  uint64_t total_bytes = 0;
448
- for (size_t i = 0; i < num_blobs; ++i) {
449
- auto& req = read_reqs[i];
450
- const auto& record_slice = req.result;
466
+ for (size_t i = 0, j = 0; i < num_blobs; ++i) {
467
+ assert(blob_reqs[i]->status);
468
+ if (!blob_reqs[i]->status->ok()) {
469
+ continue;
470
+ }
451
471
 
452
- assert(statuses[i]);
472
+ assert(j < read_reqs.size());
473
+ auto& req = read_reqs[j++];
474
+ const auto& record_slice = req.result;
453
475
  if (req.status.ok() && record_slice.size() != req.len) {
454
476
  req.status = IOStatus::Corruption("Failed to read data from blob file");
455
477
  }
456
478
 
457
- *statuses[i] = req.status;
458
- if (!statuses[i]->ok()) {
479
+ *blob_reqs[i]->status = req.status;
480
+ if (!blob_reqs[i]->status->ok()) {
459
481
  continue;
460
482
  }
461
483
 
462
484
  // Verify checksums if enabled
463
485
  if (read_options.verify_checksums) {
464
- *statuses[i] = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
465
- if (!statuses[i]->ok()) {
486
+ *blob_reqs[i]->status =
487
+ VerifyBlob(record_slice, *blob_reqs[i]->user_key, blob_reqs[i]->len);
488
+ if (!blob_reqs[i]->status->ok()) {
466
489
  continue;
467
490
  }
468
491
  }
469
492
 
470
493
  // Uncompress blob if needed
471
- Slice value_slice(record_slice.data() + adjustments[i], value_sizes[i]);
472
- *statuses[i] = UncompressBlobIfNeeded(value_slice, compression_type_,
473
- clock_, statistics_, values[i]);
474
- if (statuses[i]->ok()) {
494
+ Slice value_slice(record_slice.data() + adjustments[i], blob_reqs[i]->len);
495
+ *blob_reqs[i]->status =
496
+ UncompressBlobIfNeeded(value_slice, compression_type_, clock_,
497
+ statistics_, blob_reqs[i]->result);
498
+ if (blob_reqs[i]->status->ok()) {
475
499
  total_bytes += record_slice.size();
476
500
  }
477
501
  }
@@ -483,6 +507,8 @@ void BlobFileReader::MultiGetBlob(
483
507
 
484
508
  Status BlobFileReader::VerifyBlob(const Slice& record_slice,
485
509
  const Slice& user_key, uint64_t value_size) {
510
+ PERF_TIMER_GUARD(blob_checksum_time);
511
+
486
512
  BlobLogRecord record;
487
513
 
488
514
  const Slice header_slice(record_slice.data(), BlobLogRecord::kHeaderSize);
@@ -547,6 +573,7 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
547
573
  CacheAllocationPtr output;
548
574
 
549
575
  {
576
+ PERF_TIMER_GUARD(blob_decompress_time);
550
577
  StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS);
551
578
  output = UncompressData(info, value_slice.data(), value_slice.size(),
552
579
  &uncompressed_size, compression_format_version,
@@ -8,6 +8,7 @@
8
8
  #include <cinttypes>
9
9
  #include <memory>
10
10
 
11
+ #include "db/blob/blob_read_request.h"
11
12
  #include "file/random_access_file_reader.h"
12
13
  #include "rocksdb/compression_type.h"
13
14
  #include "rocksdb/rocksdb_namespace.h"
@@ -47,12 +48,9 @@ class BlobFileReader {
47
48
  uint64_t* bytes_read) const;
48
49
 
49
50
  // offsets must be sorted in ascending order by caller.
50
- void MultiGetBlob(
51
- const ReadOptions& read_options,
52
- const autovector<std::reference_wrapper<const Slice>>& user_keys,
53
- const autovector<uint64_t>& offsets,
54
- const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
55
- autovector<PinnableSlice*>& values, uint64_t* bytes_read) const;
51
+ void MultiGetBlob(const ReadOptions& read_options,
52
+ autovector<BlobReadRequest*>& blob_reqs,
53
+ uint64_t* bytes_read) const;
56
54
 
57
55
  CompressionType GetCompressionType() const { return compression_type_; }
58
56