@nxtedition/rocksdb 7.1.4 → 7.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151):
  1. package/deps/rocksdb/iostats.patch +19 -0
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
  3. package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
  4. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -40
  5. package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
  6. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +103 -28
  7. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +33 -1
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +177 -38
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
  10. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +2 -2
  11. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +125 -71
  12. package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
  13. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
  14. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
  15. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
  16. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
  17. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  18. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +36 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
  20. package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +50 -52
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +41 -10
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
  30. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
  31. package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
  32. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
  33. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +17 -8
  34. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
  35. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
  36. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
  37. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
  38. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
  39. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
  40. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
  43. package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
  44. package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
  45. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
  46. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  47. package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
  48. package/deps/rocksdb/rocksdb/db/db_test2.cc +18 -7
  49. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
  50. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
  51. package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
  52. package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
  53. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  54. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
  55. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
  56. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
  57. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +1 -1
  58. package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
  59. package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
  60. package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
  61. package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
  62. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
  63. package/deps/rocksdb/rocksdb/db/memtable.cc +49 -14
  64. package/deps/rocksdb/rocksdb/db/memtable.h +60 -14
  65. package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
  66. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
  67. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
  68. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
  69. package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
  70. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
  71. package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
  72. package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
  73. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +8 -14
  74. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
  75. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
  77. package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
  78. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
  79. package/deps/rocksdb/rocksdb/db/version_set.cc +34 -10
  80. package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
  81. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -6
  82. package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
  83. package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
  84. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
  85. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
  86. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
  87. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
  88. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  89. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
  90. package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
  91. package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
  92. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  93. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +6 -0
  94. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +6 -0
  95. package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
  96. package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
  97. package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
  98. package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
  99. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
  100. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
  101. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
  102. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +14 -1
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
  104. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
  107. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
  109. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
  110. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
  111. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
  112. package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
  113. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
  114. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
  115. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
  116. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
  117. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -3
  118. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -5
  119. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -1
  120. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  121. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
  122. package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +50 -8
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -0
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +7 -0
  126. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
  127. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
  128. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
  129. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
  130. package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
  131. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  132. package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
  133. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -1
  134. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
  135. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
  136. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
  137. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
  138. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
  139. package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
  140. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
  141. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
  142. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
  143. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
  144. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
  145. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  146. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
  147. package/index.js +2 -2
  148. package/package.json +1 -1
  149. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  150. package/prebuilds/linux-x64/node.napi.node +0 -0
  151. package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
@@ -9,6 +9,8 @@
9
9
 
10
10
  #pragma once
11
11
 
12
+ #include <sys/types.h>
13
+
12
14
  #include <array>
13
15
  #include <atomic>
14
16
  #include <cstdint>
@@ -28,6 +30,9 @@ namespace ROCKSDB_NAMESPACE {
28
30
 
29
31
  namespace clock_cache {
30
32
 
33
+ // Forward declaration of friend class.
34
+ class ClockCacheTest;
35
+
31
36
  // An experimental alternative to LRUCache, using a lock-free, open-addressed
32
37
  // hash table and clock eviction.
33
38
 
@@ -63,10 +68,10 @@ namespace clock_cache {
63
68
  // can't be immediately deleted. In these cases, the flag will be later read
64
69
  // and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is
65
70
  // used not only to defer deletions, but also as a barrier for external
66
- // references: once WILL_BE_DELETED is set, lookups (which are the means to
67
- // acquire new external references) will ignore the handle. For this reason,
68
- // when WILL_BE_DELETED is set, we say the handle is invisible (and
69
- // otherwise, that it's visible).
71
+ // references: once WILL_BE_DELETED is set, lookups (which are the most
72
+ // common way to acquire new external references) will ignore the handle.
73
+ // For this reason, when WILL_BE_DELETED is set, we say the handle is
74
+ // invisible (and, otherwise, that it's visible).
70
75
  //
71
76
  //
72
77
  // 3. HASHING AND COLLISION RESOLUTION
@@ -192,10 +197,10 @@ struct ClockHandle {
192
197
  size_t total_charge;
193
198
  std::array<char, kCacheKeySize> key_data;
194
199
 
195
- static constexpr uint8_t kIsElementOffset = 1;
196
- static constexpr uint8_t kClockPriorityOffset = 2;
197
- static constexpr uint8_t kIsHitOffset = 4;
198
- static constexpr uint8_t kCachePriorityOffset = 5;
200
+ static constexpr uint8_t kIsElementOffset = 0;
201
+ static constexpr uint8_t kClockPriorityOffset = 1;
202
+ static constexpr uint8_t kIsHitOffset = 3;
203
+ static constexpr uint8_t kCachePriorityOffset = 4;
199
204
 
200
205
  enum Flags : uint8_t {
201
206
  // Whether the slot is in use by an element.
@@ -252,9 +257,8 @@ struct ClockHandle {
252
257
  // Whether a thread has an exclusive reference to the slot.
253
258
  EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
254
259
  // Whether the handle will be deleted soon. When this bit is set, new
255
- // internal
256
- // or external references to this handle stop being accepted.
257
- // There is an exception: external references can be created from
260
+ // internal references to this handle stop being accepted.
261
+ // External references may still be granted---they can be created from
258
262
  // existing external references, or converting from existing internal
259
263
  // references.
260
264
  WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
@@ -274,6 +278,9 @@ struct ClockHandle {
274
278
 
275
279
  std::atomic<uint32_t> refs;
276
280
 
281
+ // True iff the handle is allocated separately from hash table.
282
+ bool detached;
283
+
277
284
  ClockHandle()
278
285
  : value(nullptr),
279
286
  deleter(nullptr),
@@ -281,7 +288,8 @@ struct ClockHandle {
281
288
  total_charge(0),
282
289
  flags(0),
283
290
  displacements(0),
284
- refs(0) {
291
+ refs(0),
292
+ detached(false) {
285
293
  SetWillBeDeleted(false);
286
294
  SetIsElement(false);
287
295
  SetClockPriority(ClockPriority::NONE);
@@ -300,6 +308,7 @@ struct ClockHandle {
300
308
  value = other.value;
301
309
  deleter = other.deleter;
302
310
  key_data = other.key_data;
311
+ hash = other.hash;
303
312
  total_charge = other.total_charge;
304
313
  }
305
314
 
@@ -350,13 +359,13 @@ struct ClockHandle {
350
359
 
351
360
  // flags functions.
352
361
 
353
- bool IsElement() const { return flags & IS_ELEMENT; }
362
+ bool IsElement() const { return flags & Flags::IS_ELEMENT; }
354
363
 
355
364
  void SetIsElement(bool is_element) {
356
365
  if (is_element) {
357
- flags |= IS_ELEMENT;
366
+ flags |= Flags::IS_ELEMENT;
358
367
  } else {
359
- flags &= static_cast<uint8_t>(~IS_ELEMENT);
368
+ flags &= static_cast<uint8_t>(~Flags::IS_ELEMENT);
360
369
  }
361
370
  }
362
371
 
@@ -400,6 +409,10 @@ struct ClockHandle {
400
409
  flags |= new_priority;
401
410
  }
402
411
 
412
+ bool IsDetached() { return detached; }
413
+
414
+ void SetDetached() { detached = true; }
415
+
403
416
  inline bool IsEmpty() const {
404
417
  return !this->IsElement() && this->displacements == 0;
405
418
  }
@@ -424,7 +437,9 @@ struct ClockHandle {
424
437
  }
425
438
  }
426
439
 
427
- bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }
440
+ uint32_t ExternalRefs() const {
441
+ return (refs & EXTERNAL_REFS) >> kExternalRefsOffset;
442
+ }
428
443
 
429
444
  // Tries to take an internal ref. Returns true iff it succeeds.
430
445
  inline bool TryInternalRef() {
@@ -437,7 +452,7 @@ struct ClockHandle {
437
452
 
438
453
  // Tries to take an external ref. Returns true iff it succeeds.
439
454
  inline bool TryExternalRef() {
440
- if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
455
+ if (!((refs += kOneExternalRef) & EXCLUSIVE_REF)) {
441
456
  return true;
442
457
  }
443
458
  refs -= kOneExternalRef;
@@ -529,8 +544,8 @@ class ClockHandleTable {
529
544
  // Makes h non-evictable.
530
545
  void ClockOff(ClockHandle* h);
531
546
 
532
- // Runs the clock eviction algorithm until there is enough space to
533
- // insert an element with the given charge.
547
+ // Runs the clock eviction algorithm until usage_ + charge is at most
548
+ // capacity_.
534
549
  void ClockRun(size_t charge);
535
550
 
536
551
  // Remove h from the hash table. Requires an exclusive ref to h.
@@ -548,8 +563,6 @@ class ClockHandleTable {
548
563
  RemoveAll(key, hash, probe, deleted);
549
564
  }
550
565
 
551
- void Free(autovector<ClockHandle>* deleted);
552
-
553
566
  // Tries to remove h from the hash table. If the attempt is successful,
554
567
  // the function hands over an exclusive ref to h.
555
568
  bool TryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
@@ -558,8 +571,13 @@ class ClockHandleTable {
558
571
  // success. Requires that the caller thread has no shared ref to h.
559
572
  bool SpinTryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
560
573
 
561
- template <typename T>
562
- void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
574
+ // Call this function after an Insert, Remove, RemoveAll, TryRemove
575
+ // or SpinTryRemove. It frees the deleted values and updates the hash table
576
+ // metadata.
577
+ void Free(autovector<ClockHandle>* deleted);
578
+
579
+ void ApplyToEntriesRange(std::function<void(ClockHandle*)> func,
580
+ uint32_t index_begin, uint32_t index_end,
563
581
  bool apply_if_will_be_deleted) {
564
582
  for (uint32_t i = index_begin; i < index_end; i++) {
565
583
  ClockHandle* h = &array_[i];
@@ -573,18 +591,20 @@ class ClockHandleTable {
573
591
  }
574
592
  }
575
593
 
576
- template <typename T>
577
- void ConstApplyToEntriesRange(T func, uint32_t index_begin,
578
- uint32_t index_end,
594
+ void ConstApplyToEntriesRange(std::function<void(const ClockHandle*)> func,
595
+ uint32_t index_begin, uint32_t index_end,
579
596
  bool apply_if_will_be_deleted) const {
580
597
  for (uint32_t i = index_begin; i < index_end; i++) {
581
598
  ClockHandle* h = &array_[i];
582
- if (h->TryExclusiveRef()) {
599
+ // We take an external ref because we are handing over control
600
+ // to a user-defined function, and because the handle will not be
601
+ // modified.
602
+ if (h->TryExternalRef()) {
583
603
  if (h->IsElement() &&
584
604
  (apply_if_will_be_deleted || !h->WillBeDeleted())) {
585
605
  func(h);
586
606
  }
587
- h->ReleaseExclusiveRef();
607
+ h->ReleaseExternalRef();
588
608
  }
589
609
  }
590
610
  }
@@ -601,6 +621,8 @@ class ClockHandleTable {
601
621
 
602
622
  size_t GetCapacity() const { return capacity_; }
603
623
 
624
+ void SetCapacity(size_t capacity) { capacity_ = capacity; }
625
+
604
626
  // Returns x mod 2^{length_bits_}.
605
627
  uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
606
628
 
@@ -652,7 +674,7 @@ class ClockHandleTable {
652
674
  const uint32_t occupancy_limit_;
653
675
 
654
676
  // Maximum total charge of all elements stored in the table.
655
- const size_t capacity_;
677
+ size_t capacity_;
656
678
 
657
679
  // We partition the following members into different cache lines
658
680
  // to avoid false sharing among Lookup, Release, Erase and Insert
@@ -745,10 +767,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
745
767
 
746
768
  private:
747
769
  friend class ClockCache;
770
+ friend class ClockCacheTest;
748
771
 
749
- // Free some space following strict clock policy until enough space
750
- // to hold (usage_ + charge) is freed or there are no evictable elements.
751
- void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);
772
+ ClockHandle* DetachedInsert(ClockHandle* h);
752
773
 
753
774
  // Returns the charge of a single handle.
754
775
  static size_t CalcEstimatedHandleCharge(
@@ -763,6 +784,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
763
784
  // Whether to reject insertion if cache reaches its full capacity.
764
785
  std::atomic<bool> strict_capacity_limit_;
765
786
 
787
+ // Handles allocated separately from the table.
788
+ std::atomic<size_t> detached_usage_;
789
+
766
790
  ClockHandleTable table_;
767
791
  }; // class ClockCacheShard
768
792
 
@@ -797,6 +821,7 @@ class ClockCache
797
821
 
798
822
  private:
799
823
  ClockCacheShard* shards_ = nullptr;
824
+
800
825
  int num_shards_;
801
826
  }; // class ClockCache
802
827
 
@@ -5,6 +5,8 @@
5
5
 
6
6
  #include "cache/compressed_secondary_cache.h"
7
7
 
8
+ #include <algorithm>
9
+ #include <cstdint>
8
10
  #include <memory>
9
11
 
10
12
  #include "memory/memory_allocator.h"
@@ -13,15 +15,6 @@
13
15
 
14
16
  namespace ROCKSDB_NAMESPACE {
15
17
 
16
- namespace {
17
-
18
- void DeletionCallback(const Slice& /*key*/, void* obj) {
19
- delete reinterpret_cast<CacheAllocationPtr*>(obj);
20
- obj = nullptr;
21
- }
22
-
23
- } // namespace
24
-
25
18
  CompressedSecondaryCache::CompressedSecondaryCache(
26
19
  size_t capacity, int num_shard_bits, bool strict_capacity_limit,
27
20
  double high_pri_pool_ratio,
@@ -49,26 +42,29 @@ std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
49
42
  return handle;
50
43
  }
51
44
 
52
- CacheAllocationPtr* ptr =
53
- reinterpret_cast<CacheAllocationPtr*>(cache_->Value(lru_handle));
54
- void* value = nullptr;
55
- size_t charge = 0;
56
- Status s;
45
+ CacheValueChunk* handle_value =
46
+ reinterpret_cast<CacheValueChunk*>(cache_->Value(lru_handle));
47
+ size_t handle_value_charge{0};
48
+ CacheAllocationPtr merged_value =
49
+ MergeChunksIntoValue(handle_value, handle_value_charge);
57
50
 
51
+ Status s;
52
+ void* value{nullptr};
53
+ size_t charge{0};
58
54
  if (cache_options_.compression_type == kNoCompression) {
59
- s = create_cb(ptr->get(), cache_->GetCharge(lru_handle), &value, &charge);
55
+ s = create_cb(merged_value.get(), handle_value_charge, &value, &charge);
60
56
  } else {
61
57
  UncompressionContext uncompression_context(cache_options_.compression_type);
62
58
  UncompressionInfo uncompression_info(uncompression_context,
63
59
  UncompressionDict::GetEmptyDict(),
64
60
  cache_options_.compression_type);
65
61
 
66
- size_t uncompressed_size = 0;
62
+ size_t uncompressed_size{0};
67
63
  CacheAllocationPtr uncompressed;
68
- uncompressed = UncompressData(
69
- uncompression_info, (char*)ptr->get(), cache_->GetCharge(lru_handle),
70
- &uncompressed_size, cache_options_.compress_format_version,
71
- cache_options_.memory_allocator.get());
64
+ uncompressed = UncompressData(uncompression_info, (char*)merged_value.get(),
65
+ handle_value_charge, &uncompressed_size,
66
+ cache_options_.compress_format_version,
67
+ cache_options_.memory_allocator.get());
72
68
 
73
69
  if (!uncompressed) {
74
70
  cache_->Release(lru_handle, /* erase_if_last_ref */ true);
@@ -104,7 +100,7 @@ Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
104
100
  if (cache_options_.compression_type != kNoCompression) {
105
101
  CompressionOptions compression_opts;
106
102
  CompressionContext compression_context(cache_options_.compression_type);
107
- uint64_t sample_for_compression = 0;
103
+ uint64_t sample_for_compression{0};
108
104
  CompressionInfo compression_info(
109
105
  compression_opts, compression_context, CompressionDict::GetEmptyDict(),
110
106
  cache_options_.compression_type, sample_for_compression);
@@ -118,14 +114,12 @@ Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
118
114
  }
119
115
 
120
116
  val = Slice(compressed_val);
121
- size = compressed_val.size();
122
- ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
123
- memcpy(ptr.get(), compressed_val.data(), size);
124
117
  }
125
118
 
126
- CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
127
-
128
- return cache_->Insert(key, buf, size, DeletionCallback);
119
+ size_t charge{0};
120
+ CacheValueChunk* value_chunks_head =
121
+ SplitValueIntoChunks(val, cache_options_.compression_type, charge);
122
+ return cache_->Insert(key, value_chunks_head, charge, DeletionCallback);
129
123
  }
130
124
 
131
125
  void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
@@ -133,7 +127,7 @@ void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
133
127
  std::string CompressedSecondaryCache::GetPrintableOptions() const {
134
128
  std::string ret;
135
129
  ret.reserve(20000);
136
- const int kBufferSize = 200;
130
+ const int kBufferSize{200};
137
131
  char buffer[kBufferSize];
138
132
  ret.append(cache_->GetPrintableOptions());
139
133
  snprintf(buffer, kBufferSize, " compression_type : %s\n",
@@ -145,6 +139,87 @@ std::string CompressedSecondaryCache::GetPrintableOptions() const {
145
139
  return ret;
146
140
  }
147
141
 
142
+ CompressedSecondaryCache::CacheValueChunk*
143
+ CompressedSecondaryCache::SplitValueIntoChunks(
144
+ const Slice& value, const CompressionType compression_type,
145
+ size_t& charge) {
146
+ assert(!value.empty());
147
+ const char* src_ptr = value.data();
148
+ size_t src_size{value.size()};
149
+
150
+ CacheValueChunk dummy_head = CacheValueChunk();
151
+ CacheValueChunk* current_chunk = &dummy_head;
152
+ // Do not split when value size is large or there is no compression.
153
+ size_t predicted_chunk_size{0};
154
+ size_t actual_chunk_size{0};
155
+ size_t tmp_size{0};
156
+ while (src_size > 0) {
157
+ predicted_chunk_size = sizeof(CacheValueChunk) - 1 + src_size;
158
+ auto upper =
159
+ std::upper_bound(malloc_bin_sizes_.begin(), malloc_bin_sizes_.end(),
160
+ predicted_chunk_size);
161
+ // Do not split when value size is too small, too large, close to a bin
162
+ // size, or there is no compression.
163
+ if (upper == malloc_bin_sizes_.begin() ||
164
+ upper == malloc_bin_sizes_.end() ||
165
+ *upper - predicted_chunk_size < malloc_bin_sizes_.front() ||
166
+ compression_type == kNoCompression) {
167
+ tmp_size = predicted_chunk_size;
168
+ } else {
169
+ tmp_size = *(--upper);
170
+ }
171
+
172
+ CacheValueChunk* new_chunk =
173
+ reinterpret_cast<CacheValueChunk*>(new char[tmp_size]);
174
+ current_chunk->next = new_chunk;
175
+ current_chunk = current_chunk->next;
176
+ actual_chunk_size = tmp_size - sizeof(CacheValueChunk) + 1;
177
+ memcpy(current_chunk->data, src_ptr, actual_chunk_size);
178
+ current_chunk->size = actual_chunk_size;
179
+ src_ptr += actual_chunk_size;
180
+ src_size -= actual_chunk_size;
181
+ charge += tmp_size;
182
+ }
183
+ current_chunk->next = nullptr;
184
+
185
+ return dummy_head.next;
186
+ }
187
+
188
+ CacheAllocationPtr CompressedSecondaryCache::MergeChunksIntoValue(
189
+ const void* chunks_head, size_t& charge) {
190
+ const CacheValueChunk* head =
191
+ reinterpret_cast<const CacheValueChunk*>(chunks_head);
192
+ const CacheValueChunk* current_chunk = head;
193
+ charge = 0;
194
+ while (current_chunk != nullptr) {
195
+ charge += current_chunk->size;
196
+ current_chunk = current_chunk->next;
197
+ }
198
+
199
+ CacheAllocationPtr ptr =
200
+ AllocateBlock(charge, cache_options_.memory_allocator.get());
201
+ current_chunk = head;
202
+ size_t pos{0};
203
+ while (current_chunk != nullptr) {
204
+ memcpy(ptr.get() + pos, current_chunk->data, current_chunk->size);
205
+ pos += current_chunk->size;
206
+ current_chunk = current_chunk->next;
207
+ }
208
+
209
+ return ptr;
210
+ }
211
+
212
+ void CompressedSecondaryCache::DeletionCallback(const Slice& /*key*/,
213
+ void* obj) {
214
+ CacheValueChunk* chunks_head = reinterpret_cast<CacheValueChunk*>(obj);
215
+ while (chunks_head != nullptr) {
216
+ CacheValueChunk* tmp_chunk = chunks_head;
217
+ chunks_head = chunks_head->next;
218
+ tmp_chunk->Free();
219
+ }
220
+ obj = nullptr;
221
+ }
222
+
148
223
  std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
149
224
  size_t capacity, int num_shard_bits, bool strict_capacity_limit,
150
225
  double high_pri_pool_ratio,
@@ -5,6 +5,8 @@
5
5
 
6
6
  #pragma once
7
7
 
8
+ #include <array>
9
+ #include <cstddef>
8
10
  #include <memory>
9
11
 
10
12
  #include "cache/lru_cache.h"
@@ -58,7 +60,7 @@ class CompressedSecondaryCache : public SecondaryCache {
58
60
  std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
59
61
  bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
60
62
  CacheMetadataChargePolicy metadata_charge_policy =
61
- kDontChargeCacheMetadata,
63
+ kDefaultCacheMetadataChargePolicy,
62
64
  CompressionType compression_type = CompressionType::kLZ4Compression,
63
65
  uint32_t compress_format_version = 2);
64
66
  virtual ~CompressedSecondaryCache() override;
@@ -79,6 +81,36 @@ class CompressedSecondaryCache : public SecondaryCache {
79
81
  std::string GetPrintableOptions() const override;
80
82
 
81
83
  private:
84
+ friend class CompressedSecondaryCacheTest;
85
+ static constexpr std::array<uint16_t, 33> malloc_bin_sizes_{
86
+ 32, 64, 96, 128, 160, 192, 224, 256, 320, 384, 448,
87
+ 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048, 2560, 3072,
88
+ 3584, 4096, 5120, 6144, 7168, 8192, 10240, 12288, 14336, 16384, 32768};
89
+
90
+ struct CacheValueChunk {
91
+ // TODO try "CacheAllocationPtr next;".
92
+ CacheValueChunk* next;
93
+ size_t size;
94
+ // Beginning of the chunk data (MUST BE THE LAST FIELD IN THIS STRUCT!)
95
+ char data[1];
96
+
97
+ void Free() { delete[] reinterpret_cast<char*>(this); }
98
+ };
99
+
100
+ // Split value into chunks to better fit into jemalloc bins. The chunks
101
+ // are stored in CacheValueChunk and extra charge is needed for each chunk,
102
+ // so the cache charge is recalculated here.
103
+ CacheValueChunk* SplitValueIntoChunks(const Slice& value,
104
+ const CompressionType compression_type,
105
+ size_t& charge);
106
+
107
+ // After merging chunks, the extra charge for each chunk is removed, so
108
+ // the charge is recalculated.
109
+ CacheAllocationPtr MergeChunksIntoValue(const void* chunks_head,
110
+ size_t& charge);
111
+
112
+ // An implementation of Cache::DeleterFn.
113
+ static void DeletionCallback(const Slice& /*key*/, void* obj);
82
114
  std::shared_ptr<Cache> cache_;
83
115
  CompressedSecondaryCacheOptions cache_options_;
84
116
  };