@nxtedition/rocksdb 7.1.32 → 7.1.34

This diff shows the content of publicly available package versions as released to the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (150)
  1. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +402 -345
  2. package/deps/rocksdb/rocksdb/cache/clock_cache.h +121 -64
  3. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +28 -18
  4. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +1 -0
  5. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  6. package/deps/rocksdb/rocksdb/db/builder.cc +2 -1
  7. package/deps/rocksdb/rocksdb/db/c.cc +563 -673
  8. package/deps/rocksdb/rocksdb/db/c_test.c +168 -169
  9. package/deps/rocksdb/rocksdb/db/column_family.cc +16 -15
  10. package/deps/rocksdb/rocksdb/db/column_family.h +7 -7
  11. package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -28
  12. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -9
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +8 -3
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +114 -0
  15. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -3
  16. package/deps/rocksdb/rocksdb/db/convenience.cc +3 -5
  17. package/deps/rocksdb/rocksdb/db/corruption_test.cc +10 -14
  18. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +9 -13
  19. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -2
  21. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +14 -16
  22. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +52 -72
  23. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +2 -2
  24. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +12 -12
  25. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -2
  26. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +3 -3
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +1 -12
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +3 -0
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +26 -0
  30. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +1 -0
  31. package/deps/rocksdb/rocksdb/db/db_iter.cc +12 -6
  32. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  33. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +6 -7
  34. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +10 -8
  35. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +15 -13
  36. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -9
  37. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +4 -4
  38. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +1 -1
  39. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +2 -4
  40. package/deps/rocksdb/rocksdb/db/db_options_test.cc +4 -4
  41. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +7 -4
  42. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +7 -5
  43. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +8 -6
  45. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +18 -23
  46. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +3 -5
  47. package/deps/rocksdb/rocksdb/db/db_test.cc +10 -5
  48. package/deps/rocksdb/rocksdb/db/db_test2.cc +172 -169
  49. package/deps/rocksdb/rocksdb/db/db_test_util.cc +68 -66
  50. package/deps/rocksdb/rocksdb/db/db_test_util.h +1 -3
  51. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +31 -39
  52. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +182 -2
  53. package/deps/rocksdb/rocksdb/db/db_write_test.cc +43 -40
  54. package/deps/rocksdb/rocksdb/db/dbformat.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +35 -34
  56. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +10 -11
  57. package/deps/rocksdb/rocksdb/db/error_handler.cc +6 -6
  58. package/deps/rocksdb/rocksdb/db/error_handler.h +93 -94
  59. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -1
  60. package/deps/rocksdb/rocksdb/db/event_helpers.h +3 -3
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +16 -17
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -2
  63. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -2
  64. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -0
  65. package/deps/rocksdb/rocksdb/db/file_indexer.h +2 -1
  66. package/deps/rocksdb/rocksdb/db/file_indexer_test.cc +4 -2
  67. package/deps/rocksdb/rocksdb/db/filename_test.cc +27 -29
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -13
  69. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +2 -2
  70. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +15 -21
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.h +7 -6
  72. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +4 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +2 -2
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +59 -14
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +27 -11
  76. package/deps/rocksdb/rocksdb/db/job_context.h +5 -6
  77. package/deps/rocksdb/rocksdb/db/listener_test.cc +21 -23
  78. package/deps/rocksdb/rocksdb/db/log_reader.cc +7 -11
  79. package/deps/rocksdb/rocksdb/db/log_reader.h +4 -6
  80. package/deps/rocksdb/rocksdb/db/log_test.cc +6 -12
  81. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  82. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +0 -1
  83. package/deps/rocksdb/rocksdb/db/lookup_key.h +4 -1
  84. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -1
  85. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +3 -5
  86. package/deps/rocksdb/rocksdb/db/memtable.cc +34 -22
  87. package/deps/rocksdb/rocksdb/db/memtable.h +4 -6
  88. package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -0
  89. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +37 -13
  90. package/deps/rocksdb/rocksdb/db/merge_context.h +1 -0
  91. package/deps/rocksdb/rocksdb/db/merge_helper.cc +128 -14
  92. package/deps/rocksdb/rocksdb/db/merge_helper.h +15 -7
  93. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +2 -1
  94. package/deps/rocksdb/rocksdb/db/merge_operator.cc +5 -6
  95. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +4 -3
  96. package/deps/rocksdb/rocksdb/db/options_file_test.cc +1 -1
  97. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +55 -43
  98. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +288 -299
  99. package/deps/rocksdb/rocksdb/db/prefix_test.cc +22 -27
  100. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +1 -1
  101. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +1 -1
  102. package/deps/rocksdb/rocksdb/db/repair.cc +7 -8
  103. package/deps/rocksdb/rocksdb/db/repair_test.cc +3 -4
  104. package/deps/rocksdb/rocksdb/db/snapshot_impl.cc +4 -5
  105. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +10 -4
  106. package/deps/rocksdb/rocksdb/db/table_cache.cc +3 -4
  107. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +6 -7
  108. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +22 -22
  109. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +12 -12
  110. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +6 -8
  111. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.h +2 -0
  112. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +3 -3
  113. package/deps/rocksdb/rocksdb/db/version_edit.cc +2 -5
  114. package/deps/rocksdb/rocksdb/db/version_edit.h +8 -12
  115. package/deps/rocksdb/rocksdb/db/version_set.cc +74 -102
  116. package/deps/rocksdb/rocksdb/db/version_set.h +8 -10
  117. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +0 -5
  118. package/deps/rocksdb/rocksdb/db/version_set_test.cc +47 -45
  119. package/deps/rocksdb/rocksdb/db/wal_manager.cc +6 -5
  120. package/deps/rocksdb/rocksdb/db/wal_manager.h +2 -2
  121. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +4 -3
  122. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +144 -61
  123. package/deps/rocksdb/rocksdb/db/write_batch.cc +41 -24
  124. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +2 -7
  125. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +105 -104
  126. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +5 -4
  127. package/deps/rocksdb/rocksdb/db/write_controller.h +1 -0
  128. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +1 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +8 -6
  130. package/deps/rocksdb/rocksdb/env/io_posix.h +6 -0
  131. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +134 -65
  132. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -0
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +1 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1 -4
  135. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +1 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +4 -0
  137. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -4
  138. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -7
  139. package/deps/rocksdb/rocksdb/table/get_context.h +1 -2
  140. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +13 -0
  141. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +36 -4
  142. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +6 -6
  143. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +23 -28
  144. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +11 -1
  145. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +19 -17
  146. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +10 -7
  147. package/index.js +14 -16
  148. package/package.json +1 -1
  149. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  150. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -22,40 +22,129 @@
22
22
 
23
23
  namespace ROCKSDB_NAMESPACE {
24
24
 
25
- namespace hyper_clock_cache {
25
+ namespace clock_cache {
26
26
 
27
+ namespace {
27
28
  inline uint64_t GetRefcount(uint64_t meta) {
28
29
  return ((meta >> ClockHandle::kAcquireCounterShift) -
29
30
  (meta >> ClockHandle::kReleaseCounterShift)) &
30
31
  ClockHandle::kCounterMask;
31
32
  }
32
33
 
34
+ inline uint64_t GetInitialCountdown(Cache::Priority priority) {
35
+ // Set initial clock data from priority
36
+ // TODO: configuration parameters for priority handling and clock cycle
37
+ // count?
38
+ switch (priority) {
39
+ case Cache::Priority::HIGH:
40
+ return ClockHandle::kHighCountdown;
41
+ default:
42
+ assert(false);
43
+ FALLTHROUGH_INTENDED;
44
+ case Cache::Priority::LOW:
45
+ return ClockHandle::kLowCountdown;
46
+ case Cache::Priority::BOTTOM:
47
+ return ClockHandle::kBottomCountdown;
48
+ }
49
+ }
50
+
51
+ inline void FreeDataMarkEmpty(ClockHandle& h) {
52
+ // NOTE: in theory there's more room for parallelism if we copy the handle
53
+ // data and delay actions like this until after marking the entry as empty,
54
+ // but performance tests only show a regression by copying the few words
55
+ // of data.
56
+ h.FreeData();
57
+
58
+ #ifndef NDEBUG
59
+ // Mark slot as empty, with assertion
60
+ uint64_t meta = h.meta.exchange(0, std::memory_order_release);
61
+ assert(meta >> ClockHandle::kStateShift == ClockHandle::kStateConstruction);
62
+ #else
63
+ // Mark slot as empty
64
+ h.meta.store(0, std::memory_order_release);
65
+ #endif
66
+ }
67
+
68
+ inline bool ClockUpdate(ClockHandle& h) {
69
+ uint64_t meta = h.meta.load(std::memory_order_relaxed);
70
+
71
+ uint64_t acquire_count =
72
+ (meta >> ClockHandle::kAcquireCounterShift) & ClockHandle::kCounterMask;
73
+ uint64_t release_count =
74
+ (meta >> ClockHandle::kReleaseCounterShift) & ClockHandle::kCounterMask;
75
+ // fprintf(stderr, "ClockUpdate @ %p: %lu %lu %u\n", &h, acquire_count,
76
+ // release_count, (unsigned)(meta >> ClockHandle::kStateShift));
77
+ if (acquire_count != release_count) {
78
+ // Only clock update entries with no outstanding refs
79
+ return false;
80
+ }
81
+ if (!((meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit)) {
82
+ // Only clock update Shareable entries
83
+ return false;
84
+ }
85
+ if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) &&
86
+ acquire_count > 0) {
87
+ // Decrement clock
88
+ uint64_t new_count =
89
+ std::min(acquire_count - 1, uint64_t{ClockHandle::kMaxCountdown} - 1);
90
+ // Compare-exchange in the decremented clock info, but
91
+ // not aggressively
92
+ uint64_t new_meta =
93
+ (uint64_t{ClockHandle::kStateVisible} << ClockHandle::kStateShift) |
94
+ (new_count << ClockHandle::kReleaseCounterShift) |
95
+ (new_count << ClockHandle::kAcquireCounterShift);
96
+ h.meta.compare_exchange_strong(meta, new_meta, std::memory_order_relaxed);
97
+ return false;
98
+ }
99
+ // Otherwise, remove entry (either unreferenced invisible or
100
+ // unreferenced and expired visible).
101
+ if (h.meta.compare_exchange_strong(
102
+ meta,
103
+ uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift,
104
+ std::memory_order_acquire)) {
105
+ // Took ownership.
106
+ return true;
107
+ } else {
108
+ // Compare-exchange failing probably
109
+ // indicates the entry was used, so skip it in that case.
110
+ return false;
111
+ }
112
+ }
113
+
114
+ } // namespace
115
+
33
116
  void ClockHandleBasicData::FreeData() const {
34
117
  if (deleter) {
35
118
  UniqueId64x2 unhashed;
36
- (*deleter)(ClockCacheShard::ReverseHash(hashed_key, &unhashed), value);
119
+ (*deleter)(
120
+ ClockCacheShard<HyperClockTable>::ReverseHash(hashed_key, &unhashed),
121
+ value);
37
122
  }
38
123
  }
39
124
 
40
- static_assert(sizeof(ClockHandle) == 64U,
41
- "Expecting size / alignment with common cache line size");
42
-
43
- ClockHandleTable::ClockHandleTable(int hash_bits, bool initial_charge_metadata)
44
- : length_bits_(hash_bits),
125
+ HyperClockTable::HyperClockTable(
126
+ size_t capacity, bool /*strict_capacity_limit*/,
127
+ CacheMetadataChargePolicy metadata_charge_policy, const Opts& opts)
128
+ : length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
129
+ metadata_charge_policy)),
45
130
  length_bits_mask_((size_t{1} << length_bits_) - 1),
46
131
  occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
47
132
  kStrictLoadFactor)),
48
- array_(new ClockHandle[size_t{1} << length_bits_]) {
49
- if (initial_charge_metadata) {
50
- usage_ += size_t{GetTableSize()} * sizeof(ClockHandle);
133
+ array_(new HandleImpl[size_t{1} << length_bits_]) {
134
+ if (metadata_charge_policy ==
135
+ CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
136
+ usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
51
137
  }
138
+
139
+ static_assert(sizeof(HandleImpl) == 64U,
140
+ "Expecting size / alignment with common cache line size");
52
141
  }
53
142
 
54
- ClockHandleTable::~ClockHandleTable() {
143
+ HyperClockTable::~HyperClockTable() {
55
144
  // Assumes there are no references or active operations on any slot/element
56
145
  // in the table.
57
146
  for (size_t i = 0; i < GetTableSize(); i++) {
58
- ClockHandle& h = array_[i];
147
+ HandleImpl& h = array_[i];
59
148
  switch (h.meta >> ClockHandle::kStateShift) {
60
149
  case ClockHandle::kStateEmpty:
61
150
  // noop
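
[Editor's note] The helpers added in this hunk (GetRefcount, GetInitialCountdown, ClockUpdate, FreeDataMarkEmpty) all operate on a single 64-bit meta word per handle that packs the entry state together with separate acquire and release counters; the reference count is the difference of the two counters, and ClockUpdate spends that difference as the CLOCK countdown. A self-contained sketch of the encoding, using illustrative shifts and widths rather than the constants actually defined in clock_cache.h:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Illustrative layout only (not the real clock_cache.h constants):
    // bits 0..30 acquire counter, bits 31..61 release counter, bits 62..63 state.
    constexpr int kAcquireCounterShift = 0;
    constexpr int kReleaseCounterShift = 31;
    constexpr uint64_t kCounterMask = (uint64_t{1} << 31) - 1;
    constexpr uint64_t kAcquireIncrement = uint64_t{1} << kAcquireCounterShift;
    constexpr uint64_t kReleaseIncrement = uint64_t{1} << kReleaseCounterShift;

    // Outstanding references = acquires minus releases (modulo counter width),
    // the same shape as GetRefcount() in the hunk above.
    inline uint64_t GetRefcount(uint64_t meta) {
      return ((meta >> kAcquireCounterShift) - (meta >> kReleaseCounterShift)) &
             kCounterMask;
    }

    int main() {
      std::atomic<uint64_t> meta{0};
      meta.fetch_add(kAcquireIncrement);      // Lookup: one acquire
      meta.fetch_add(kAcquireIncrement);      // Ref: another acquire
      meta.fetch_add(kReleaseIncrement);      // Release: one release
      assert(GetRefcount(meta.load()) == 1);  // one reference still outstanding
      return 0;
    }

Roughly speaking, the initial countdown chosen by GetInitialCountdown (kHighCountdown vs. kLowCountdown vs. kBottomCountdown) is the number of clock ticks an unreferenced entry can absorb before the sweep in ClockUpdate is allowed to reclaim it.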
@@ -66,8 +155,7 @@ ClockHandleTable::~ClockHandleTable() {
66
155
  h.FreeData();
67
156
  #ifndef NDEBUG
68
157
  Rollback(h.hashed_key, &h);
69
- usage_.fetch_sub(h.total_charge, std::memory_order_relaxed);
70
- occupancy_.fetch_sub(1U, std::memory_order_relaxed);
158
+ ReclaimEntryUsage(h.GetTotalCharge());
71
159
  #endif
72
160
  break;
73
161
  // otherwise
@@ -84,7 +172,7 @@ ClockHandleTable::~ClockHandleTable() {
84
172
  #endif
85
173
 
86
174
  assert(usage_.load() == 0 ||
87
- usage_.load() == size_t{GetTableSize()} * sizeof(ClockHandle));
175
+ usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
88
176
  assert(occupancy_ == 0);
89
177
  }
90
178
 
@@ -161,9 +249,141 @@ inline void CorrectNearOverflow(uint64_t old_meta,
161
249
  }
162
250
  }
163
251
 
164
- Status ClockHandleTable::Insert(const ClockHandleBasicData& proto,
165
- ClockHandle** handle, Cache::Priority priority,
166
- size_t capacity, bool strict_capacity_limit) {
252
+ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
253
+ size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
254
+ if (total_charge > capacity) {
255
+ return Status::MemoryLimit(
256
+ "Cache entry too large for a single cache shard: " +
257
+ std::to_string(total_charge) + " > " + std::to_string(capacity));
258
+ }
259
+ // Grab any available capacity, and free up any more required.
260
+ size_t old_usage = usage_.load(std::memory_order_relaxed);
261
+ size_t new_usage;
262
+ if (LIKELY(old_usage != capacity)) {
263
+ do {
264
+ new_usage = std::min(capacity, old_usage + total_charge);
265
+ } while (!usage_.compare_exchange_weak(old_usage, new_usage,
266
+ std::memory_order_relaxed));
267
+ } else {
268
+ new_usage = old_usage;
269
+ }
270
+ // How much do we need to evict then?
271
+ size_t need_evict_charge = old_usage + total_charge - new_usage;
272
+ size_t request_evict_charge = need_evict_charge;
273
+ if (UNLIKELY(need_evict_for_occupancy) && request_evict_charge == 0) {
274
+ // Require at least 1 eviction.
275
+ request_evict_charge = 1;
276
+ }
277
+ if (request_evict_charge > 0) {
278
+ size_t evicted_charge = 0;
279
+ size_t evicted_count = 0;
280
+ Evict(request_evict_charge, &evicted_charge, &evicted_count);
281
+ occupancy_.fetch_sub(evicted_count, std::memory_order_release);
282
+ if (LIKELY(evicted_charge > need_evict_charge)) {
283
+ assert(evicted_count > 0);
284
+ // Evicted more than enough
285
+ usage_.fetch_sub(evicted_charge - need_evict_charge,
286
+ std::memory_order_relaxed);
287
+ } else if (evicted_charge < need_evict_charge ||
288
+ (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0)) {
289
+ // Roll back to old usage minus evicted
290
+ usage_.fetch_sub(evicted_charge + (new_usage - old_usage),
291
+ std::memory_order_relaxed);
292
+ if (evicted_charge < need_evict_charge) {
293
+ return Status::MemoryLimit(
294
+ "Insert failed because unable to evict entries to stay within "
295
+ "capacity limit.");
296
+ } else {
297
+ return Status::MemoryLimit(
298
+ "Insert failed because unable to evict entries to stay within "
299
+ "table occupancy limit.");
300
+ }
301
+ }
302
+ // If we needed to evict something and we are proceeding, we must have
303
+ // evicted something.
304
+ assert(evicted_count > 0);
305
+ }
306
+ return Status::OK();
307
+ }
308
+
309
+ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
310
+ size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
311
+ // For simplicity, we consider that either the cache can accept the insert
312
+ // with no evictions, or we must evict enough to make (at least) enough
313
+ // space. It could lead to unnecessary failures or excessive evictions in
314
+ // some extreme cases, but allows a fast, simple protocol. If we allow a
315
+ // race to get us over capacity, then we might never get back to capacity
316
+ // limit if the sizes of entries allow each insertion to evict the minimum
317
+ // charge. Thus, we should evict some extra if it's not a signifcant
318
+ // portion of the shard capacity. This can have the side benefit of
319
+ // involving fewer threads in eviction.
320
+ size_t old_usage = usage_.load(std::memory_order_relaxed);
321
+ size_t need_evict_charge;
322
+ // NOTE: if total_charge > old_usage, there isn't yet enough to evict
323
+ // `total_charge` amount. Even if we only try to evict `old_usage` amount,
324
+ // there's likely something referenced and we would eat CPU looking for
325
+ // enough to evict.
326
+ if (old_usage + total_charge <= capacity || total_charge > old_usage) {
327
+ // Good enough for me (might run over with a race)
328
+ need_evict_charge = 0;
329
+ } else {
330
+ // Try to evict enough space, and maybe some extra
331
+ need_evict_charge = total_charge;
332
+ if (old_usage > capacity) {
333
+ // Not too much to avoid thundering herd while avoiding strict
334
+ // synchronization, such as the compare_exchange used with strict
335
+ // capacity limit.
336
+ need_evict_charge += std::min(capacity / 1024, total_charge) + 1;
337
+ }
338
+ }
339
+ if (UNLIKELY(need_evict_for_occupancy) && need_evict_charge == 0) {
340
+ // Special case: require at least 1 eviction if we only have to
341
+ // deal with occupancy
342
+ need_evict_charge = 1;
343
+ }
344
+ size_t evicted_charge = 0;
345
+ size_t evicted_count = 0;
346
+ if (need_evict_charge > 0) {
347
+ Evict(need_evict_charge, &evicted_charge, &evicted_count);
348
+ // Deal with potential occupancy deficit
349
+ if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) {
350
+ assert(evicted_charge == 0);
351
+ // Can't meet occupancy requirement
352
+ return false;
353
+ } else {
354
+ // Update occupancy for evictions
355
+ occupancy_.fetch_sub(evicted_count, std::memory_order_release);
356
+ }
357
+ }
358
+ // Track new usage even if we weren't able to evict enough
359
+ usage_.fetch_add(total_charge - evicted_charge, std::memory_order_relaxed);
360
+ // No underflow
361
+ assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
362
+ // Success
363
+ return true;
364
+ }
365
+
366
+ inline HyperClockTable::HandleImpl* HyperClockTable::DetachedInsert(
367
+ const ClockHandleBasicData& proto) {
368
+ // Heap allocated separate from table
369
+ HandleImpl* h = new HandleImpl();
370
+ ClockHandleBasicData* h_alias = h;
371
+ *h_alias = proto;
372
+ h->SetDetached();
373
+ // Single reference (detached entries only created if returning a refed
374
+ // Handle back to user)
375
+ uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
376
+ << ClockHandle::kStateShift;
377
+ meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
378
+ h->meta.store(meta, std::memory_order_release);
379
+ // Keep track of how much of usage is detached
380
+ detached_usage_.fetch_add(proto.GetTotalCharge(), std::memory_order_relaxed);
381
+ return h;
382
+ }
383
+
384
+ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
385
+ HandleImpl** handle, Cache::Priority priority,
386
+ size_t capacity, bool strict_capacity_limit) {
167
387
  // Do we have the available occupancy? Optimistically assume we do
168
388
  // and deal with it if we don't.
169
389
  size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire);
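
[Editor's note] ChargeUsageMaybeEvictStrict, factored out above, follows a claim-then-evict pattern: it first grabs whatever headroom is available by clamping usage_ at capacity with a compare-exchange loop, and only then asks Evict() to free the remaining shortfall. A stripped-down sketch of just that first step, with standalone names rather than the class members used in the diff:

    #include <algorithm>
    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    std::atomic<size_t> usage{900};  // stand-in for the table's usage_ member

    // Claim up to total_charge of headroom without exceeding capacity and
    // report how much must still be freed by eviction (0 if none).
    size_t ClaimOrComputeShortfall(size_t total_charge, size_t capacity) {
      size_t old_usage = usage.load(std::memory_order_relaxed);
      size_t new_usage;
      if (old_usage != capacity) {
        do {
          new_usage = std::min(capacity, old_usage + total_charge);
        } while (!usage.compare_exchange_weak(old_usage, new_usage,
                                              std::memory_order_relaxed));
      } else {
        new_usage = old_usage;  // already full; everything must come from eviction
      }
      return old_usage + total_charge - new_usage;
    }

    int main() {
      // capacity 1000, usage 900: inserting a charge of 250 claims the free
      // 100 and leaves 150 to be evicted before the insert may proceed.
      std::printf("need to evict: %zu\n", ClaimOrComputeShortfall(250, 1000));
      return 0;
    }

The non-strict variant (ChargeUsageMaybeEvictNonStrict) deliberately skips the compare-exchange and may briefly run over capacity, trading exactness for less contention on usage_.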
@@ -176,124 +396,31 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto,
176
396
  // Usage/capacity handling is somewhat different depending on
177
397
  // strict_capacity_limit, but mostly pessimistic.
178
398
  bool use_detached_insert = false;
179
- const size_t total_charge = proto.total_charge;
399
+ const size_t total_charge = proto.GetTotalCharge();
180
400
  if (strict_capacity_limit) {
181
- if (total_charge > capacity) {
182
- assert(!use_detached_insert);
401
+ Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
402
+ need_evict_for_occupancy);
403
+ if (!s.ok()) {
183
404
  revert_occupancy_fn();
184
- return Status::MemoryLimit(
185
- "Cache entry too large for a single cache shard: " +
186
- std::to_string(total_charge) + " > " + std::to_string(capacity));
187
- }
188
- // Grab any available capacity, and free up any more required.
189
- size_t old_usage = usage_.load(std::memory_order_relaxed);
190
- size_t new_usage;
191
- if (LIKELY(old_usage != capacity)) {
192
- do {
193
- new_usage = std::min(capacity, old_usage + total_charge);
194
- } while (!usage_.compare_exchange_weak(old_usage, new_usage,
195
- std::memory_order_relaxed));
196
- } else {
197
- new_usage = old_usage;
198
- }
199
- // How much do we need to evict then?
200
- size_t need_evict_charge = old_usage + total_charge - new_usage;
201
- size_t request_evict_charge = need_evict_charge;
202
- if (UNLIKELY(need_evict_for_occupancy) && request_evict_charge == 0) {
203
- // Require at least 1 eviction.
204
- request_evict_charge = 1;
205
- }
206
- if (request_evict_charge > 0) {
207
- size_t evicted_charge = 0;
208
- size_t evicted_count = 0;
209
- Evict(request_evict_charge, &evicted_charge, &evicted_count);
210
- occupancy_.fetch_sub(evicted_count, std::memory_order_release);
211
- if (LIKELY(evicted_charge > need_evict_charge)) {
212
- assert(evicted_count > 0);
213
- // Evicted more than enough
214
- usage_.fetch_sub(evicted_charge - need_evict_charge,
215
- std::memory_order_relaxed);
216
- } else if (evicted_charge < need_evict_charge ||
217
- (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0)) {
218
- // Roll back to old usage minus evicted
219
- usage_.fetch_sub(evicted_charge + (new_usage - old_usage),
220
- std::memory_order_relaxed);
221
- assert(!use_detached_insert);
222
- revert_occupancy_fn();
223
- if (evicted_charge < need_evict_charge) {
224
- return Status::MemoryLimit(
225
- "Insert failed because unable to evict entries to stay within "
226
- "capacity limit.");
227
- } else {
228
- return Status::MemoryLimit(
229
- "Insert failed because unable to evict entries to stay within "
230
- "table occupancy limit.");
231
- }
232
- }
233
- // If we needed to evict something and we are proceeding, we must have
234
- // evicted something.
235
- assert(evicted_count > 0);
405
+ return s;
236
406
  }
237
407
  } else {
238
408
  // Case strict_capacity_limit == false
239
-
240
- // For simplicity, we consider that either the cache can accept the insert
241
- // with no evictions, or we must evict enough to make (at least) enough
242
- // space. It could lead to unnecessary failures or excessive evictions in
243
- // some extreme cases, but allows a fast, simple protocol. If we allow a
244
- // race to get us over capacity, then we might never get back to capacity
245
- // limit if the sizes of entries allow each insertion to evict the minimum
246
- // charge. Thus, we should evict some extra if it's not a signifcant
247
- // portion of the shard capacity. This can have the side benefit of
248
- // involving fewer threads in eviction.
249
- size_t old_usage = usage_.load(std::memory_order_relaxed);
250
- size_t need_evict_charge;
251
- // NOTE: if total_charge > old_usage, there isn't yet enough to evict
252
- // `total_charge` amount. Even if we only try to evict `old_usage` amount,
253
- // there's likely something referenced and we would eat CPU looking for
254
- // enough to evict.
255
- if (old_usage + total_charge <= capacity || total_charge > old_usage) {
256
- // Good enough for me (might run over with a race)
257
- need_evict_charge = 0;
258
- } else {
259
- // Try to evict enough space, and maybe some extra
260
- need_evict_charge = total_charge;
261
- if (old_usage > capacity) {
262
- // Not too much to avoid thundering herd while avoiding strict
263
- // synchronization
264
- need_evict_charge += std::min(capacity / 1024, total_charge) + 1;
265
- }
266
- }
267
- if (UNLIKELY(need_evict_for_occupancy) && need_evict_charge == 0) {
268
- // Special case: require at least 1 eviction if we only have to
269
- // deal with occupancy
270
- need_evict_charge = 1;
271
- }
272
- size_t evicted_charge = 0;
273
- size_t evicted_count = 0;
274
- if (need_evict_charge > 0) {
275
- Evict(need_evict_charge, &evicted_charge, &evicted_count);
276
- // Deal with potential occupancy deficit
277
- if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) {
278
- assert(evicted_charge == 0);
279
- revert_occupancy_fn();
280
- if (handle == nullptr) {
281
- // Don't insert the entry but still return ok, as if the entry
282
- // inserted into cache and evicted immediately.
283
- proto.FreeData();
284
- return Status::OK();
285
- } else {
286
- use_detached_insert = true;
287
- }
409
+ bool success = ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
410
+ need_evict_for_occupancy);
411
+ if (!success) {
412
+ revert_occupancy_fn();
413
+ if (handle == nullptr) {
414
+ // Don't insert the entry but still return ok, as if the entry
415
+ // inserted into cache and evicted immediately.
416
+ proto.FreeData();
417
+ return Status::OK();
288
418
  } else {
289
- // Update occupancy for evictions
290
- occupancy_.fetch_sub(evicted_count, std::memory_order_release);
419
+ // Need to track usage of fallback detached insert
420
+ usage_.fetch_add(total_charge, std::memory_order_relaxed);
421
+ use_detached_insert = true;
291
422
  }
292
423
  }
293
- // Track new usage even if we weren't able to evict enough
294
- usage_.fetch_add(total_charge - evicted_charge, std::memory_order_relaxed);
295
- // No underflow
296
- assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
297
424
  }
298
425
  auto revert_usage_fn = [&]() {
299
426
  usage_.fetch_sub(total_charge, std::memory_order_relaxed);
@@ -310,30 +437,13 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto,
310
437
  // * Have to insert into a suboptimal location (more probes) so that the
311
438
  // old entry can be kept around as well.
312
439
 
313
- // Set initial clock data from priority
314
- // TODO: configuration parameters for priority handling and clock cycle
315
- // count?
316
- uint64_t initial_countdown;
317
- switch (priority) {
318
- case Cache::Priority::HIGH:
319
- initial_countdown = ClockHandle::kHighCountdown;
320
- break;
321
- default:
322
- assert(false);
323
- FALLTHROUGH_INTENDED;
324
- case Cache::Priority::LOW:
325
- initial_countdown = ClockHandle::kLowCountdown;
326
- break;
327
- case Cache::Priority::BOTTOM:
328
- initial_countdown = ClockHandle::kBottomCountdown;
329
- break;
330
- }
440
+ uint64_t initial_countdown = GetInitialCountdown(priority);
331
441
  assert(initial_countdown > 0);
332
442
 
333
443
  size_t probe = 0;
334
- ClockHandle* e = FindSlot(
444
+ HandleImpl* e = FindSlot(
335
445
  proto.hashed_key,
336
- [&](ClockHandle* h) {
446
+ [&](HandleImpl* h) {
337
447
  // Optimistically transition the slot from "empty" to
338
448
  // "under construction" (no effect on other states)
339
449
  uint64_t old_meta =
@@ -414,8 +524,8 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto,
414
524
  (void)old_meta;
415
525
  return false;
416
526
  },
417
- [&](ClockHandle* /*h*/) { return false; },
418
- [&](ClockHandle* h) {
527
+ [&](HandleImpl* /*h*/) { return false; },
528
+ [&](HandleImpl* h) {
419
529
  h->displacements.fetch_add(1, std::memory_order_relaxed);
420
530
  },
421
531
  probe);
@@ -452,20 +562,8 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto,
452
562
  // Run detached insert
453
563
  assert(use_detached_insert);
454
564
 
455
- ClockHandle* h = new ClockHandle();
456
- ClockHandleBasicData* h_alias = h;
457
- *h_alias = proto;
458
- h->detached = true;
459
- // Single reference (detached entries only created if returning a refed
460
- // Handle back to user)
461
- uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
462
- << ClockHandle::kStateShift;
463
- meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
464
- h->meta.store(meta, std::memory_order_release);
465
- // Keep track of usage
466
- detached_usage_.fetch_add(total_charge, std::memory_order_relaxed);
565
+ *handle = DetachedInsert(proto);
467
566
 
468
- *handle = h;
469
567
  // The OkOverwritten status is used to count "redundant" insertions into
470
568
  // block cache. This implementation doesn't strictly check for redundant
471
569
  // insertions, but we instead are probably interested in how many insertions
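
[Editor's note] The block removed here is the inline construction of a "detached" handle, now folded into DetachedInsert(): when the table cannot make room but the caller passed a non-null handle pointer, the entry is heap-allocated outside the hash table so the caller still gets back a pinned, refcounted handle that is simply deleted on its final Release. A rough sketch of the idea using hypothetical standalone types, not the diff's HandleImpl:

    #include <cstddef>
    #include <string>

    struct Entry {
      std::string key;
      size_t charge;
      bool detached;  // lives outside the table, never reachable via Lookup
    };

    // Fallback used when eviction could not make room but the caller still
    // wants a usable, pinned handle back.
    Entry* DetachedInsertSketch(const std::string& key, size_t charge,
                                size_t* detached_usage) {
      Entry* e = new Entry{key, charge, /*detached=*/true};
      *detached_usage += charge;  // tracked separately from table usage
      return e;                   // caller owns one reference; freed on last Release
    }

    int main() {
      size_t detached_usage = 0;
      Entry* e = DetachedInsertSketch("k", 128, &detached_usage);
      // ... use the pinned entry ...
      detached_usage -= e->charge;
      delete e;
      return 0;
    }
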
@@ -474,11 +572,12 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto,
474
572
  return Status::OkOverwritten();
475
573
  }
476
574
 
477
- ClockHandle* ClockHandleTable::Lookup(const UniqueId64x2& hashed_key) {
575
+ HyperClockTable::HandleImpl* HyperClockTable::Lookup(
576
+ const UniqueId64x2& hashed_key) {
478
577
  size_t probe = 0;
479
- ClockHandle* e = FindSlot(
578
+ HandleImpl* e = FindSlot(
480
579
  hashed_key,
481
- [&](ClockHandle* h) {
580
+ [&](HandleImpl* h) {
482
581
  // Mostly branch-free version (similar performance)
483
582
  /*
484
583
  uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement,
@@ -532,16 +631,16 @@ ClockHandle* ClockHandleTable::Lookup(const UniqueId64x2& hashed_key) {
532
631
  (void)old_meta;
533
632
  return false;
534
633
  },
535
- [&](ClockHandle* h) {
634
+ [&](HandleImpl* h) {
536
635
  return h->displacements.load(std::memory_order_relaxed) == 0;
537
636
  },
538
- [&](ClockHandle* /*h*/) {}, probe);
637
+ [&](HandleImpl* /*h*/) {}, probe);
539
638
 
540
639
  return e;
541
640
  }
542
641
 
543
- bool ClockHandleTable::Release(ClockHandle* h, bool useful,
544
- bool erase_if_last_ref) {
642
+ bool HyperClockTable::Release(HandleImpl* h, bool useful,
643
+ bool erase_if_last_ref) {
545
644
  // In contrast with LRUCache's Release, this function won't delete the handle
546
645
  // when the cache is above capacity and the reference is the last one. Space
547
646
  // is only freed up by EvictFromClock (called by Insert when space is needed)
@@ -595,29 +694,18 @@ bool ClockHandleTable::Release(ClockHandle* h, bool useful,
595
694
  uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift,
596
695
  std::memory_order_acquire));
597
696
  // Took ownership
598
- // TODO? Delay freeing?
599
- h->FreeData();
600
- size_t total_charge = h->total_charge;
601
- if (UNLIKELY(h->detached)) {
697
+ size_t total_charge = h->GetTotalCharge();
698
+ if (UNLIKELY(h->IsDetached())) {
699
+ h->FreeData();
602
700
  // Delete detached handle
603
701
  delete h;
604
702
  detached_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
703
+ usage_.fetch_sub(total_charge, std::memory_order_relaxed);
605
704
  } else {
606
- UniqueId64x2 hashed_key = h->hashed_key;
607
- #ifndef NDEBUG
608
- // Mark slot as empty, with assertion
609
- old_meta = h->meta.exchange(0, std::memory_order_release);
610
- assert(old_meta >> ClockHandle::kStateShift ==
611
- ClockHandle::kStateConstruction);
612
- #else
613
- // Mark slot as empty
614
- h->meta.store(0, std::memory_order_release);
615
- #endif
616
- occupancy_.fetch_sub(1U, std::memory_order_release);
617
- Rollback(hashed_key, h);
705
+ Rollback(h->hashed_key, h);
706
+ FreeDataMarkEmpty(*h);
707
+ ReclaimEntryUsage(total_charge);
618
708
  }
619
- usage_.fetch_sub(total_charge, std::memory_order_relaxed);
620
- assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
621
709
  return true;
622
710
  } else {
623
711
  // Correct for possible (but rare) overflow
@@ -626,7 +714,7 @@ bool ClockHandleTable::Release(ClockHandle* h, bool useful,
626
714
  }
627
715
  }
628
716
 
629
- void ClockHandleTable::Ref(ClockHandle& h) {
717
+ void HyperClockTable::Ref(HandleImpl& h) {
630
718
  // Increment acquire counter
631
719
  uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
632
720
  std::memory_order_acquire);
@@ -638,7 +726,7 @@ void ClockHandleTable::Ref(ClockHandle& h) {
638
726
  (void)old_meta;
639
727
  }
640
728
 
641
- void ClockHandleTable::TEST_RefN(ClockHandle& h, size_t n) {
729
+ void HyperClockTable::TEST_RefN(HandleImpl& h, size_t n) {
642
730
  // Increment acquire counter
643
731
  uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
644
732
  std::memory_order_acquire);
@@ -648,7 +736,7 @@ void ClockHandleTable::TEST_RefN(ClockHandle& h, size_t n) {
648
736
  (void)old_meta;
649
737
  }
650
738
 
651
- void ClockHandleTable::TEST_ReleaseN(ClockHandle* h, size_t n) {
739
+ void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) {
652
740
  if (n > 0) {
653
741
  // Split into n - 1 and 1 steps.
654
742
  uint64_t old_meta = h->meta.fetch_add(
@@ -661,11 +749,11 @@ void ClockHandleTable::TEST_ReleaseN(ClockHandle* h, size_t n) {
661
749
  }
662
750
  }
663
751
 
664
- void ClockHandleTable::Erase(const UniqueId64x2& hashed_key) {
752
+ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) {
665
753
  size_t probe = 0;
666
754
  (void)FindSlot(
667
755
  hashed_key,
668
- [&](ClockHandle* h) {
756
+ [&](HandleImpl* h) {
669
757
  // Could be multiple entries in rare cases. Erase them all.
670
758
  // Optimistically increment acquire counter
671
759
  uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement,
@@ -699,20 +787,11 @@ void ClockHandleTable::Erase(const UniqueId64x2& hashed_key) {
699
787
  std::memory_order_acq_rel)) {
700
788
  // Took ownership
701
789
  assert(hashed_key == h->hashed_key);
702
- // TODO? Delay freeing?
703
- h->FreeData();
704
- usage_.fetch_sub(h->total_charge, std::memory_order_relaxed);
705
- assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
706
- #ifndef NDEBUG
707
- // Mark slot as empty, with assertion
708
- old_meta = h->meta.exchange(0, std::memory_order_release);
709
- assert(old_meta >> ClockHandle::kStateShift ==
710
- ClockHandle::kStateConstruction);
711
- #else
712
- // Mark slot as empty
713
- h->meta.store(0, std::memory_order_release);
714
- #endif
715
- occupancy_.fetch_sub(1U, std::memory_order_release);
790
+ size_t total_charge = h->GetTotalCharge();
791
+ FreeDataMarkEmpty(*h);
792
+ ReclaimEntryUsage(total_charge);
793
+ // We already have a copy of hashed_key in this case, so OK to
794
+ // delay Rollback until after releasing the entry
716
795
  Rollback(hashed_key, h);
717
796
  break;
718
797
  }
@@ -735,14 +814,14 @@ void ClockHandleTable::Erase(const UniqueId64x2& hashed_key) {
735
814
  }
736
815
  return false;
737
816
  },
738
- [&](ClockHandle* h) {
817
+ [&](HandleImpl* h) {
739
818
  return h->displacements.load(std::memory_order_relaxed) == 0;
740
819
  },
741
- [&](ClockHandle* /*h*/) {}, probe);
820
+ [&](HandleImpl* /*h*/) {}, probe);
742
821
  }
743
822
 
744
- void ClockHandleTable::ConstApplyToEntriesRange(
745
- std::function<void(const ClockHandle&)> func, size_t index_begin,
823
+ void HyperClockTable::ConstApplyToEntriesRange(
824
+ std::function<void(const HandleImpl&)> func, size_t index_begin,
746
825
  size_t index_end, bool apply_if_will_be_deleted) const {
747
826
  uint64_t check_state_mask = ClockHandle::kStateShareableBit;
748
827
  if (!apply_if_will_be_deleted) {
@@ -750,7 +829,7 @@ void ClockHandleTable::ConstApplyToEntriesRange(
750
829
  }
751
830
 
752
831
  for (size_t i = index_begin; i < index_end; i++) {
753
- ClockHandle& h = array_[i];
832
+ HandleImpl& h = array_[i];
754
833
 
755
834
  // Note: to avoid using compare_exchange, we have to be extra careful.
756
835
  uint64_t old_meta = h.meta.load(std::memory_order_relaxed);
@@ -782,9 +861,9 @@ void ClockHandleTable::ConstApplyToEntriesRange(
782
861
  }
783
862
  }
784
863
 
785
- void ClockHandleTable::EraseUnRefEntries() {
864
+ void HyperClockTable::EraseUnRefEntries() {
786
865
  for (size_t i = 0; i <= this->length_bits_mask_; i++) {
787
- ClockHandle& h = array_[i];
866
+ HandleImpl& h = array_[i];
788
867
 
789
868
  uint64_t old_meta = h.meta.load(std::memory_order_relaxed);
790
869
  if (old_meta & (uint64_t{ClockHandle::kStateShareableBit}
@@ -795,28 +874,18 @@ void ClockHandleTable::EraseUnRefEntries() {
795
874
  << ClockHandle::kStateShift,
796
875
  std::memory_order_acquire)) {
797
876
  // Took ownership
798
- UniqueId64x2 hashed_key = h.hashed_key;
799
- h.FreeData();
800
- usage_.fetch_sub(h.total_charge, std::memory_order_relaxed);
801
- #ifndef NDEBUG
802
- // Mark slot as empty, with assertion
803
- old_meta = h.meta.exchange(0, std::memory_order_release);
804
- assert(old_meta >> ClockHandle::kStateShift ==
805
- ClockHandle::kStateConstruction);
806
- #else
807
- // Mark slot as empty
808
- h.meta.store(0, std::memory_order_release);
809
- #endif
810
- occupancy_.fetch_sub(1U, std::memory_order_release);
811
- Rollback(hashed_key, &h);
877
+ size_t total_charge = h.GetTotalCharge();
878
+ Rollback(h.hashed_key, &h);
879
+ FreeDataMarkEmpty(h);
880
+ ReclaimEntryUsage(total_charge);
812
881
  }
813
882
  }
814
883
  }
815
884
 
816
- ClockHandle* ClockHandleTable::FindSlot(
817
- const UniqueId64x2& hashed_key, std::function<bool(ClockHandle*)> match_fn,
818
- std::function<bool(ClockHandle*)> abort_fn,
819
- std::function<void(ClockHandle*)> update_fn, size_t& probe) {
885
+ inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot(
886
+ const UniqueId64x2& hashed_key, std::function<bool(HandleImpl*)> match_fn,
887
+ std::function<bool(HandleImpl*)> abort_fn,
888
+ std::function<void(HandleImpl*)> update_fn, size_t& probe) {
820
889
  // NOTE: upper 32 bits of hashed_key[0] is used for sharding
821
890
  //
822
891
  // We use double-hashing probing. Every probe in the sequence is a
@@ -832,7 +901,7 @@ ClockHandle* ClockHandleTable::FindSlot(
832
901
  size_t increment = static_cast<size_t>(hashed_key[0]) | 1U;
833
902
  size_t current = ModTableSize(base + probe * increment);
834
903
  while (probe <= length_bits_mask_) {
835
- ClockHandle* h = &array_[current];
904
+ HandleImpl* h = &array_[current];
836
905
  if (match_fn(h)) {
837
906
  probe++;
838
907
  return h;
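
[Editor's note] FindSlot, shown above with its new HandleImpl parameters, keeps the same double-hashing probe sequence: the start slot comes from one half of the hashed key and the step from the other half, forced odd so that it is coprime with the power-of-two table size and the sequence can visit every slot before repeating. A minimal sketch of that sequence with an illustrative ModTableSize (the real one masks with length_bits_mask_):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr size_t kTableSizeBits = 4;  // 16 slots, illustrative only
    constexpr size_t kTableMask = (size_t{1} << kTableSizeBits) - 1;

    inline size_t ModTableSize(uint64_t x) {
      return static_cast<size_t>(x) & kTableMask;
    }

    int main() {
      // Two halves of a 128-bit hashed key (arbitrary example values).
      uint64_t hashed_key[2] = {0x9e3779b97f4a7c15ULL, 0xc2b2ae3d27d4eb4fULL};
      size_t base = ModTableSize(hashed_key[1]);
      size_t increment = static_cast<size_t>(hashed_key[0]) | 1U;  // odd => full cycle
      // With an odd step on a power-of-two table, 16 probes touch all 16 slots.
      for (size_t probe = 0; probe < (size_t{1} << kTableSizeBits); ++probe) {
        std::printf("%zu ", ModTableSize(base + probe * increment));
      }
      std::printf("\n");
      return 0;
    }
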
@@ -848,18 +917,29 @@ ClockHandle* ClockHandleTable::FindSlot(
848
917
  return nullptr;
849
918
  }
850
919
 
851
- void ClockHandleTable::Rollback(const UniqueId64x2& hashed_key,
852
- const ClockHandle* h) {
920
+ inline void HyperClockTable::Rollback(const UniqueId64x2& hashed_key,
921
+ const HandleImpl* h) {
853
922
  size_t current = ModTableSize(hashed_key[1]);
854
923
  size_t increment = static_cast<size_t>(hashed_key[0]) | 1U;
855
- for (size_t i = 0; &array_[current] != h; i++) {
924
+ while (&array_[current] != h) {
856
925
  array_[current].displacements.fetch_sub(1, std::memory_order_relaxed);
857
926
  current = ModTableSize(current + increment);
858
927
  }
859
928
  }
860
929
 
861
- void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge,
862
- size_t* freed_count) {
930
+ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) {
931
+ auto old_occupancy = occupancy_.fetch_sub(1U, std::memory_order_release);
932
+ (void)old_occupancy;
933
+ // No underflow
934
+ assert(old_occupancy > 0);
935
+ auto old_usage = usage_.fetch_sub(total_charge, std::memory_order_relaxed);
936
+ (void)old_usage;
937
+ // No underflow
938
+ assert(old_usage >= total_charge);
939
+ }
940
+
941
+ inline void HyperClockTable::Evict(size_t requested_charge,
942
+ size_t* freed_charge, size_t* freed_count) {
863
943
  // precondition
864
944
  assert(requested_charge > 0);
865
945
 
@@ -880,64 +960,13 @@ void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge,
880
960
 
881
961
  for (;;) {
882
962
  for (size_t i = 0; i < step_size; i++) {
883
- ClockHandle& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))];
884
- uint64_t meta = h.meta.load(std::memory_order_relaxed);
885
-
886
- uint64_t acquire_count = (meta >> ClockHandle::kAcquireCounterShift) &
887
- ClockHandle::kCounterMask;
888
- uint64_t release_count = (meta >> ClockHandle::kReleaseCounterShift) &
889
- ClockHandle::kCounterMask;
890
- if (acquire_count != release_count) {
891
- // Only clock update entries with no outstanding refs
892
- continue;
893
- }
894
- if (!((meta >> ClockHandle::kStateShift) &
895
- ClockHandle::kStateShareableBit)) {
896
- // Only clock update Shareable entries
897
- continue;
898
- }
899
- if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) &&
900
- acquire_count > 0) {
901
- // Decrement clock
902
- uint64_t new_count = std::min(acquire_count - 1,
903
- uint64_t{ClockHandle::kMaxCountdown} - 1);
904
- // Compare-exchange in the decremented clock info, but
905
- // not aggressively
906
- uint64_t new_meta =
907
- (uint64_t{ClockHandle::kStateVisible} << ClockHandle::kStateShift) |
908
- (new_count << ClockHandle::kReleaseCounterShift) |
909
- (new_count << ClockHandle::kAcquireCounterShift);
910
- h.meta.compare_exchange_strong(meta, new_meta,
911
- std::memory_order_relaxed);
912
- continue;
913
- }
914
- // Otherwise, remove entry (either unreferenced invisible or
915
- // unreferenced and expired visible). Compare-exchange failing probably
916
- // indicates the entry was used, so skip it in that case.
917
- if (h.meta.compare_exchange_strong(
918
- meta,
919
- uint64_t{ClockHandle::kStateConstruction}
920
- << ClockHandle::kStateShift,
921
- std::memory_order_acquire)) {
922
- // Took ownership.
923
- // Save info about h to minimize dependences between atomic updates
924
- // (e.g. fully relaxed Rollback after h released by marking empty)
925
- const UniqueId64x2 h_hashed_key = h.hashed_key;
926
- size_t h_total_charge = h.total_charge;
927
- // TODO? Delay freeing?
928
- h.FreeData();
929
- #ifndef NDEBUG
930
- // Mark slot as empty, with assertion
931
- meta = h.meta.exchange(0, std::memory_order_release);
932
- assert(meta >> ClockHandle::kStateShift ==
933
- ClockHandle::kStateConstruction);
934
- #else
935
- // Mark slot as empty
936
- h.meta.store(0, std::memory_order_release);
937
- #endif
963
+ HandleImpl& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))];
964
+ bool evicting = ClockUpdate(h);
965
+ if (evicting) {
966
+ Rollback(h.hashed_key, &h);
967
+ *freed_charge += h.GetTotalCharge();
938
968
  *freed_count += 1;
939
- *freed_charge += h_total_charge;
940
- Rollback(h_hashed_key, &h);
969
+ FreeDataMarkEmpty(h);
941
970
  }
942
971
  }
943
972
 
@@ -955,23 +984,26 @@ void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge,
955
984
  }
956
985
  }
957
986
 
958
- ClockCacheShard::ClockCacheShard(
959
- size_t capacity, size_t estimated_value_size, bool strict_capacity_limit,
960
- CacheMetadataChargePolicy metadata_charge_policy)
987
+ template <class Table>
988
+ ClockCacheShard<Table>::ClockCacheShard(
989
+ size_t capacity, bool strict_capacity_limit,
990
+ CacheMetadataChargePolicy metadata_charge_policy,
991
+ const typename Table::Opts& opts)
961
992
  : CacheShardBase(metadata_charge_policy),
962
- table_(
963
- CalcHashBits(capacity, estimated_value_size, metadata_charge_policy),
964
- /*initial_charge_metadata*/ metadata_charge_policy ==
965
- kFullChargeCacheMetadata),
993
+ table_(capacity, strict_capacity_limit, metadata_charge_policy, opts),
966
994
  capacity_(capacity),
967
995
  strict_capacity_limit_(strict_capacity_limit) {
968
996
  // Initial charge metadata should not exceed capacity
969
- assert(table_.GetUsage() <= capacity_ || capacity_ < sizeof(ClockHandle));
997
+ assert(table_.GetUsage() <= capacity_ || capacity_ < sizeof(HandleImpl));
970
998
  }
971
999
 
972
- void ClockCacheShard::EraseUnRefEntries() { table_.EraseUnRefEntries(); }
1000
+ template <class Table>
1001
+ void ClockCacheShard<Table>::EraseUnRefEntries() {
1002
+ table_.EraseUnRefEntries();
1003
+ }
973
1004
 
974
- void ClockCacheShard::ApplyToSomeEntries(
1005
+ template <class Table>
1006
+ void ClockCacheShard<Table>::ApplyToSomeEntries(
975
1007
  const std::function<void(const Slice& key, void* value, size_t charge,
976
1008
  DeleterFn deleter)>& callback,
977
1009
  size_t average_entries_per_lock, size_t* state) {
@@ -997,20 +1029,20 @@ void ClockCacheShard::ApplyToSomeEntries(
997
1029
  }
998
1030
 
999
1031
  table_.ConstApplyToEntriesRange(
1000
- [callback](const ClockHandle& h) {
1032
+ [callback](const HandleImpl& h) {
1001
1033
  UniqueId64x2 unhashed;
1002
- callback(ReverseHash(h.hashed_key, &unhashed), h.value, h.total_charge,
1003
- h.deleter);
1034
+ callback(ReverseHash(h.hashed_key, &unhashed), h.value,
1035
+ h.GetTotalCharge(), h.deleter);
1004
1036
  },
1005
1037
  index_begin, index_end, false);
1006
1038
  }
1007
1039
 
1008
- int ClockCacheShard::CalcHashBits(
1040
+ int HyperClockTable::CalcHashBits(
1009
1041
  size_t capacity, size_t estimated_value_size,
1010
1042
  CacheMetadataChargePolicy metadata_charge_policy) {
1011
1043
  double average_slot_charge = estimated_value_size * kLoadFactor;
1012
1044
  if (metadata_charge_policy == kFullChargeCacheMetadata) {
1013
- average_slot_charge += sizeof(ClockHandle);
1045
+ average_slot_charge += sizeof(HandleImpl);
1014
1046
  }
1015
1047
  assert(average_slot_charge > 0.0);
1016
1048
  uint64_t num_slots =
@@ -1020,28 +1052,34 @@ int ClockCacheShard::CalcHashBits(
1020
1052
  if (metadata_charge_policy == kFullChargeCacheMetadata) {
1021
1053
  // For very small estimated value sizes, it's possible to overshoot
1022
1054
  while (hash_bits > 0 &&
1023
- uint64_t{sizeof(ClockHandle)} << hash_bits > capacity) {
1055
+ uint64_t{sizeof(HandleImpl)} << hash_bits > capacity) {
1024
1056
  hash_bits--;
1025
1057
  }
1026
1058
  }
1027
1059
  return hash_bits;
1028
1060
  }
1029
1061
 
1030
- void ClockCacheShard::SetCapacity(size_t capacity) {
1062
+ template <class Table>
1063
+ void ClockCacheShard<Table>::SetCapacity(size_t capacity) {
1031
1064
  capacity_.store(capacity, std::memory_order_relaxed);
1032
1065
  // next Insert will take care of any necessary evictions
1033
1066
  }
1034
1067
 
1035
- void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
1068
+ template <class Table>
1069
+ void ClockCacheShard<Table>::SetStrictCapacityLimit(
1070
+ bool strict_capacity_limit) {
1036
1071
  strict_capacity_limit_.store(strict_capacity_limit,
1037
1072
  std::memory_order_relaxed);
1038
1073
  // next Insert will take care of any necessary evictions
1039
1074
  }
1040
1075
 
1041
- Status ClockCacheShard::Insert(const Slice& key, const UniqueId64x2& hashed_key,
1042
- void* value, size_t charge,
1043
- Cache::DeleterFn deleter, ClockHandle** handle,
1044
- Cache::Priority priority) {
1076
+ template <class Table>
1077
+ Status ClockCacheShard<Table>::Insert(const Slice& key,
1078
+ const UniqueId64x2& hashed_key,
1079
+ void* value, size_t charge,
1080
+ Cache::DeleterFn deleter,
1081
+ HandleImpl** handle,
1082
+ Cache::Priority priority) {
1045
1083
  if (UNLIKELY(key.size() != kCacheKeySize)) {
1046
1084
  return Status::NotSupported("ClockCache only supports key size " +
1047
1085
  std::to_string(kCacheKeySize) + "B");
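
[Editor's note] CalcHashBits, moved onto HyperClockTable above, sizes the table from the shard capacity: it estimates an average charge per slot (value size scaled by the load factor, plus handle metadata when that is charged), divides capacity by it, and rounds the slot count up to a power of two. A hedged arithmetic sketch with made-up constants (the real kLoadFactor and handle size live in clock_cache.h):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the constants used by CalcHashBits.
    constexpr double kLoadFactor = 0.7;
    constexpr uint64_t kHandleSize = 64;  // sizeof(HandleImpl) per the diff

    int CalcHashBitsSketch(uint64_t capacity, uint64_t estimated_value_size,
                           bool full_charge_metadata) {
      double average_slot_charge = estimated_value_size * kLoadFactor;
      if (full_charge_metadata) {
        average_slot_charge += kHandleSize;
      }
      uint64_t num_slots =
          static_cast<uint64_t>(std::ceil(capacity / average_slot_charge));
      int hash_bits = 0;
      while ((uint64_t{1} << hash_bits) < num_slots) {
        ++hash_bits;  // round the slot count up to a power of two
      }
      // When metadata is charged, very small value sizes can overshoot capacity.
      if (full_charge_metadata) {
        while (hash_bits > 0 && (kHandleSize << hash_bits) > capacity) {
          --hash_bits;
        }
      }
      return hash_bits;
    }

    int main() {
      // e.g. a 32 MiB shard with ~8 KiB blocks needs on the order of 2^13 slots.
      std::printf("hash_bits = %d\n", CalcHashBitsSketch(32u << 20, 8192, true));
      return 0;
    }
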
@@ -1051,22 +1089,23 @@ Status ClockCacheShard::Insert(const Slice& key, const UniqueId64x2& hashed_key,
1051
1089
  proto.value = value;
1052
1090
  proto.deleter = deleter;
1053
1091
  proto.total_charge = charge;
1054
- Status s =
1055
- table_.Insert(proto, reinterpret_cast<ClockHandle**>(handle), priority,
1056
- capacity_.load(std::memory_order_relaxed),
1057
- strict_capacity_limit_.load(std::memory_order_relaxed));
1092
+ Status s = table_.Insert(
1093
+ proto, handle, priority, capacity_.load(std::memory_order_relaxed),
1094
+ strict_capacity_limit_.load(std::memory_order_relaxed));
1058
1095
  return s;
1059
1096
  }
1060
1097
 
1061
- ClockHandle* ClockCacheShard::Lookup(const Slice& key,
1062
- const UniqueId64x2& hashed_key) {
1098
+ template <class Table>
1099
+ typename ClockCacheShard<Table>::HandleImpl* ClockCacheShard<Table>::Lookup(
1100
+ const Slice& key, const UniqueId64x2& hashed_key) {
1063
1101
  if (UNLIKELY(key.size() != kCacheKeySize)) {
1064
1102
  return nullptr;
1065
1103
  }
1066
1104
  return table_.Lookup(hashed_key);
1067
1105
  }
1068
1106
 
1069
- bool ClockCacheShard::Ref(ClockHandle* h) {
1107
+ template <class Table>
1108
+ bool ClockCacheShard<Table>::Ref(HandleImpl* h) {
1070
1109
  if (h == nullptr) {
1071
1110
  return false;
1072
1111
  }
@@ -1074,36 +1113,47 @@ bool ClockCacheShard::Ref(ClockHandle* h) {
1074
1113
  return true;
1075
1114
  }
1076
1115
 
1077
- bool ClockCacheShard::Release(ClockHandle* handle, bool useful,
1078
- bool erase_if_last_ref) {
1116
+ template <class Table>
1117
+ bool ClockCacheShard<Table>::Release(HandleImpl* handle, bool useful,
1118
+ bool erase_if_last_ref) {
1079
1119
  if (handle == nullptr) {
1080
1120
  return false;
1081
1121
  }
1082
1122
  return table_.Release(handle, useful, erase_if_last_ref);
1083
1123
  }
1084
1124
 
1085
- void ClockCacheShard::TEST_RefN(ClockHandle* h, size_t n) {
1125
+ template <class Table>
1126
+ void ClockCacheShard<Table>::TEST_RefN(HandleImpl* h, size_t n) {
1086
1127
  table_.TEST_RefN(*h, n);
1087
1128
  }
1088
1129
 
1089
- void ClockCacheShard::TEST_ReleaseN(ClockHandle* h, size_t n) {
1130
+ template <class Table>
1131
+ void ClockCacheShard<Table>::TEST_ReleaseN(HandleImpl* h, size_t n) {
1090
1132
  table_.TEST_ReleaseN(h, n);
1091
1133
  }
1092
1134
 
1093
- bool ClockCacheShard::Release(ClockHandle* handle, bool erase_if_last_ref) {
1135
+ template <class Table>
1136
+ bool ClockCacheShard<Table>::Release(HandleImpl* handle,
1137
+ bool erase_if_last_ref) {
1094
1138
  return Release(handle, /*useful=*/true, erase_if_last_ref);
1095
1139
  }
1096
1140
 
1097
- void ClockCacheShard::Erase(const Slice& key, const UniqueId64x2& hashed_key) {
1141
+ template <class Table>
1142
+ void ClockCacheShard<Table>::Erase(const Slice& key,
1143
+ const UniqueId64x2& hashed_key) {
1098
1144
  if (UNLIKELY(key.size() != kCacheKeySize)) {
1099
1145
  return;
1100
1146
  }
1101
1147
  table_.Erase(hashed_key);
1102
1148
  }
1103
1149
 
1104
- size_t ClockCacheShard::GetUsage() const { return table_.GetUsage(); }
1150
+ template <class Table>
1151
+ size_t ClockCacheShard<Table>::GetUsage() const {
1152
+ return table_.GetUsage();
1153
+ }
1105
1154
 
1106
- size_t ClockCacheShard::GetPinnedUsage() const {
1155
+ template <class Table>
1156
+ size_t ClockCacheShard<Table>::GetPinnedUsage() const {
1107
1157
  // Computes the pinned usage by scanning the whole hash table. This
1108
1158
  // is slow, but avoids keeping an exact counter on the clock usage,
1109
1159
  // i.e., the number of not externally referenced elements.
@@ -1114,15 +1164,15 @@ size_t ClockCacheShard::GetPinnedUsage() const {
1114
1164
  const bool charge_metadata =
1115
1165
  metadata_charge_policy_ == kFullChargeCacheMetadata;
1116
1166
  table_.ConstApplyToEntriesRange(
1117
- [&table_pinned_usage, charge_metadata](const ClockHandle& h) {
1167
+ [&table_pinned_usage, charge_metadata](const HandleImpl& h) {
1118
1168
  uint64_t meta = h.meta.load(std::memory_order_relaxed);
1119
1169
  uint64_t refcount = GetRefcount(meta);
1120
1170
  // Holding one ref for ConstApplyToEntriesRange
1121
1171
  assert(refcount > 0);
1122
1172
  if (refcount > 1) {
1123
- table_pinned_usage += h.total_charge;
1173
+ table_pinned_usage += h.GetTotalCharge();
1124
1174
  if (charge_metadata) {
1125
- table_pinned_usage += sizeof(ClockHandle);
1175
+ table_pinned_usage += sizeof(HandleImpl);
1126
1176
  }
1127
1177
  }
1128
1178
  },
@@ -1131,14 +1181,19 @@ size_t ClockCacheShard::GetPinnedUsage() const {
1131
1181
  return table_pinned_usage + table_.GetDetachedUsage();
1132
1182
  }
1133
1183
 
1134
- size_t ClockCacheShard::GetOccupancyCount() const {
1184
+ template <class Table>
1185
+ size_t ClockCacheShard<Table>::GetOccupancyCount() const {
1135
1186
  return table_.GetOccupancy();
1136
1187
  }
1137
1188
 
1138
- size_t ClockCacheShard::GetTableAddressCount() const {
1189
+ template <class Table>
1190
+ size_t ClockCacheShard<Table>::GetTableAddressCount() const {
1139
1191
  return table_.GetTableSize();
1140
1192
  }
1141
1193
 
1194
+ // Explicit instantiation
1195
+ template class ClockCacheShard<HyperClockTable>;
1196
+
1142
1197
  HyperClockCache::HyperClockCache(
1143
1198
  size_t capacity, size_t estimated_value_size, int num_shard_bits,
1144
1199
  bool strict_capacity_limit,
@@ -1151,26 +1206,28 @@ HyperClockCache::HyperClockCache(
1151
1206
  // TODO: should not need to go through two levels of pointer indirection to
1152
1207
  // get to table entries
1153
1208
  size_t per_shard = GetPerShardCapacity();
1154
- InitShards([=](ClockCacheShard* cs) {
1155
- new (cs) ClockCacheShard(per_shard, estimated_value_size,
1156
- strict_capacity_limit, metadata_charge_policy);
1209
+ InitShards([=](Shard* cs) {
1210
+ HyperClockTable::Opts opts;
1211
+ opts.estimated_value_size = estimated_value_size;
1212
+ new (cs)
1213
+ Shard(per_shard, strict_capacity_limit, metadata_charge_policy, opts);
1157
1214
  });
1158
1215
  }
1159
1216
 
1160
1217
  void* HyperClockCache::Value(Handle* handle) {
1161
- return reinterpret_cast<const ClockHandle*>(handle)->value;
1218
+ return reinterpret_cast<const HandleImpl*>(handle)->value;
1162
1219
  }
1163
1220
 
1164
1221
  size_t HyperClockCache::GetCharge(Handle* handle) const {
1165
- return reinterpret_cast<const ClockHandle*>(handle)->total_charge;
1222
+ return reinterpret_cast<const HandleImpl*>(handle)->GetTotalCharge();
1166
1223
  }
1167
1224
 
1168
1225
  Cache::DeleterFn HyperClockCache::GetDeleter(Handle* handle) const {
1169
- auto h = reinterpret_cast<const ClockHandle*>(handle);
1226
+ auto h = reinterpret_cast<const HandleImpl*>(handle);
1170
1227
  return h->deleter;
1171
1228
  }
1172
1229
 
1173
- } // namespace hyper_clock_cache
1230
+ } // namespace clock_cache
1174
1231
 
1175
1232
  // DEPRECATED (see public API)
1176
1233
  std::shared_ptr<Cache> NewClockCache(
@@ -1193,7 +1250,7 @@ std::shared_ptr<Cache> HyperClockCacheOptions::MakeSharedCache() const {
1193
1250
  constexpr size_t min_shard_size = 32U * 1024U * 1024U;
1194
1251
  my_num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
1195
1252
  }
1196
- return std::make_shared<hyper_clock_cache::HyperClockCache>(
1253
+ return std::make_shared<clock_cache::HyperClockCache>(
1197
1254
  capacity, estimated_entry_charge, my_num_shard_bits,
1198
1255
  strict_capacity_limit, metadata_charge_policy, memory_allocator);
1199
1256
  }
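
[Editor's note] For context on the MakeSharedCache change above: downstream users of this package reach this code through HyperClockCacheOptions, which now constructs the cache living in the clock_cache namespace. A minimal usage sketch against the RocksDB 7.x public API (constructor parameters and field names as declared in include/rocksdb/cache.h; verify against the exact headers shipped in this package):

    #include <memory>
    #include <rocksdb/cache.h>
    #include <rocksdb/options.h>
    #include <rocksdb/table.h>

    int main() {
      // 1 GiB cache, entries expected to be ~8 KiB blocks; num_shard_bits = -1
      // lets RocksDB pick the shard count (minimum 32 MiB per shard, per the
      // MakeSharedCache hunk above).
      rocksdb::HyperClockCacheOptions cache_opts(
          /*_capacity=*/1024 * 1024 * 1024,
          /*_estimated_entry_charge=*/8 * 1024,
          /*_num_shard_bits=*/-1,
          /*_strict_capacity_limit=*/false);
      std::shared_ptr<rocksdb::Cache> cache = cache_opts.MakeSharedCache();

      // Plug the cache in as the block cache of a block-based table factory.
      rocksdb::BlockBasedTableOptions table_opts;
      table_opts.block_cache = cache;
      rocksdb::Options db_opts;
      db_opts.table_factory.reset(
          rocksdb::NewBlockBasedTableFactory(table_opts));
      return 0;
    }
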