@nxtedition/rocksdb 7.0.38 → 7.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/binding.cc +62 -33
  2. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +27 -11
  3. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +310 -337
  4. package/deps/rocksdb/rocksdb/cache/clock_cache.h +394 -352
  5. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +1 -1
  6. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -2
  7. package/deps/rocksdb/rocksdb/db/column_family_test.cc +1 -1
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +273 -134
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +33 -2
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -3
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +2 -1
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +2 -2
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +133 -5
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +130 -1
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -4
  17. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +11 -9
  18. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +209 -12
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +54 -39
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +102 -19
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +30 -11
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  23. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +28 -25
  24. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -14
  25. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +63 -54
  26. package/deps/rocksdb/rocksdb/db/db_test.cc +6 -6
  27. package/deps/rocksdb/rocksdb/db/error_handler.cc +7 -0
  28. package/deps/rocksdb/rocksdb/db/error_handler.h +10 -9
  29. package/deps/rocksdb/rocksdb/db/log_test.cc +13 -6
  30. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/table_cache.cc +21 -0
  32. package/deps/rocksdb/rocksdb/db/table_cache.h +5 -0
  33. package/deps/rocksdb/rocksdb/db/version_set.cc +3 -2
  34. package/deps/rocksdb/rocksdb/db/version_set.h +6 -4
  35. package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -6
  36. package/deps/rocksdb/rocksdb/db/wal_edit.cc +22 -15
  37. package/deps/rocksdb/rocksdb/db/wal_edit.h +10 -0
  38. package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +4 -5
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +0 -36
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +1 -12
  41. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +23 -29
  42. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +0 -5
  43. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +7 -0
  44. package/deps/rocksdb/rocksdb/env/env_test.cc +0 -5
  45. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -7
  46. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +100 -78
  47. package/deps/rocksdb/rocksdb/options/options_test.cc +16 -0
  48. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +51 -0
  49. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +3 -0
  50. package/deps/rocksdb/rocksdb/table/table_reader.h +14 -0
  51. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -0
  52. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +8 -38
  53. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +27 -21
  54. package/deps/rocksdb/rocksdb/util/rate_limiter.h +12 -10
  55. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +11 -8
  56. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +2 -1
  57. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +59 -0
  58. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +12 -0
  59. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +31 -0
  60. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -3
  61. package/index.js +2 -2
  62. package/iterator.js +1 -1
  63. package/max_rev_operator.h +114 -0
  64. package/package.json +1 -1
  65. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  66. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -23,102 +23,137 @@
23
23
  #include "rocksdb/cache.h"
24
24
  #include "rocksdb/secondary_cache.h"
25
25
  #include "util/autovector.h"
26
- #include "util/distributed_mutex.h"
27
26
 
28
27
  namespace ROCKSDB_NAMESPACE {
29
28
 
30
29
  namespace clock_cache {
31
30
 
32
- // Block cache implementation using a lock-free open-address hash table
33
- // and clock eviction.
31
+ // An experimental alternative to LRUCache, using a lock-free, open-addressed
32
+ // hash table and clock eviction.
34
33
 
35
- ///////////////////////////////////////////////////////////////////////////////
36
- // Part 1: Handles
34
+ // ----------------------------------------------------------------------------
35
+ // 1. INTRODUCTION
37
36
  //
38
- // Every slot in the hash table is a ClockHandle. A handle can be in a few
39
- // different states, that stem from the fact that handles can be externally
40
- // referenced and, thus, can't always be immediately evicted when a delete
41
- // operation is executed or when they are replaced by a new version (via an
42
- // insert of the same key). Concretely, the state of a handle is defined by the
43
- // following two properties:
44
- // (R) Externally referenced: A handle can be referenced externally, or not.
45
- // Importantly, a handle can be evicted if and only if it's not
46
- // referenced. In particular, when an handle becomes referenced, it's
47
- // temporarily taken out of clock until all references to it are released.
48
- // (M) Marked for deletion (or invisible): An handle is marked for deletion
49
- // when an operation attempts to delete it, but the handle is externally
50
- // referenced, so it can't be immediately deleted. When this mark is placed,
51
- // lookups will no longer be able to find it. Consequently, no more external
52
- // references will be taken to the handle. When a handle is marked for
53
- // deletion, we also say it's invisible.
54
- // These properties induce 4 different states, with transitions defined as
55
- // follows:
56
- // - Not M --> M: When a handle is deleted or replaced by a new version, but
57
- // not immediately evicted.
58
- // - M --> not M: This cannot happen. Once a handle is marked for deletion,
59
- // there is no can't go back.
60
- // - R --> not R: When all references to an handle are released.
61
- // - Not R --> R: When an unreferenced handle becomes referenced. This can only
62
- // happen if the handle is visible, since references to an handle can only be
63
- // created when it's visible.
37
+ // In RocksDB, a Cache is a concurrent unordered dictionary that supports
38
+ // external references (a.k.a. user references). A ClockCache is a type of Cache
39
+ // that uses the clock algorithm as its eviction policy. Internally, a
40
+ // ClockCache is an open-addressed hash table that stores all KV pairs in a
41
+ // large array. Every slot in the hash table is a ClockHandle, which holds a KV
42
+ // pair plus some additional metadata that controls the different aspects of the
43
+ // cache: external references, the hashing mechanism, concurrent access and the
44
+ // clock algorithm.
64
45
  //
65
- ///////////////////////////////////////////////////////////////////////////////
66
- // Part 2: Hash table structure
67
46
  //
68
- // Internally, the cache uses an open-addressed hash table to index the handles.
69
- // We use tombstone counters to keep track of displacements. Probes are
70
- // generated with double-hashing (but the code can be easily modified to use
71
- // other probing schemes, like linear hashing). Because of the tombstones and
72
- // the two possible visibility states of a handle, the table slots (we use the
73
- // word "slot" to refer to handles that are not necessary valid key-value
74
- // elements) can be in 4 different states:
75
- // 1. Visible element: The slot contains an element in not M state.
76
- // 2. To-be-deleted element: The slot contains an element in M state.
77
- // 3. Tombstone: The slot doesn't contain an element, but there is some other
47
+ // 2. EXTERNAL REFERENCES
48
+ //
49
+ // An externally referenced handle can't be deleted (either evicted by the clock
50
+ // algorithm, or explicitly deleted) or replaced by a new version (via an insert
51
+ // of the same key) until all external references to it have been released by
52
+ // the users. ClockHandles have two members to support external references:
53
+ // - EXTERNAL_REFS counter: The number of external refs. When EXTERNAL_REFS > 0,
54
+ // the handle is externally referenced. Updates that intend to modify the
55
+ // handle will refrain from doing so. Eventually, when all references are
56
+ // released, we have EXTERNAL_REFS == 0, and updates can operate normally on
57
+ // the handle.
58
+ // - WILL_BE_DELETED flag: A handle is marked for deletion when an operation
59
+ // decides the handle should be deleted. This happens either when the last
60
+ // reference to a handle is released (and the release operation is instructed
61
+ // to delete on last reference) or when a delete operation is called on
62
+ // the item. This flag is needed because an externally referenced handle
63
+ // can't be immediately deleted. In these cases, the flag will be later read
64
+ // and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is
65
+ // used not only to defer deletions, but also as a barrier for external
66
+ // references: once WILL_BE_DELETED is set, lookups (which are the means to
67
+ // acquire new external references) will ignore the handle. For this reason,
68
+ // when WILL_BE_DELETED is set, we say the handle is invisible (and
69
+ // otherwise, that it's visible).
70
+ //
71
+ //
72
+ // 3. HASHING AND COLLISION RESOLUTION
73
+ //
74
+ // ClockCache uses an open-addressed hash table to store the handles.
75
+ // We use a variant of tombstones to manage collisions: every slot keeps a
76
+ // count of how many KV pairs that are currently in the cache have probed the
77
+ // slot in an attempt to insert. Probes are generated with double-hashing
78
+ // (although the code can be easily modified to use other probing schemes, like
79
+ // linear probing).
80
+ //
81
+ // A slot in the hash table can be in a few different states:
82
+ // - Element: The slot contains an element. This is indicated with the
83
+ // IS_ELEMENT flag. Element can be sub-classified depending on the
84
+ // value of WILL_BE_DELETED:
85
+ // * Visible element.
86
+ // * Invisible element.
87
+ // - Tombstone: The slot doesn't contain an element, but there is some other
78
88
  // element that probed this slot during its insertion.
79
- // 4. Empty: The slot is unused.
80
- // When a ghost is removed from the table, it can either transition to being a
81
- // tombstone or an empty slot, depending on the number of displacements of the
82
- // slot. In any case, the slot becomes available. When a handle is inserted
83
- // into that slot, it becomes a visible element again.
89
+ // - Empty: The slot is unused---it's neither an element nor a tombstone.
84
90
  //
85
- ///////////////////////////////////////////////////////////////////////////////
86
- // Part 3: The clock algorithm
91
+ // A slot cycles through the following sequence of states:
92
+ // empty or tombstone --> visible element --> invisible element -->
93
+ // empty or tombstone. Initially a slot is available---it's either
94
+ // empty or a tombstone. As soon as a KV pair is written into the slot, it
95
+ // becomes a visible element. At some point, the handle will be deleted
96
+ // by an explicit delete operation, the eviction algorithm, or an overwriting
97
+ // insert. In either case, the handle is marked for deletion. When an
98
+ // attempt to delete the element finally succeeds, the slot is freed up
99
+ // and becomes available again.
87
100
  //
88
- // We maintain a circular buffer with the handles available for eviction,
89
- // which the clock algorithm traverses (using a "clock pointer") to pick the
90
- // next victim. We use the hash table array as the circular buffer, and mark
91
- // the handles that are evictable. For this we use different clock flags, namely
92
- // NONE, LOW, MEDIUM, HIGH, that represent priorities: LOW, MEDIUM and HIGH
93
- // represent how close an element is from being evictable, LOW being immediately
94
- // evictable. NONE means the slot is not evictable. This is due to one of the
95
- // following reasons:
96
- // (i) the slot doesn't contain an element, or
97
- // (ii) the slot contains an element that is in R state, or
98
- // (iii) the slot contains an element that was in R state but it's
99
- // not any more, and the clock pointer has not swept through the
100
- // slot since the element stopped being referenced.
101
101
  //
102
- // The priority NONE is really only important for case (iii), as in the other
103
- // two cases there are other metadata fields that already capture the state.
104
- // When an element stops being referenced (and is not deleted), the clock
105
- // algorithm must acknowledge this, and assign a non-NONE priority to make
106
- // the element evictable again.
102
+ // 4. CONCURRENCY
107
103
  //
108
- ///////////////////////////////////////////////////////////////////////////////
109
- // Part 4: Synchronization
104
+ // ClockCache is lock-free. At a high level, we synchronize the operations
105
+ // using a read-prioritized, non-blocking variant of RW locks on every slot of
106
+ // the hash table. To do this we generalize the concept of reference:
107
+ // - Internal reference: Taken by a thread that is attempting to read a slot
108
+ // or do a very precise type of update.
109
+ // - Exclusive reference: Taken by a thread that is attempting to write a
110
+ // a slot extensively.
110
111
  //
111
- // We provide the following synchronization guarantees:
112
- // - Lookup is lock-free.
113
- // - Release is lock-free, unless (i) no references to the element are left,
114
- // and (ii) it was marked for deletion or the user wishes to delete if
115
- // releasing the last reference.
116
- // - Insert and Erase still use a per-shard lock.
112
+ // We defer the precise definitions to the comments in the code below.
113
+ // A crucial feature of our references is that attempting to take one never
114
+ // blocks the thread. Another important feature is that readers are
115
+ // prioritized, as they use extremely fast synchronization primitives---they
116
+ // use atomic arithmetic/bit operations, but no compare-and-swaps (which are
117
+ // much slower).
117
118
  //
118
- // Our hash table is lock-free, in the sense that system-wide progress is
119
- // guaranteed, i.e., some thread is always able to make progress.
119
+ // Internal references are used by threads to read slots during a probing
120
+ // sequence, making them the most common references (probing is performed
121
+ // in almost every operation, not just lookups). During a lookup, once
122
+ // the target element is found, and just before the handle is handed over
123
+ // to the user, an internal reference is converted into an external reference.
124
+ // During an update operation, once the target slot is found, an internal
125
+ // reference is converted into an exclusive reference. Interestingly, we
126
+ // can't atomically upgrade from internal to exclusive, or we may run into a
127
+ // deadlock. Releasing the internal reference and then taking an exclusive
128
+ // reference avoids the deadlock, but then the handle may change inbetween.
129
+ // One of the key observations we use in our implementation is that we can
130
+ // make up for this lack of atomicity using IS_ELEMENT and WILL_BE_DELETED.
120
131
  //
121
- ///////////////////////////////////////////////////////////////////////////////
132
+ // Distinguishing internal from external references is useful for two reasons:
133
+ // - Internal references are short lived, but external references are typically
134
+ // not. This is helpful when acquiring an exclusive ref: if there are any
135
+ // external references to the item, it's probably not worth waiting until
136
+ // they go away.
137
+ // - We can precisely determine when there are no more external references to a
138
+ // handle, and proceed to mark it for deletion. This is useful when users
139
+ // release external references.
140
+ //
141
+ //
142
+ // 5. CLOCK ALGORITHM
143
+ //
144
+ // The clock algorithm circularly sweeps through the hash table to find the next
145
+ // victim. Recall that handles that are referenced are not evictable; the clock
146
+ // algorithm never picks those. We use different clock priorities: NONE, LOW,
147
+ // MEDIUM and HIGH. Priorities LOW, MEDIUM and HIGH represent how close an
148
+ // element is from being evicted, LOW being the closest to eviction. NONE means
149
+ // the slot is not evictable. NONE priority is used in one of the following
150
+ // cases:
151
+ // (a) the slot doesn't contain an element, or
152
+ // (b) the slot contains an externally referenced element, or
153
+ // (c) the slot contains an element that used to be externally referenced,
154
+ // and the clock pointer has not swept through the slot since the element
155
+ // stopped being externally referenced.
156
+ // ----------------------------------------------------------------------------
122
157
 
123
158
  // The load factor p is a real number in (0, 1) such that at all
124
159
  // times at most a fraction p of all slots, without counting tombstones,
@@ -138,15 +173,18 @@ constexpr double kLoadFactor = 0.35;
138
173
 
139
174
  // The user can exceed kLoadFactor if the sizes of the inserted values don't
140
175
  // match estimated_value_size, or if strict_capacity_limit == false. To
141
- // avoid performance to plunge, we set a strict upper bound on the load factor.
176
+ // avoid a performance drop, we set a strict upper bound on the load factor.
142
177
  constexpr double kStrictLoadFactor = 0.7;
143
178
 
179
+ // Maximum number of spins when trying to acquire a ref.
180
+ // TODO(Guido) This value was set arbitrarily. Is it appropriate?
181
+ // What's the best way to bound the spinning?
182
+ constexpr uint32_t kSpinsPerTry = 100000;
183
+
144
184
  // Arbitrary seeds.
145
185
  constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
146
186
  constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
147
187
 
148
- // An experimental (under development!) alternative to LRUCache.
149
-
150
188
  struct ClockHandle {
151
189
  void* value;
152
190
  Cache::DeleterFn deleter;
@@ -154,49 +192,6 @@ struct ClockHandle {
154
192
  size_t total_charge;
155
193
  std::array<char, kCacheKeySize> key_data;
156
194
 
157
- static constexpr uint8_t kExternalRefsOffset = 0;
158
- static constexpr uint8_t kSharedRefsOffset = 15;
159
- static constexpr uint8_t kExclusiveRefOffset = 30;
160
- static constexpr uint8_t kWillBeDeletedOffset = 31;
161
-
162
- enum Refs : uint32_t {
163
- // Number of external references to the slot.
164
- EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
165
- << kExternalRefsOffset, // Bits 0, ..., 14
166
- // Number of internal references plus external references to the slot.
167
- SHARED_REFS = ((uint32_t{1} << 15) - 1)
168
- << kSharedRefsOffset, // Bits 15, ..., 29
169
- // Whether a thread has an exclusive reference to the slot.
170
- EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
171
- // Whether the handle will be deleted soon. When this bit is set, new
172
- // internal
173
- // or external references to this handle stop being accepted.
174
- // There is an exception: external references can be created from
175
- // existing external references, or converting from existing internal
176
- // references.
177
- WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
178
-
179
- // Shared references (i.e., external and internal references) and exclusive
180
- // references are our custom implementation of RW locks---external and
181
- // internal references are read locks, and exclusive references are write
182
- // locks. We prioritize readers, which never block; in fact, they don't even
183
- // use compare-and-swap operations. Using our own implementation of RW locks
184
- // allows us to save many atomic operations by packing data more carefully.
185
- // In particular:
186
- // - Combining EXTERNAL_REFS and SHARED_REFS allows us to convert an
187
- // internal
188
- // reference into an external reference in a single atomic arithmetic
189
- // operation.
190
- // - Combining SHARED_REFS and WILL_BE_DELETED allows us to attempt to take
191
- // a shared reference and check whether the entry is marked for deletion
192
- // in a single atomic arithmetic operation.
193
- };
194
-
195
- static constexpr uint32_t kOneInternalRef = 0x8000;
196
- static constexpr uint32_t kOneExternalRef = 0x8001;
197
-
198
- std::atomic<uint32_t> refs;
199
-
200
195
  static constexpr uint8_t kIsElementOffset = 1;
201
196
  static constexpr uint8_t kClockPriorityOffset = 2;
202
197
  static constexpr uint8_t kIsHitOffset = 4;
@@ -209,7 +204,7 @@ struct ClockHandle {
209
204
  CLOCK_PRIORITY = 3 << kClockPriorityOffset,
210
205
  // Whether the handle has been looked up after its insertion.
211
206
  HAS_HIT = 1 << kIsHitOffset,
212
- // The value of Cache::Priority for the handle.
207
+ // The value of Cache::Priority of the handle.
213
208
  CACHE_PRIORITY = 1 << kCachePriorityOffset,
214
209
  };
215
210
 
@@ -226,30 +221,67 @@ struct ClockHandle {
226
221
  // up in this slot or a higher one.
227
222
  std::atomic<uint32_t> displacements;
228
223
 
229
- // Synchronization rules:
230
- // - Use a shared reference when we want the handle's identity
231
- // members (key_data, hash, value and IS_ELEMENT flag) to
232
- // remain untouched, but not modify them. The only updates
233
- // that a shared reference allows are:
234
- // * set CLOCK_PRIORITY to NONE;
235
- // * set the HAS_HIT bit.
236
- // Notice that these two types of updates are idempotent, so
237
- // they don't require synchronization across shared references.
238
- // - Use an exclusive reference when we want identity members
239
- // to remain untouched, as well as modify any identity member
240
- // or flag.
241
- // - displacements can be modified without holding a reference.
242
- // - refs is only modified through appropriate functions to
243
- // take or release references.
224
+ static constexpr uint8_t kExternalRefsOffset = 0;
225
+ static constexpr uint8_t kSharedRefsOffset = 15;
226
+ static constexpr uint8_t kExclusiveRefOffset = 30;
227
+ static constexpr uint8_t kWillBeDeletedOffset = 31;
228
+
229
+ enum Refs : uint32_t {
230
+ // Synchronization model:
231
+ // - An external reference guarantees that hash, value, key_data
232
+ // and the IS_ELEMENT flag are not modified. Doesn't allow
233
+ // any writes.
234
+ // - An internal reference has the same guarantees as an
235
+ // external reference, and additionally allows the following
236
+ // idempotent updates on the handle:
237
+ // * set CLOCK_PRIORITY to NONE;
238
+ // * set the HAS_HIT bit;
239
+ // * set the WILL_BE_DELETED bit.
240
+ // - A shared reference is either an external reference or an
241
+ // internal reference.
242
+ // - An exclusive reference guarantees that no other thread has a shared
243
+ // or exclusive reference to the handle, and allows writes
244
+ // on the handle.
245
+
246
+ // Number of external references to the slot.
247
+ EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
248
+ << kExternalRefsOffset, // Bits 0, ..., 14
249
+ // Number of internal references plus external references to the slot.
250
+ SHARED_REFS = ((uint32_t{1} << 15) - 1)
251
+ << kSharedRefsOffset, // Bits 15, ..., 29
252
+ // Whether a thread has an exclusive reference to the slot.
253
+ EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
254
+ // Whether the handle will be deleted soon. When this bit is set, new
255
+ // internal
256
+ // or external references to this handle stop being accepted.
257
+ // There is an exception: external references can be created from
258
+ // existing external references, or converting from existing internal
259
+ // references.
260
+ WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
261
+
262
+ // Having these 4 fields in a single variable allows us to support the
263
+ // following operations efficiently:
264
+ // - Convert an internal reference into an external reference in a single
265
+ // atomic arithmetic operation.
266
+ // - Attempt to take a shared reference using a single atomic arithmetic
267
+ // operation. This is because we can increment the internal ref count
268
+ // as well as checking whether the entry is marked for deletion using a
269
+ // single atomic arithmetic operation (and one non-atomic comparison).
270
+ };
271
+
272
+ static constexpr uint32_t kOneInternalRef = 0x8000;
273
+ static constexpr uint32_t kOneExternalRef = 0x8001;
274
+
275
+ std::atomic<uint32_t> refs;
244
276
 
245
277
  ClockHandle()
246
278
  : value(nullptr),
247
279
  deleter(nullptr),
248
280
  hash(0),
249
281
  total_charge(0),
250
- refs(0),
251
282
  flags(0),
252
- displacements(0) {
283
+ displacements(0),
284
+ refs(0) {
253
285
  SetWillBeDeleted(false);
254
286
  SetIsElement(false);
255
287
  SetClockPriority(ClockPriority::NONE);
@@ -257,26 +289,66 @@ struct ClockHandle {
257
289
  key_data.fill(0);
258
290
  }
259
291
 
292
+ // The copy ctor and assignment operator are only used to copy a handle
293
+ // for immediate deletion. (We need to copy because the slot may become
294
+ // re-used before the deletion is completed.) We only copy the necessary
295
+ // members to carry out the deletion. In particular, we don't need
296
+ // the atomic members.
260
297
  ClockHandle(const ClockHandle& other) { *this = other; }
261
298
 
262
299
  void operator=(const ClockHandle& other) {
263
300
  value = other.value;
264
301
  deleter = other.deleter;
265
- hash = other.hash;
266
- total_charge = other.total_charge;
267
- refs.store(other.refs);
268
302
  key_data = other.key_data;
269
- flags.store(other.flags);
270
- SetWillBeDeleted(other.WillBeDeleted());
271
- SetIsElement(other.IsElement());
272
- SetClockPriority(other.GetClockPriority());
273
- SetCachePriority(other.GetCachePriority());
274
- displacements.store(other.displacements);
303
+ total_charge = other.total_charge;
275
304
  }
276
305
 
277
306
  Slice key() const { return Slice(key_data.data(), kCacheKeySize); }
278
307
 
279
- bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }
308
+ void FreeData() {
309
+ if (deleter) {
310
+ (*deleter)(key(), value);
311
+ }
312
+ }
313
+
314
+ // Calculate the memory usage by metadata.
315
+ inline size_t CalcMetaCharge(
316
+ CacheMetadataChargePolicy metadata_charge_policy) const {
317
+ if (metadata_charge_policy != kFullChargeCacheMetadata) {
318
+ return 0;
319
+ } else {
320
+ // #ifdef ROCKSDB_MALLOC_USABLE_SIZE
321
+ // return malloc_usable_size(
322
+ // const_cast<void*>(static_cast<const void*>(this)));
323
+ // #else
324
+ // TODO(Guido) malloc_usable_size only works when we call it on
325
+ // a pointer allocated with malloc. Because our handles are all
326
+ // allocated in a single shot as an array, the user can't call
327
+ // CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
328
+ // pointer returned by the cache. Moreover, malloc_usable_size
329
+ // expects a heap-allocated handle, but sometimes in our code we
330
+ // wish to pass a stack-allocated handle (this is only a performance
331
+ // concern).
332
+ // What is the right way to compute metadata charges with pre-allocated
333
+ // handles?
334
+ return sizeof(ClockHandle);
335
+ // #endif
336
+ }
337
+ }
338
+
339
+ inline void CalcTotalCharge(
340
+ size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
341
+ total_charge = charge + CalcMetaCharge(metadata_charge_policy);
342
+ }
343
+
344
+ inline size_t GetCharge(
345
+ CacheMetadataChargePolicy metadata_charge_policy) const {
346
+ size_t meta_charge = CalcMetaCharge(metadata_charge_policy);
347
+ assert(total_charge >= meta_charge);
348
+ return total_charge - meta_charge;
349
+ }
350
+
351
+ // flags functions.
280
352
 
281
353
  bool IsElement() const { return flags & IS_ELEMENT; }
282
354
 
@@ -292,10 +364,6 @@ struct ClockHandle {
292
364
 
293
365
  void SetHit() { flags |= HAS_HIT; }
294
366
 
295
- bool IsInClock() const {
296
- return GetClockPriority() != ClockHandle::ClockPriority::NONE;
297
- }
298
-
299
367
  Cache::Priority GetCachePriority() const {
300
368
  return static_cast<Cache::Priority>(flags & CACHE_PRIORITY);
301
369
  }
@@ -308,6 +376,10 @@ struct ClockHandle {
308
376
  }
309
377
  }
310
378
 
379
+ bool IsInClock() const {
380
+ return GetClockPriority() != ClockHandle::ClockPriority::NONE;
381
+ }
382
+
311
383
  ClockPriority GetClockPriority() const {
312
384
  return static_cast<ClockPriority>(flags & Flags::CLOCK_PRIORITY);
313
385
  }
@@ -328,49 +400,6 @@ struct ClockHandle {
328
400
  flags |= new_priority;
329
401
  }
330
402
 
331
- void FreeData() {
332
- if (deleter) {
333
- (*deleter)(key(), value);
334
- }
335
- }
336
-
337
- // Calculate the memory usage by metadata.
338
- inline size_t CalcMetaCharge(
339
- CacheMetadataChargePolicy metadata_charge_policy) const {
340
- if (metadata_charge_policy != kFullChargeCacheMetadata) {
341
- return 0;
342
- } else {
343
- // #ifdef ROCKSDB_MALLOC_USABLE_SIZE
344
- // return malloc_usable_size(
345
- // const_cast<void*>(static_cast<const void*>(this)));
346
- // #else
347
- // TODO(Guido) malloc_usable_size only works when we call it on
348
- // a pointer allocated with malloc. Because our handles are all
349
- // allocated in a single shot as an array, the user can't call
350
- // CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
351
- // pointer returned by the cache. Moreover, malloc_usable_size
352
- // expects a heap-allocated handle, but sometimes in our code we
353
- // wish to pass a stack-allocated handle (this is only a performance
354
- // concern).
355
- // What is the right way to compute metadata charges with pre-allocated
356
- // handles?
357
- return sizeof(ClockHandle);
358
- // #endif
359
- }
360
- }
361
-
362
- inline void CalcTotalCharge(
363
- size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
364
- total_charge = charge + CalcMetaCharge(metadata_charge_policy);
365
- }
366
-
367
- inline size_t GetCharge(
368
- CacheMetadataChargePolicy metadata_charge_policy) const {
369
- size_t meta_charge = CalcMetaCharge(metadata_charge_policy);
370
- assert(total_charge >= meta_charge);
371
- return total_charge - meta_charge;
372
- }
373
-
374
403
  inline bool IsEmpty() const {
375
404
  return !this->IsElement() && this->displacements == 0;
376
405
  }
@@ -380,11 +409,12 @@ struct ClockHandle {
380
409
  }
381
410
 
382
411
  inline bool Matches(const Slice& some_key, uint32_t some_hash) const {
383
- return this->IsElement() && this->hash == some_hash &&
384
- this->key() == some_key;
412
+ return this->hash == some_hash && this->key() == some_key;
385
413
  }
386
414
 
387
- bool WillBeDeleted() const { return refs & WILL_BE_DELETED; }
415
+ // refs functions.
416
+
417
+ inline bool WillBeDeleted() const { return refs & WILL_BE_DELETED; }
388
418
 
389
419
  void SetWillBeDeleted(bool will_be_deleted) {
390
420
  if (will_be_deleted) {
@@ -394,28 +424,7 @@ struct ClockHandle {
394
424
  }
395
425
  }
396
426
 
397
- // The following functions are for taking and releasing refs.
398
-
399
- // Tries to take an external ref. Returns true iff it succeeds.
400
- inline bool TryExternalRef() {
401
- if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
402
- return true;
403
- }
404
- refs -= kOneExternalRef;
405
- return false;
406
- }
407
-
408
- // Releases an external ref. Returns the new value (this is useful to
409
- // avoid an extra atomic read).
410
- inline uint32_t ReleaseExternalRef() { return refs -= kOneExternalRef; }
411
-
412
- // Take an external ref, assuming there is already one external ref
413
- // to the handle.
414
- void Ref() {
415
- // TODO(Guido) Is it okay to assume that the existing external reference
416
- // survives until this function returns?
417
- refs += kOneExternalRef;
418
- }
427
+ bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }
419
428
 
420
429
  // Tries to take an internal ref. Returns true iff it succeeds.
421
430
  inline bool TryInternalRef() {
@@ -426,9 +435,19 @@ struct ClockHandle {
426
435
  return false;
427
436
  }
428
437
 
429
- inline void ReleaseInternalRef() { refs -= kOneInternalRef; }
438
+ // Tries to take an external ref. Returns true iff it succeeds.
439
+ inline bool TryExternalRef() {
440
+ if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
441
+ return true;
442
+ }
443
+ refs -= kOneExternalRef;
444
+ return false;
445
+ }
430
446
 
431
447
  // Tries to take an exclusive ref. Returns true iff it succeeds.
448
+ // TODO(Guido) After every TryExclusiveRef call, we always call
449
+ // WillBeDeleted(). We could save an atomic read by having an output parameter
450
+ // with the last value of refs.
432
451
  inline bool TryExclusiveRef() {
433
452
  uint32_t will_be_deleted = refs & WILL_BE_DELETED;
434
453
  uint32_t expected = will_be_deleted;
@@ -436,15 +455,18 @@ struct ClockHandle {
436
455
  EXCLUSIVE_REF | will_be_deleted);
437
456
  }
438
457
 
439
- // Repeatedly tries to take an exclusive reference, but stops as soon
440
- // as an external reference is detected (in this case the wait would
441
- // presumably be too long).
442
- inline bool TrySpinExclusiveRef() {
458
+ // Repeatedly tries to take an exclusive reference, but aborts as soon
459
+ // as an external or exclusive reference is detected (since the wait
460
+ // would presumably be too long).
461
+ inline bool SpinTryExclusiveRef() {
443
462
  uint32_t expected = 0;
444
463
  uint32_t will_be_deleted = 0;
464
+ uint32_t spins = kSpinsPerTry;
445
465
  while (!refs.compare_exchange_strong(expected,
446
- EXCLUSIVE_REF | will_be_deleted)) {
447
- if (expected & EXTERNAL_REFS) {
466
+ EXCLUSIVE_REF | will_be_deleted) &&
467
+ spins--) {
468
+ std::this_thread::yield();
469
+ if (expected & (EXTERNAL_REFS | EXCLUSIVE_REF)) {
448
470
  return false;
449
471
  }
450
472
  will_be_deleted = expected & WILL_BE_DELETED;
@@ -453,75 +475,88 @@ struct ClockHandle {
453
475
  return true;
454
476
  }
455
477
 
456
- inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); }
478
+ // Take an external ref, assuming there is already one external ref
479
+ // to the handle.
480
+ void Ref() {
481
+ // TODO(Guido) Is it okay to assume that the existing external reference
482
+ // survives until this function returns?
483
+ refs += kOneExternalRef;
484
+ }
457
485
 
458
- // The following functions are for upgrading and downgrading refs.
459
- // They guarantee atomicity, i.e., no exclusive refs to the handle
460
- // can be taken by a different thread during the conversion.
486
+ inline void ReleaseExternalRef() { refs -= kOneExternalRef; }
461
487
 
462
- inline void ExclusiveToInternalRef() {
463
- refs += kOneInternalRef;
464
- ReleaseExclusiveRef();
465
- }
488
+ inline void ReleaseInternalRef() { refs -= kOneInternalRef; }
466
489
 
490
+ inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); }
491
+
492
+ // Downgrade an exclusive ref to external.
467
493
  inline void ExclusiveToExternalRef() {
468
494
  refs += kOneExternalRef;
469
495
  ReleaseExclusiveRef();
470
496
  }
471
497
 
472
- // TODO(Guido) Do we want to bound the loop and prepare the
473
- // algorithms to react to a failure?
474
- inline void InternalToExclusiveRef() {
475
- uint32_t expected = kOneInternalRef;
476
- uint32_t will_be_deleted = 0;
477
- while (!refs.compare_exchange_strong(expected,
478
- EXCLUSIVE_REF | will_be_deleted)) {
479
- will_be_deleted = expected & WILL_BE_DELETED;
480
- expected = kOneInternalRef | will_be_deleted;
481
- }
482
- }
483
-
498
+ // Convert an internal ref into external.
484
499
  inline void InternalToExternalRef() {
485
500
  refs += kOneExternalRef - kOneInternalRef;
486
501
  }
487
502
 
488
- // TODO(Guido) Same concern.
489
- inline void ExternalToExclusiveRef() {
490
- uint32_t expected = kOneExternalRef;
491
- uint32_t will_be_deleted = 0;
492
- while (!refs.compare_exchange_strong(expected,
493
- EXCLUSIVE_REF | will_be_deleted)) {
494
- will_be_deleted = expected & WILL_BE_DELETED;
495
- expected = kOneExternalRef | will_be_deleted;
496
- }
497
- }
498
-
499
503
  }; // struct ClockHandle
500
504
 
501
505
  class ClockHandleTable {
502
506
  public:
503
- explicit ClockHandleTable(int hash_bits);
507
+ explicit ClockHandleTable(size_t capacity, int hash_bits);
504
508
  ~ClockHandleTable();
505
509
 
506
- // Returns a pointer to a visible element matching the key/hash, or
507
- // nullptr if not present.
510
+ // Returns a pointer to a visible handle matching the key/hash, or
511
+ // nullptr if not present. When an actual handle is produced, an
512
+ // internal reference is handed over.
508
513
  ClockHandle* Lookup(const Slice& key, uint32_t hash);
509
514
 
510
- // Inserts a copy of h into the hash table.
511
- // Returns a pointer to the inserted handle, or nullptr if no slot
512
- // available was found. If an existing visible element matching the
513
- // key/hash is already present in the hash table, the argument old
514
- // is set to point to it; otherwise, it's set to nullptr.
515
- // Returns an exclusive reference to h, and no references to old.
516
- ClockHandle* Insert(ClockHandle* h, ClockHandle** old);
515
+ // Inserts a copy of h into the hash table. Returns a pointer to the
516
+ // inserted handle, or nullptr if no available slot was found. Every
517
+ // existing visible handle matching the key is already present in the
518
+ // hash table is marked as WILL_BE_DELETED. The deletion is also attempted,
519
+ // and, if the attempt is successful, the handle is inserted into the
520
+ // autovector deleted. When take_reference is true, the function hands
521
+ // over an external reference on the handle, and otherwise no reference is
522
+ // produced.
523
+ ClockHandle* Insert(ClockHandle* h, autovector<ClockHandle>* deleted,
524
+ bool take_reference);
525
+
526
+ // Assigns h the appropriate clock priority, making it evictable.
527
+ void ClockOn(ClockHandle* h);
517
528
 
518
- // Removes h from the hash table. The handle must already be off clock.
519
- void Remove(ClockHandle* h);
529
+ // Makes h non-evictable.
530
+ void ClockOff(ClockHandle* h);
520
531
 
521
- // Extracts the element information from a handle (src), and assigns it
522
- // to a hash table slot (dst). Doesn't touch displacements and refs,
523
- // which are maintained by the hash table algorithm.
524
- void Assign(ClockHandle* dst, ClockHandle* src);
532
+ // Runs the clock eviction algorithm until there is enough space to
533
+ // insert an element with the given charge.
534
+ void ClockRun(size_t charge);
535
+
536
+ // Remove h from the hash table. Requires an exclusive ref to h.
537
+ void Remove(ClockHandle* h, autovector<ClockHandle>* deleted);
538
+
539
+ // Remove from the hash table all handles with matching key/hash along a
540
+ // probe sequence, starting from the given probe number. Doesn't
541
+ // require any references.
542
+ void RemoveAll(const Slice& key, uint32_t hash, uint32_t& probe,
543
+ autovector<ClockHandle>* deleted);
544
+
545
+ void RemoveAll(const Slice& key, uint32_t hash,
546
+ autovector<ClockHandle>* deleted) {
547
+ uint32_t probe = 0;
548
+ RemoveAll(key, hash, probe, deleted);
549
+ }
550
+
551
+ void Free(autovector<ClockHandle>* deleted);
552
+
553
+ // Tries to remove h from the hash table. If the attempt is successful,
554
+ // the function hands over an exclusive ref to h.
555
+ bool TryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
556
+
557
+ // Similar to TryRemove, except that it spins, increasing the chances of
558
+ // success. Requires that the caller thread has no shared ref to h.
559
+ bool SpinTryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
525
560
 
526
561
  template <typename T>
527
562
  void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
@@ -531,12 +566,9 @@ class ClockHandleTable {
531
566
  if (h->TryExclusiveRef()) {
532
567
  if (h->IsElement() &&
533
568
  (apply_if_will_be_deleted || !h->WillBeDeleted())) {
534
- // Hand the internal ref over to func, which is now responsible
535
- // to release it.
536
569
  func(h);
537
- } else {
538
- h->ReleaseExclusiveRef();
539
570
  }
571
+ h->ReleaseExclusiveRef();
540
572
  }
541
573
  }
542
574
  }
@@ -565,53 +597,81 @@ class ClockHandleTable {
565
597
 
566
598
  uint32_t GetOccupancy() const { return occupancy_; }
567
599
 
600
+ size_t GetUsage() const { return usage_; }
601
+
602
+ size_t GetCapacity() const { return capacity_; }
603
+
568
604
  // Returns x mod 2^{length_bits_}.
569
605
  uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
570
606
 
571
607
  private:
572
- friend class ClockCacheShard;
573
-
574
- int FindElement(const Slice& key, uint32_t hash, uint32_t& probe);
575
-
576
- int FindAvailableSlot(const Slice& key, uint32_t& probe);
577
-
578
- int FindElementOrAvailableSlot(const Slice& key, uint32_t hash,
579
- uint32_t& probe);
580
-
581
- // Returns the index of the first slot probed (hashing with
582
- // the given key) with a handle e such that match(e) is true.
583
- // At every step, the function first tests whether match(e) holds.
584
- // If it's false, it evaluates abort(e) to decide whether the
585
- // search should be aborted, and in the affirmative returns -1.
586
- // For every handle e probed except the last one, the function runs
587
- // update(e). We say a probe to a handle e is aborting if match(e) is
588
- // false and abort(e) is true. The argument probe is one more than the
589
- // last non-aborting probe during the call. This is so that that the
590
- // variable can be used to keep track of progress across consecutive
591
- // calls to FindSlot.
592
- inline int FindSlot(const Slice& key, std::function<bool(ClockHandle*)> match,
593
- std::function<bool(ClockHandle*)> stop,
594
- std::function<void(ClockHandle*)> update,
595
- uint32_t& probe);
596
-
597
- // After a failed FindSlot call (i.e., with answer -1), this function
598
- // decrements all displacements, starting from the 0-th probe.
608
+ // Extracts the element information from a handle (src), and assigns it
609
+ // to a hash table slot (dst). Doesn't touch displacements and refs,
610
+ // which are maintained by the hash table algorithm.
611
+ void Assign(ClockHandle* dst, ClockHandle* src);
612
+
613
+ // Returns the first slot in the probe sequence, starting from the given
614
+ // probe number, with a handle e such that match(e) is true. At every
615
+ // step, the function first tests whether match(e) holds. If this is false,
616
+ // it evaluates abort(e) to decide whether the search should be aborted,
617
+ // and in the affirmative returns -1. For every handle e probed except
618
+ // the last one, the function runs update(e).
619
+ // The probe parameter is modified as follows. We say a probe to a handle
620
+ // e is aborting if match(e) is false and abort(e) is true. Then the final
621
+ // value of probe is one more than the last non-aborting probe during the
622
+ // call. This is so that that the variable can be used to keep track of
623
+ // progress across consecutive calls to FindSlot.
624
+ inline ClockHandle* FindSlot(const Slice& key,
625
+ std::function<bool(ClockHandle*)> match,
626
+ std::function<bool(ClockHandle*)> stop,
627
+ std::function<void(ClockHandle*)> update,
628
+ uint32_t& probe);
629
+
630
+ // Returns an available slot for the given key. All copies of the
631
+ // key found along the probing sequence until an available slot is
632
+ // found are marked for deletion. On each of them, a deletion is
633
+ // attempted, and when the attempt succeeds the slot is assigned to
634
+ // the new copy of the element.
635
+ ClockHandle* FindAvailableSlot(const Slice& key, uint32_t hash,
636
+ uint32_t& probe,
637
+ autovector<ClockHandle>* deleted);
638
+
639
+ // After a failed FindSlot call (i.e., with answer -1) in
640
+ // FindAvailableSlot, this function fixes all displacements's
641
+ // starting from the 0-th probe, until the given probe.
599
642
  void Rollback(const Slice& key, uint32_t probe);
600
643
 
601
644
  // Number of hash bits used for table index.
602
645
  // The size of the table is 1 << length_bits_.
603
- int length_bits_;
646
+ const int length_bits_;
604
647
 
605
648
  // For faster computation of ModTableSize.
606
649
  const uint32_t length_bits_mask_;
607
650
 
608
- // Number of elements in the table.
609
- uint32_t occupancy_;
610
-
611
651
  // Maximum number of elements the user can store in the table.
612
- uint32_t occupancy_limit_;
652
+ const uint32_t occupancy_limit_;
653
+
654
+ // Maximum total charge of all elements stored in the table.
655
+ const size_t capacity_;
613
656
 
657
+ // We partition the following members into different cache lines
658
+ // to avoid false sharing among Lookup, Release, Erase and Insert
659
+ // operations in ClockCacheShard.
660
+
661
+ ALIGN_AS(CACHE_LINE_SIZE)
662
+ // Array of slots comprising the hash table.
614
663
  std::unique_ptr<ClockHandle[]> array_;
664
+
665
+ ALIGN_AS(CACHE_LINE_SIZE)
666
+ // Clock algorithm sweep pointer.
667
+ std::atomic<uint32_t> clock_pointer_;
668
+
669
+ ALIGN_AS(CACHE_LINE_SIZE)
670
+ // Number of elements in the table.
671
+ std::atomic<uint32_t> occupancy_;
672
+
673
+ // Memory size for entries residing in the cache.
674
+ std::atomic<size_t> usage_;
615
675
  }; // class ClockHandleTable
616
676
 
617
677
  // A single shard of sharded cache.
@@ -652,20 +712,26 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
652
712
  Statistics* /*stats*/) override {
653
713
  return Lookup(key, hash);
654
714
  }
715
+
655
716
  Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
656
717
 
657
718
  bool Release(Cache::Handle* handle, bool /*useful*/,
658
719
  bool erase_if_last_ref) override {
659
720
  return Release(handle, erase_if_last_ref);
660
721
  }
722
+
661
723
  bool IsReady(Cache::Handle* /*handle*/) override { return true; }
724
+
662
725
  void Wait(Cache::Handle* /*handle*/) override {}
663
726
 
664
727
  bool Ref(Cache::Handle* handle) override;
728
+
665
729
  bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
730
+
666
731
  void Erase(const Slice& key, uint32_t hash) override;
667
732
 
668
733
  size_t GetUsage() const override;
734
+
669
735
  size_t GetPinnedUsage() const override;
670
736
 
671
737
  void ApplyToSomeEntries(
@@ -675,20 +741,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
675
741
 
676
742
  void EraseUnRefEntries() override;
677
743
 
678
- std::string GetPrintableOptions() const override;
744
+ std::string GetPrintableOptions() const override { return std::string{}; }
679
745
 
680
746
  private:
681
747
  friend class ClockCache;
682
748
 
683
- // Makes an element evictable by clock.
684
- void ClockOn(ClockHandle* h);
685
-
686
- // Makes an element non-evictable.
687
- void ClockOff(ClockHandle* h);
688
-
689
- // Requires an exclusive ref on h.
690
- void Evict(ClockHandle* h);
691
-
692
749
  // Free some space following strict clock policy until enough space
693
750
  // to hold (usage_ + charge) is freed or there are no evictable elements.
694
751
  void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);
@@ -703,34 +760,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
703
760
  static int CalcHashBits(size_t capacity, size_t estimated_value_size,
704
761
  CacheMetadataChargePolicy metadata_charge_policy);
705
762
 
706
- // Initialized before use.
707
- size_t capacity_;
708
-
709
763
  // Whether to reject insertion if cache reaches its full capacity.
710
- bool strict_capacity_limit_;
711
-
712
- uint32_t clock_pointer_;
713
-
714
- // ------------^^^^^^^^^^^^^-----------
715
- // Not frequently modified data members
716
- // ------------------------------------
717
- //
718
- // We separate data members that are updated frequently from the ones that
719
- // are not frequently updated so that they don't share the same cache line
720
- // which will lead into false cache sharing
721
- //
722
- // ------------------------------------
723
- // Frequently modified data members
724
- // ------------vvvvvvvvvvvvv-----------
725
- ClockHandleTable table_;
764
+ std::atomic<bool> strict_capacity_limit_;
726
765
 
727
- // Memory size for entries residing in the cache.
728
- size_t usage_;
729
-
730
- // mutex_ protects the following state.
731
- // We don't count mutex_ as the cache's internal state so semantically we
732
- // don't mind mutex_ invoking the non-const actions.
733
- mutable DMutex mutex_;
766
+ ClockHandleTable table_;
734
767
  }; // class ClockCacheShard
735
768
 
736
769
  class ClockCache
@@ -743,19 +776,28 @@ class ClockCache
743
776
  bool strict_capacity_limit,
744
777
  CacheMetadataChargePolicy metadata_charge_policy =
745
778
  kDontChargeCacheMetadata);
779
+
746
780
  ~ClockCache() override;
781
+
747
782
  const char* Name() const override { return "ClockCache"; }
783
+
748
784
  CacheShard* GetShard(uint32_t shard) override;
785
+
749
786
  const CacheShard* GetShard(uint32_t shard) const override;
787
+
750
788
  void* Value(Handle* handle) override;
789
+
751
790
  size_t GetCharge(Handle* handle) const override;
791
+
752
792
  uint32_t GetHash(Handle* handle) const override;
793
+
753
794
  DeleterFn GetDeleter(Handle* handle) const override;
795
+
754
796
  void DisownData() override;
755
797
 
756
798
  private:
757
799
  ClockCacheShard* shards_ = nullptr;
758
- int num_shards_ = 0;
800
+ int num_shards_;
759
801
  }; // class ClockCache
760
802
 
761
803
  } // namespace clock_cache