@nxtedition/rocksdb 7.0.4 → 7.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/binding.cc +320 -324
  2. package/chained-batch.js +6 -1
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +8 -3
  4. package/deps/rocksdb/rocksdb/Makefile +10 -4
  5. package/deps/rocksdb/rocksdb/TARGETS +6 -4
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +9 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +14 -0
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +8 -8
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +272 -174
  10. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +201 -57
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +19 -19
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -1
  13. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +170 -0
  14. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +95 -0
  15. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +298 -0
  16. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +172 -0
  17. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -3
  18. package/deps/rocksdb/rocksdb/db/column_family.h +6 -3
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +10 -0
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +6 -6
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +38 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -5
  24. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +4 -7
  25. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -71
  26. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +70 -1
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +13 -12
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +36 -0
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -4
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +139 -91
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +48 -14
  33. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +90 -55
  34. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +9 -4
  35. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -1
  36. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -7
  37. package/deps/rocksdb/rocksdb/db/db_write_test.cc +35 -0
  38. package/deps/rocksdb/rocksdb/db/dbformat.cc +3 -1
  39. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -3
  40. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -1
  41. package/deps/rocksdb/rocksdb/db/memtable.cc +1 -0
  42. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
  43. package/deps/rocksdb/rocksdb/db/repair.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/version_builder.cc +43 -1
  45. package/deps/rocksdb/rocksdb/db/version_edit.cc +13 -5
  46. package/deps/rocksdb/rocksdb/db/version_edit.h +22 -1
  47. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +4 -5
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +109 -41
  49. package/deps/rocksdb/rocksdb/db/version_set.h +36 -3
  50. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -4
  51. package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -10
  52. package/deps/rocksdb/rocksdb/db/version_util.h +1 -1
  53. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -1
  54. package/deps/rocksdb/rocksdb/db/write_batch.cc +34 -10
  55. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +2 -0
  56. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -0
  57. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +2 -0
  58. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -1
  60. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +7 -5
  61. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -10
  62. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -7
  63. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +2 -0
  64. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +24 -3
  65. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  66. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +10 -0
  67. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +5 -0
  68. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
  69. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +9 -5
  70. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +1 -0
  72. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +1 -1
  73. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  74. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -3
  75. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +8 -6
  76. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -2
  78. package/deps/rocksdb/rocksdb/options/options_test.cc +1 -11
  79. package/deps/rocksdb/rocksdb/port/port_posix.h +7 -0
  80. package/deps/rocksdb/rocksdb/port/win/port_win.h +11 -3
  81. package/deps/rocksdb/rocksdb/src.mk +6 -2
  82. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -33
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +3 -3
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -118
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +6 -8
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +10 -13
  87. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +4 -9
  88. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +0 -1
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -28
  90. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -3
  91. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -91
  92. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -30
  93. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -27
  94. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +11 -13
  95. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -40
  96. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +0 -1
  97. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +22 -43
  98. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +11 -22
  99. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +24 -25
  100. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +0 -1
  101. package/deps/rocksdb/rocksdb/table/get_context.h +0 -1
  102. package/deps/rocksdb/rocksdb/table/table_test.cc +3 -18
  103. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +3 -16
  104. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -3
  105. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -1
  106. package/deps/rocksdb/rocksdb/util/bloom_test.cc +0 -201
  107. package/deps/rocksdb/rocksdb/util/distributed_mutex.h +48 -0
  108. package/deps/rocksdb/rocksdb/util/filter_bench.cc +5 -11
  109. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3 -0
  110. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -21
  111. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  112. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +45 -0
  113. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +21 -14
  114. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +10 -1
  115. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +3 -1
  116. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +9 -0
  117. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +3 -2
  118. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -1
  119. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +5 -4
  120. package/deps/rocksdb/rocksdb.gyp +1 -1
  121. package/index.js +36 -14
  122. package/package-lock.json +2 -2
  123. package/package.json +1 -1
  124. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  125. package/prebuilds/linux-x64/node.napi.node +0 -0
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +0 -358
  127. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +0 -127
  128. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +0 -219
@@ -8,44 +8,131 @@
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
  #pragma once
10
10
 
11
+ #include <array>
11
12
  #include <memory>
12
13
  #include <string>
13
14
 
15
+ #include "cache/cache_key.h"
14
16
  #include "cache/sharded_cache.h"
15
17
  #include "port/lang.h"
16
18
  #include "port/malloc.h"
17
19
  #include "port/port.h"
18
20
  #include "rocksdb/secondary_cache.h"
19
21
  #include "util/autovector.h"
22
+ #include "util/distributed_mutex.h"
20
23
 
21
24
  namespace ROCKSDB_NAMESPACE {
22
25
  namespace fast_lru_cache {
23
26
 
27
+ // LRU cache implementation using an open-address hash table.
28
+
29
+ // Every slot in the hash table is an LRUHandle. Because handles can be
30
+ // referenced externally, we can't discard them immediately once they are
31
+ // deleted (via a delete or an LRU eviction) or replaced by a new version
32
+ // (via an insert of the same key). The state of an element is defined by
33
+ // the following two properties:
34
+ // (R) Referenced: An element can be referenced externally (refs > 0), or not.
35
+ // Importantly, an element can be evicted if and only if it's not
36
+ // referenced. In particular, when an element becomes referenced, it's
37
+ // temporarily taken out of the LRU list until all references to it
38
+ // are dropped.
39
+ // (V) Visible: An element can be visible for lookups (IS_VISIBLE set), or not.
40
+ // Initially, every element is visible. An element that is not visible is
41
+ // called a ghost.
42
+ // These properties induce 4 different states, with transitions defined as
43
+ // follows:
44
+ // - V --> not V: When a visible element is deleted or replaced by a new
45
+ // version.
46
+ // - Not V --> V: This cannot happen. A ghost remains in that state until it's
47
+ // not referenced any more, at which point it's ready to be removed from the
48
+ // hash table. (A ghost simply waits to transition to the afterlife---it will
49
+ // never be visible again.)
50
+ // - R --> not R: When all references to an element are dropped.
51
+ // - Not R --> R: When an unreferenced element becomes referenced. This can only
52
+ // happen if the element is V, since references to an element can only be
53
+ // created when it's visible.
54
+
55
+ // Internally, the cache uses an open-addressed hash table to index the handles.
56
+ // We use tombstone counters to keep track of displacements.
57
+ // Because of the tombstones and the two possible visibility states of an
58
+ // element, the table slots can be in 4 different states:
59
+ // 1. Visible element (IS_ELEMENT set and IS_VISIBLE set): The slot contains a
60
+ // key-value element.
61
+ // 2. Ghost element (IS_ELEMENT set and IS_VISIBLE unset): The slot contains an
62
+ // element that has been removed, but it's still referenced. It's invisible
63
+ // to lookups.
64
+ // 3. Tombstone (IS_ELEMENT unset and displacements > 0): The slot contains a
65
+ // tombstone.
66
+ // 4. Empty (IS_ELEMENT unset and displacements == 0): The slot is unused.
67
+ // A slot that is an element can further have IS_VISIBLE set or not.
68
+ // When a ghost is removed from the table, it can either transition to being a
69
+ // tombstone or an empty slot, depending on the number of displacements of the
70
+ // slot. In any case, the slot becomes available. When a handle is inserted
71
+ // into that slot, it becomes a visible element again.
72
+
73
+ constexpr uint8_t kCacheKeySize =
74
+ static_cast<uint8_t>(sizeof(ROCKSDB_NAMESPACE::CacheKey));
75
+
76
+ // The load factor p is a real number in (0, 1) such that at all
77
+ // times at most a fraction p of all slots, without counting tombstones,
78
+ // are occupied by elements. This means that the probability that a
79
+ // random probe hits an empty slot is at most p, and thus at most 1/p probes
80
+ // are required on average. We use p = 70%, so between 1 and 2 probes are
81
+ // needed on average.
82
+ // Because the size of the hash table is always rounded up to the next
83
+ // power of 2, p is really an upper bound on the actual load factor---the
84
+ // actual load factor is anywhere between p/2 and p. This is a bit wasteful,
85
+ // but bear in mind that slots only hold metadata, not actual values.
86
+ // Since space cost is dominated by the values (the LSM blocks),
87
+ // overprovisioning the table with metadata only increases the total cache space
88
+ // usage by a tiny fraction.
89
+ constexpr double kLoadFactor = 0.7;
90
+
91
+ // Arbitrary seeds.
92
+ constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
93
+ constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
94
+
24
95
  // An experimental (under development!) alternative to LRUCache
25
96
 
26
97
  struct LRUHandle {
27
98
  void* value;
28
99
  Cache::DeleterFn deleter;
29
- LRUHandle* next_hash;
30
100
  LRUHandle* next;
31
101
  LRUHandle* prev;
32
102
  size_t total_charge; // TODO(opt): Only allow uint32_t?
33
- size_t key_length;
34
103
  // The hash of key(). Used for fast sharding and comparisons.
35
104
  uint32_t hash;
36
105
  // The number of external refs to this entry. The cache itself is not counted.
37
106
  uint32_t refs;
38
107
 
39
108
  enum Flags : uint8_t {
40
- // Whether this entry is referenced by the hash table.
41
- IN_CACHE = (1 << 0),
109
+ // Whether the handle is visible to Lookups.
110
+ IS_VISIBLE = (1 << 0),
111
+ // Whether the slot is in use by an element.
112
+ IS_ELEMENT = (1 << 1),
42
113
  };
43
114
  uint8_t flags;
44
115
 
45
- // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
46
- char key_data[1];
116
+ // The number of elements that hash to this slot or a lower one,
117
+ // but wind up in a higher slot.
118
+ uint32_t displacements;
119
+
120
+ std::array<char, kCacheKeySize> key_data;
121
+
122
+ LRUHandle() {
123
+ value = nullptr;
124
+ deleter = nullptr;
125
+ next = nullptr;
126
+ prev = nullptr;
127
+ total_charge = 0;
128
+ hash = 0;
129
+ refs = 0;
130
+ flags = 0;
131
+ displacements = 0;
132
+ key_data.fill(0);
133
+ }
47
134
 
48
- Slice key() const { return Slice(key_data, key_length); }
135
+ Slice key() const { return Slice(key_data.data(), kCacheKeySize); }
49
136
 
50
137
  // Increase the reference count by 1.
51
138
  void Ref() { refs++; }
@@ -60,22 +147,31 @@ struct LRUHandle {
60
147
  // Return true if there are external refs, false otherwise.
61
148
  bool HasRefs() const { return refs > 0; }
62
149
 
63
- bool InCache() const { return flags & IN_CACHE; }
150
+ bool IsVisible() const { return flags & IS_VISIBLE; }
64
151
 
65
- void SetInCache(bool in_cache) {
66
- if (in_cache) {
67
- flags |= IN_CACHE;
152
+ void SetIsVisible(bool is_visible) {
153
+ if (is_visible) {
154
+ flags |= IS_VISIBLE;
68
155
  } else {
69
- flags &= ~IN_CACHE;
156
+ flags &= ~IS_VISIBLE;
70
157
  }
71
158
  }
72
159
 
73
- void Free() {
160
+ bool IsElement() const { return flags & IS_ELEMENT; }
161
+
162
+ void SetIsElement(bool is_element) {
163
+ if (is_element) {
164
+ flags |= IS_ELEMENT;
165
+ } else {
166
+ flags &= ~IS_ELEMENT;
167
+ }
168
+ }
169
+
170
+ void FreeData() {
74
171
  assert(refs == 0);
75
172
  if (deleter) {
76
173
  (*deleter)(key(), value);
77
174
  }
78
- delete[] reinterpret_cast<char*>(this);
79
175
  }
80
176
 
81
177
  // Calculate the memory usage by metadata.
@@ -84,13 +180,22 @@ struct LRUHandle {
84
180
  if (metadata_charge_policy != kFullChargeCacheMetadata) {
85
181
  return 0;
86
182
  } else {
87
- #ifdef ROCKSDB_MALLOC_USABLE_SIZE
88
- return malloc_usable_size(
89
- const_cast<void*>(static_cast<const void*>(this)));
90
- #else
91
- // This is the size that is used when a new handle is created.
92
- return sizeof(LRUHandle) - 1 + key_length;
93
- #endif
183
+ // #ifdef ROCKSDB_MALLOC_USABLE_SIZE
184
+ // return malloc_usable_size(
185
+ // const_cast<void*>(static_cast<const void*>(this)));
186
+ // #else
187
+ // TODO(Guido) malloc_usable_size only works when we call it on
188
+ // a pointer allocated with malloc. Because our handles are all
189
+ // allocated in a single shot as an array, the user can't call
190
+ // CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
191
+ // pointer returned by the cache. Moreover, malloc_usable_size
192
+ // expects a heap-allocated handle, but sometimes in our code we
193
+ // wish to pass a stack-allocated handle (this is only a performance
194
+ // concern).
195
+ // What is the right way to compute metadata charges with pre-allocated
196
+ // handles?
197
+ return sizeof(LRUHandle);
198
+ // #endif
94
199
  }
95
200
  }
96
201
 
@@ -105,8 +210,23 @@ struct LRUHandle {
105
210
  assert(total_charge >= meta_charge);
106
211
  return total_charge - meta_charge;
107
212
  }
213
+
214
+ inline bool IsEmpty() {
215
+ return !this->IsElement() && this->displacements == 0;
216
+ }
217
+
218
+ inline bool IsTombstone() {
219
+ return !this->IsElement() && this->displacements > 0;
220
+ }
221
+
222
+ inline bool Matches(const Slice& some_key, uint32_t some_hash) {
223
+ return this->IsElement() && this->hash == some_hash &&
224
+ this->key() == some_key;
225
+ }
108
226
  };
109
227
 
228
+ // TODO(Guido) Update the following comment.
229
+
110
230
  // We provide our own simple hash table since it removes a whole bunch
111
231
  // of porting hacks and is also faster than some of the built-in hash
112
232
  // table implementations in some of the compiler/runtime combinations
@@ -114,45 +234,72 @@ struct LRUHandle {
114
234
  // 4.4.3's builtin hashtable.
115
235
  class LRUHandleTable {
116
236
  public:
117
- explicit LRUHandleTable(int hash_bits);
237
+ explicit LRUHandleTable(uint8_t hash_bits);
118
238
  ~LRUHandleTable();
119
239
 
240
+ // Returns a pointer to a visible element matching the key/hash, or
241
+ // nullptr if not present.
120
242
  LRUHandle* Lookup(const Slice& key, uint32_t hash);
121
- LRUHandle* Insert(LRUHandle* h);
122
- LRUHandle* Remove(const Slice& key, uint32_t hash);
243
+
244
+ // Inserts a copy of h into the hash table.
245
+ // Returns a pointer to the inserted handle, or nullptr if no available
246
+ // slot was found. If an existing visible element matching the
247
+ // key/hash is already present in the hash table, the argument old
248
+ // is set to point to it; otherwise, it's set to nullptr.
249
+ LRUHandle* Insert(LRUHandle* h, LRUHandle** old);
250
+
251
+ // Removes h from the hash table. The handle must already be off
252
+ // the LRU list.
253
+ void Remove(LRUHandle* h);
254
+
255
+ // Turns a visible element h into a ghost (i.e., not visible).
256
+ void Exclude(LRUHandle* h);
257
+
258
+ // Assigns a copy of h to the given slot.
259
+ void Assign(int slot, LRUHandle* h);
123
260
 
124
261
  template <typename T>
125
262
  void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
126
263
  for (uint32_t i = index_begin; i < index_end; i++) {
127
- LRUHandle* h = list_[i];
128
- while (h != nullptr) {
129
- auto n = h->next_hash;
130
- assert(h->InCache());
264
+ LRUHandle* h = &array_[i];
265
+ if (h->IsVisible()) {
131
266
  func(h);
132
- h = n;
133
267
  }
134
268
  }
135
269
  }
136
270
 
137
- int GetLengthBits() const { return length_bits_; }
271
+ uint8_t GetLengthBits() const { return length_bits_; }
138
272
 
139
- // Return the address of the head of the chain in the bucket given
140
- // by the hash.
141
- inline LRUHandle** Head(uint32_t hash);
273
+ uint32_t GetOccupancy() const { return occupancy_; }
142
274
 
143
275
  private:
144
- // Return a pointer to slot that points to a cache entry that
145
- // matches key/hash. If there is no such cache entry, return a
146
- // pointer to the trailing slot in the corresponding linked list.
147
- LRUHandle** FindPointer(const Slice& key, uint32_t hash);
148
-
149
- // Number of hash bits (upper because lower bits used for sharding)
150
- // used for table index. Length == 1 << length_bits_
151
- int length_bits_;
152
-
153
- // The table consists of an array of buckets where each bucket is
154
- // a linked list of cache entries that hash into the bucket.
155
- std::unique_ptr<LRUHandle*[]> list_;
276
+ int FindVisibleElement(const Slice& key, uint32_t hash, int& probe,
277
+ int displacement);
278
+
279
+ int FindAvailableSlot(const Slice& key, int& probe, int displacement);
280
+
281
+ int FindVisibleElementOrAvailableSlot(const Slice& key, uint32_t hash,
282
+ int& probe, int displacement);
283
+
284
+ // Returns the index of the first slot probed (hashing with
285
+ // the given key) with a handle e such that cond(e) is true.
286
+ // Otherwise, if no match is found, returns -1.
287
+ // For every handle e probed except the final slot, updates
288
+ // e->displacements += displacement.
289
+ // The argument probe is modified such that consecutive calls
290
+ // to FindSlot continue probing right after where the previous
291
+ // call left.
292
+ int FindSlot(const Slice& key, std::function<bool(LRUHandle*)> cond,
293
+ int& probe, int displacement);
294
+
295
+ // Number of hash bits used for table index.
296
+ // The size of the table is 1 << length_bits_.
297
+ uint8_t length_bits_;
298
+
299
+ // Number of elements in the table.
300
+ uint32_t occupancy_;
301
+
302
+ std::unique_ptr<LRUHandle[]> array_;
156
303
  };
157
304
 
158
305
  // A single shard of sharded cache.
@@ -172,6 +319,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
172
319
  void SetStrictCapacityLimit(bool strict_capacity_limit) override;
173
320
 
174
321
  // Like Cache methods, but with an extra "hash" parameter.
322
+ // Insert an item into the hash table and, if handle is null, insert into
323
+ // the LRU list. Older items are evicted as necessary. If the cache is full
324
+ // and free_handle_on_fail is true, the item is deleted and handle is set to
325
+ // nullptr.
175
326
  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
176
327
  Cache::DeleterFn deleter, Cache::Handle** handle,
177
328
  Cache::Priority priority) override;
@@ -216,13 +367,6 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
216
367
 
217
368
  private:
218
369
  friend class LRUCache;
219
- // Insert an item into the hash table and, if handle is null, insert into
220
- // the LRU list. Older items are evicted as necessary. If the cache is full
221
- // and free_handle_on_fail is true, the item is deleted and handle is set to
222
- // nullptr.
223
- Status InsertItem(LRUHandle* item, Cache::Handle** handle,
224
- bool free_handle_on_fail);
225
-
226
370
  void LRU_Remove(LRUHandle* e);
227
371
  void LRU_Insert(LRUHandle* e);
228
372
 
@@ -230,12 +374,12 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
230
374
  // to hold (usage_ + charge) is freed or the lru list is empty
231
375
  // This function is not thread safe - it needs to be executed while
232
376
  // holding the mutex_.
233
- void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
377
+ void EvictFromLRU(size_t charge, autovector<LRUHandle>* deleted);
234
378
 
235
- // Returns the number of bits used to hash an element in the per-shard
379
+ // Returns the number of bits used to hash an element in the hash
236
380
  // table.
237
- static int GetHashBits(size_t capacity, size_t estimated_value_size,
238
- CacheMetadataChargePolicy metadata_charge_policy);
381
+ static uint8_t CalcHashBits(size_t capacity, size_t estimated_value_size,
382
+ CacheMetadataChargePolicy metadata_charge_policy);
239
383
 
240
384
  // Initialized before use.
241
385
  size_t capacity_;
@@ -273,7 +417,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
273
417
  // mutex_ protects the following state.
274
418
  // We don't count mutex_ as the cache's internal state so semantically we
275
419
  // don't mind mutex_ invoking the non-const actions.
276
- mutable port::Mutex mutex_;
420
+ mutable DMutex mutex_;
277
421
  };
278
422
 
279
423
  class LRUCache
@@ -16,7 +16,7 @@
16
16
  #include "monitoring/perf_context_imp.h"
17
17
  #include "monitoring/statistics.h"
18
18
  #include "port/lang.h"
19
- #include "util/mutexlock.h"
19
+ #include "util/distributed_mutex.h"
20
20
 
21
21
  namespace ROCKSDB_NAMESPACE {
22
22
  namespace lru_cache {
@@ -135,7 +135,7 @@ LRUCacheShard::LRUCacheShard(
135
135
  void LRUCacheShard::EraseUnRefEntries() {
136
136
  autovector<LRUHandle*> last_reference_list;
137
137
  {
138
- MutexLock l(&mutex_);
138
+ DMutexLock l(mutex_);
139
139
  while (lru_.next != &lru_) {
140
140
  LRUHandle* old = lru_.next;
141
141
  // LRU list contains only elements which can be evicted.
@@ -161,7 +161,7 @@ void LRUCacheShard::ApplyToSomeEntries(
161
161
  // The state is essentially going to be the starting hash, which works
162
162
  // nicely even if we resize between calls because we use upper-most
163
163
  // hash bits for table indexes.
164
- MutexLock l(&mutex_);
164
+ DMutexLock l(mutex_);
165
165
  uint32_t length_bits = table_.GetLengthBits();
166
166
  uint32_t length = uint32_t{1} << length_bits;
167
167
 
@@ -193,13 +193,13 @@ void LRUCacheShard::ApplyToSomeEntries(
193
193
  }
194
194
 
195
195
  void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
196
- MutexLock l(&mutex_);
196
+ DMutexLock l(mutex_);
197
197
  *lru = &lru_;
198
198
  *lru_low_pri = lru_low_pri_;
199
199
  }
200
200
 
201
201
  size_t LRUCacheShard::TEST_GetLRUSize() {
202
- MutexLock l(&mutex_);
202
+ DMutexLock l(mutex_);
203
203
  LRUHandle* lru_handle = lru_.next;
204
204
  size_t lru_size = 0;
205
205
  while (lru_handle != &lru_) {
@@ -210,7 +210,7 @@ size_t LRUCacheShard::TEST_GetLRUSize() {
210
210
  }
211
211
 
212
212
  double LRUCacheShard::GetHighPriPoolRatio() {
213
- MutexLock l(&mutex_);
213
+ DMutexLock l(mutex_);
214
214
  return high_pri_pool_ratio_;
215
215
  }
216
216
 
@@ -285,7 +285,7 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
285
285
  void LRUCacheShard::SetCapacity(size_t capacity) {
286
286
  autovector<LRUHandle*> last_reference_list;
287
287
  {
288
- MutexLock l(&mutex_);
288
+ DMutexLock l(mutex_);
289
289
  capacity_ = capacity;
290
290
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
291
291
  EvictFromLRU(0, &last_reference_list);
@@ -304,7 +304,7 @@ void LRUCacheShard::SetCapacity(size_t capacity) {
304
304
  }
305
305
 
306
306
  void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
307
- MutexLock l(&mutex_);
307
+ DMutexLock l(mutex_);
308
308
  strict_capacity_limit_ = strict_capacity_limit;
309
309
  }
310
310
 
@@ -314,7 +314,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
314
314
  autovector<LRUHandle*> last_reference_list;
315
315
 
316
316
  {
317
- MutexLock l(&mutex_);
317
+ DMutexLock l(mutex_);
318
318
 
319
319
  // Free the space following strict LRU policy until enough space
320
320
  // is freed or the lru list is empty.
@@ -402,7 +402,7 @@ void LRUCacheShard::Promote(LRUHandle* e) {
402
402
  } else {
403
403
  // Since the secondary cache lookup failed, mark the item as not in cache
404
404
  // Don't charge the cache as it's only metadata that'll shortly be released
405
- MutexLock l(&mutex_);
405
+ DMutexLock l(mutex_);
406
406
  // TODO
407
407
  e->CalcTotalCharge(0, metadata_charge_policy_);
408
408
  e->SetInCache(false);
@@ -416,7 +416,7 @@ Cache::Handle* LRUCacheShard::Lookup(
416
416
  bool wait, Statistics* stats) {
417
417
  LRUHandle* e = nullptr;
418
418
  {
419
- MutexLock l(&mutex_);
419
+ DMutexLock l(mutex_);
420
420
  e = table_.Lookup(key, hash);
421
421
  if (e != nullptr) {
422
422
  assert(e->InCache());
@@ -489,7 +489,7 @@ Cache::Handle* LRUCacheShard::Lookup(
489
489
 
490
490
  bool LRUCacheShard::Ref(Cache::Handle* h) {
491
491
  LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
492
- MutexLock l(&mutex_);
492
+ DMutexLock l(mutex_);
493
493
  // To create another reference - entry must be already externally referenced.
494
494
  assert(e->HasRefs());
495
495
  e->Ref();
@@ -497,7 +497,7 @@ bool LRUCacheShard::Ref(Cache::Handle* h) {
497
497
  }
498
498
 
499
499
  void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
500
- MutexLock l(&mutex_);
500
+ DMutexLock l(mutex_);
501
501
  high_pri_pool_ratio_ = high_pri_pool_ratio;
502
502
  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
503
503
  MaintainPoolSize();
@@ -510,7 +510,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
510
510
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
511
511
  bool last_reference = false;
512
512
  {
513
- MutexLock l(&mutex_);
513
+ DMutexLock l(mutex_);
514
514
  last_reference = e->Unref();
515
515
  if (last_reference && e->InCache()) {
516
516
  // The item is still in cache, and nobody else holds a reference to it.
@@ -582,7 +582,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
582
582
  LRUHandle* e;
583
583
  bool last_reference = false;
584
584
  {
585
- MutexLock l(&mutex_);
585
+ DMutexLock l(mutex_);
586
586
  e = table_.Remove(key, hash);
587
587
  if (e != nullptr) {
588
588
  assert(e->InCache());
@@ -606,7 +606,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
606
606
 
607
607
  bool LRUCacheShard::IsReady(Cache::Handle* handle) {
608
608
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
609
- MutexLock l(&mutex_);
609
+ DMutexLock l(mutex_);
610
610
  bool ready = true;
611
611
  if (e->IsPending()) {
612
612
  assert(secondary_cache_);
@@ -617,12 +617,12 @@ bool LRUCacheShard::IsReady(Cache::Handle* handle) {
617
617
  }
618
618
 
619
619
  size_t LRUCacheShard::GetUsage() const {
620
- MutexLock l(&mutex_);
620
+ DMutexLock l(mutex_);
621
621
  return usage_;
622
622
  }
623
623
 
624
624
  size_t LRUCacheShard::GetPinnedUsage() const {
625
- MutexLock l(&mutex_);
625
+ DMutexLock l(mutex_);
626
626
  assert(usage_ >= lru_usage_);
627
627
  return usage_ - lru_usage_;
628
628
  }
@@ -631,7 +631,7 @@ std::string LRUCacheShard::GetPrintableOptions() const {
631
631
  const int kBufferSize = 200;
632
632
  char buffer[kBufferSize];
633
633
  {
634
- MutexLock l(&mutex_);
634
+ DMutexLock l(mutex_);
635
635
  snprintf(buffer, kBufferSize, " high_pri_pool_ratio: %.3lf\n",
636
636
  high_pri_pool_ratio_);
637
637
  }
@@ -17,6 +17,7 @@
17
17
  #include "port/port.h"
18
18
  #include "rocksdb/secondary_cache.h"
19
19
  #include "util/autovector.h"
20
+ #include "util/distributed_mutex.h"
20
21
 
21
22
  namespace ROCKSDB_NAMESPACE {
22
23
  namespace lru_cache {
@@ -453,7 +454,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
453
454
  // mutex_ protects the following state.
454
455
  // We don't count mutex_ as the cache's internal state so semantically we
455
456
  // don't mind mutex_ invoking the non-const actions.
456
- mutable port::Mutex mutex_;
457
+ mutable DMutex mutex_;
457
458
 
458
459
  std::shared_ptr<SecondaryCache> secondary_cache_;
459
460
  };