@nxtedition/rocksdb 7.0.23 → 7.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/binding.cc +3 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
  3. package/deps/rocksdb/rocksdb/Makefile +6 -2
  4. package/deps/rocksdb/rocksdb/TARGETS +14 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
  6. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
  21. package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
  25. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
  26. package/deps/rocksdb/rocksdb/db/c.cc +68 -0
  27. package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
  51. package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
  52. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
  55. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
  61. package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
  62. package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
  63. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
  64. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
  65. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
  66. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  67. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
  68. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
  69. package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
  70. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
  71. package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
  72. package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
  73. package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
  74. package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
  75. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
  78. package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
  79. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  80. package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
  82. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
  93. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
  94. package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
  95. package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
  96. package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
  97. package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
  98. package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
  99. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
  100. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
  101. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
  102. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
  103. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
  104. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
  107. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
  108. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
  111. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
  113. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
  115. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
  116. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
  117. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
  118. package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
  119. package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
  120. package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
  121. package/deps/rocksdb/rocksdb/src.mk +5 -0
  122. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
  127. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  131. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
  133. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
  134. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
  135. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  136. package/deps/rocksdb/rocksdb/util/compression.h +2 -0
  137. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
  138. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
  139. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
  140. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
  143. package/deps/rocksdb/rocksdb.gyp +5 -1
  144. package/package.json +1 -1
  145. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  146. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -9,8 +9,474 @@
9
9
 
10
10
  #pragma once
11
11
 
12
+ #include <array>
13
+ #include <memory>
14
+ #include <string>
15
+
16
+ #include "cache/cache_key.h"
17
+ #include "cache/sharded_cache.h"
18
+ #include "port/lang.h"
19
+ #include "port/malloc.h"
20
+ #include "port/port.h"
12
21
  #include "rocksdb/cache.h"
22
+ #include "rocksdb/secondary_cache.h"
23
+ #include "util/autovector.h"
24
+ #include "util/distributed_mutex.h"
25
+
26
+ namespace ROCKSDB_NAMESPACE {
27
+
28
+ namespace clock_cache {
29
+
30
+ // Clock cache implementation. This is based on FastLRUCache's open-addressed
31
+ // hash table. Importantly, it stores elements in an array, and resolves
32
+ // collision using a probing strategy. Visibility and referenceability of
33
+ // elements works as usual. See fast_lru_cache.h for a detailed description.
34
+ //
35
+ // The main difference with FastLRUCache is, not surprisingly, the eviction
36
+ // algorithm
37
+ // ---instead of an LRU list, we maintain a circular list with the elements
38
+ // available for eviction, which the clock algorithm traverses to pick the next
39
+ // victim. The clock list is represented using the array of handles, and we
40
+ // simply mark those elements that are present in the list. This is done using
41
+ // different clock flags, namely NONE, LOW, MEDIUM, HIGH, that represent
42
+ // priorities: NONE means that the element is not part of the clock list, and
43
+ // LOW to HIGH represent how close an element is to being evictable (LOW being
44
+ // immediately evictable). When the clock pointer steps on an element that is
45
+ // not immediately evictable, it decreases its priority.
46
+
47
+ constexpr double kLoadFactor = 0.35; // See fast_lru_cache.h.
48
+
49
+ constexpr double kStrictLoadFactor = 0.7; // See fast_lru_cache.h.
50
+
51
+ // Arbitrary seeds.
52
+ constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
53
+ constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
54
+
55
+ // An experimental (under development!) alternative to LRUCache
56
+
57
+ struct ClockHandle {
58
+ void* value;
59
+ Cache::DeleterFn deleter;
60
+ uint32_t hash;
61
+ size_t total_charge; // TODO(opt): Only allow uint32_t?
62
+ // The number of external refs to this entry.
63
+ uint32_t refs;
64
+
65
+ static constexpr int kIsVisibleOffset = 0;
66
+ static constexpr int kIsElementOffset = 1;
67
+ static constexpr int kClockPriorityOffset = 2;
68
+ static constexpr int kIsHitOffset = 4;
69
+ static constexpr int kCachePriorityOffset = 5;
70
+
71
+ enum Flags : uint8_t {
72
+ // Whether the handle is visible to Lookups.
73
+ IS_VISIBLE = (1 << kIsVisibleOffset),
74
+ // Whether the slot is in use by an element.
75
+ IS_ELEMENT = (1 << kIsElementOffset),
76
+ // Clock priorities. Represents how close a handle is to
77
+ // being evictable.
78
+ CLOCK_PRIORITY = (3 << kClockPriorityOffset),
79
+ // Whether the handle has been looked up after its insertion.
80
+ HAS_HIT = (1 << kIsHitOffset),
81
+ CACHE_PRIORITY = (1 << kCachePriorityOffset),
82
+ };
83
+ uint8_t flags;
84
+
85
+ enum ClockPriority : uint8_t {
86
+ NONE = (0 << kClockPriorityOffset), // Not an element in the eyes of clock.
87
+ LOW = (1 << kClockPriorityOffset), // Immediately evictable.
88
+ MEDIUM = (2 << kClockPriorityOffset),
89
+ HIGH = (3 << kClockPriorityOffset)
90
+ // Priority is NONE if and only if
91
+ // (i) the handle is not an element, or
92
+ // (ii) the handle is an element but it is being referenced.
93
+ };
94
+
95
+ // The number of elements that hash to this slot or a lower one,
96
+ // but wind up in a higher slot.
97
+ uint32_t displacements;
98
+
99
+ std::array<char, kCacheKeySize> key_data;
100
+
101
+ ClockHandle() {
102
+ value = nullptr;
103
+ deleter = nullptr;
104
+ hash = 0;
105
+ total_charge = 0;
106
+ refs = 0;
107
+ flags = 0;
108
+ SetIsVisible(false);
109
+ SetIsElement(false);
110
+ SetClockPriority(ClockPriority::NONE);
111
+ SetCachePriority(Cache::Priority::LOW);
112
+ displacements = 0;
113
+ key_data.fill(0);
114
+ }
115
+
116
+ Slice key() const { return Slice(key_data.data(), kCacheKeySize); }
117
+
118
+ // Increase the reference count by 1.
119
+ void Ref() { refs++; }
120
+
121
+ // Just reduce the reference count by 1. Return true if it was last reference.
122
+ bool Unref() {
123
+ assert(refs > 0);
124
+ refs--;
125
+ return refs == 0;
126
+ }
127
+
128
+ // Return true if there are external refs, false otherwise.
129
+ bool HasRefs() const { return refs > 0; }
130
+
131
+ bool IsVisible() const { return flags & IS_VISIBLE; }
132
+
133
+ void SetIsVisible(bool is_visible) {
134
+ if (is_visible) {
135
+ flags |= IS_VISIBLE;
136
+ } else {
137
+ flags &= ~IS_VISIBLE;
138
+ }
139
+ }
140
+
141
+ bool IsElement() const { return flags & IS_ELEMENT; }
142
+
143
+ void SetIsElement(bool is_element) {
144
+ if (is_element) {
145
+ flags |= IS_ELEMENT;
146
+ } else {
147
+ flags &= ~IS_ELEMENT;
148
+ }
149
+ }
150
+
151
+ bool HasHit() const { return flags & HAS_HIT; }
152
+
153
+ void SetHit() { flags |= HAS_HIT; }
154
+
155
+ bool IsInClockList() const {
156
+ return GetClockPriority() != ClockHandle::ClockPriority::NONE;
157
+ }
158
+
159
+ Cache::Priority GetCachePriority() const {
160
+ return static_cast<Cache::Priority>(flags & CACHE_PRIORITY);
161
+ }
162
+
163
+ void SetCachePriority(Cache::Priority priority) {
164
+ if (priority == Cache::Priority::HIGH) {
165
+ flags |= Flags::CACHE_PRIORITY;
166
+ } else {
167
+ flags &= ~Flags::CACHE_PRIORITY;
168
+ }
169
+ }
170
+
171
+ ClockPriority GetClockPriority() const {
172
+ return static_cast<ClockPriority>(flags & Flags::CLOCK_PRIORITY);
173
+ }
174
+
175
+ void SetClockPriority(ClockPriority priority) {
176
+ flags &= ~Flags::CLOCK_PRIORITY;
177
+ flags |= priority;
178
+ }
179
+
180
+ void DecreaseClockPriority() {
181
+ uint8_t p = static_cast<uint8_t>(flags & Flags::CLOCK_PRIORITY) >>
182
+ kClockPriorityOffset;
183
+ assert(p > 0);
184
+ p--;
185
+ flags &= ~Flags::CLOCK_PRIORITY;
186
+ ClockPriority new_priority =
187
+ static_cast<ClockPriority>(p << kClockPriorityOffset);
188
+ flags |= new_priority;
189
+ }
190
+
191
+ void FreeData() {
192
+ assert(refs == 0);
193
+ if (deleter) {
194
+ (*deleter)(key(), value);
195
+ }
196
+ }
197
+
198
+ // Calculate the memory usage by metadata.
199
+ inline size_t CalcMetaCharge(
200
+ CacheMetadataChargePolicy metadata_charge_policy) const {
201
+ if (metadata_charge_policy != kFullChargeCacheMetadata) {
202
+ return 0;
203
+ } else {
204
+ // #ifdef ROCKSDB_MALLOC_USABLE_SIZE
205
+ // return malloc_usable_size(
206
+ // const_cast<void*>(static_cast<const void*>(this)));
207
+ // #else
208
+ // TODO(Guido) malloc_usable_size only works when we call it on
209
+ // a pointer allocated with malloc. Because our handles are all
210
+ // allocated in a single shot as an array, the user can't call
211
+ // CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
212
+ // pointer returned by the cache. Moreover, malloc_usable_size
213
+ // expects a heap-allocated handle, but sometimes in our code we
214
+ // wish to pass a stack-allocated handle (this is only a performance
215
+ // concern).
216
+ // What is the right way to compute metadata charges with pre-allocated
217
+ // handles?
218
+ return sizeof(ClockHandle);
219
+ // #endif
220
+ }
221
+ }
222
+
223
+ inline void CalcTotalCharge(
224
+ size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
225
+ total_charge = charge + CalcMetaCharge(metadata_charge_policy);
226
+ }
227
+
228
+ inline size_t GetCharge(
229
+ CacheMetadataChargePolicy metadata_charge_policy) const {
230
+ size_t meta_charge = CalcMetaCharge(metadata_charge_policy);
231
+ assert(total_charge >= meta_charge);
232
+ return total_charge - meta_charge;
233
+ }
234
+
235
+ inline bool IsEmpty() {
236
+ return !this->IsElement() && this->displacements == 0;
237
+ }
238
+
239
+ inline bool IsTombstone() {
240
+ return !this->IsElement() && this->displacements > 0;
241
+ }
242
+
243
+ inline bool Matches(const Slice& some_key) {
244
+ return this->IsElement() && this->key() == some_key;
245
+ }
246
+ }; // struct ClockHandle
247
+
248
+ class ClockHandleTable {
249
+ public:
250
+ explicit ClockHandleTable(int hash_bits);
251
+ ~ClockHandleTable();
252
+
253
+ // Returns a pointer to a visible element matching the key/hash, or
254
+ // nullptr if not present.
255
+ ClockHandle* Lookup(const Slice& key);
256
+
257
+ // Inserts a copy of h into the hash table.
258
+ // Returns a pointer to the inserted handle, or nullptr if no slot
259
+ // available was found. If an existing visible element matching the
260
+ // key/hash is already present in the hash table, the argument old
261
+ // is set to point to it; otherwise, it's set to nullptr.
262
+ ClockHandle* Insert(ClockHandle* h, ClockHandle** old);
263
+
264
+ // Removes h from the hash table. The handle must already be off
265
+ // the clock list.
266
+ void Remove(ClockHandle* h);
267
+
268
+ // Turns a visible element h into a ghost (i.e., not visible).
269
+ void Exclude(ClockHandle* h);
13
270
 
14
- #if defined(TBB) && !defined(ROCKSDB_LITE)
15
- #define SUPPORT_CLOCK_CACHE
271
+ // Assigns a copy of h to the given slot.
272
+ void Assign(int slot, ClockHandle* h);
273
+
274
+ template <typename T>
275
+ void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
276
+ for (uint32_t i = index_begin; i < index_end; i++) {
277
+ ClockHandle* h = &array_[i];
278
+ if (h->IsVisible()) {
279
+ func(h);
280
+ }
281
+ }
282
+ }
283
+
284
+ uint32_t GetTableSize() const { return uint32_t{1} << length_bits_; }
285
+
286
+ int GetLengthBits() const { return length_bits_; }
287
+
288
+ uint32_t GetOccupancyLimit() const { return occupancy_limit_; }
289
+
290
+ uint32_t GetOccupancy() const { return occupancy_; }
291
+
292
+ // Returns x mod 2^{length_bits_}.
293
+ uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
294
+
295
+ private:
296
+ friend class ClockCacheShard;
297
+
298
+ int FindVisibleElement(const Slice& key, int& probe, int displacement);
299
+
300
+ int FindAvailableSlot(const Slice& key, int& probe, int displacement);
301
+
302
+ int FindVisibleElementOrAvailableSlot(const Slice& key, int& probe,
303
+ int displacement);
304
+
305
+ // Returns the index of the first slot probed (hashing with
306
+ // the given key) with a handle e such that cond(e) is true.
307
+ // Otherwise, if no match is found, returns -1.
308
+ // For every handle e probed except the final slot, updates
309
+ // e->displacements += displacement.
310
+ // The argument probe is modified such that consecutive calls
311
+ // to FindSlot continue probing right after where the previous
312
+ // call left.
313
+ int FindSlot(const Slice& key, std::function<bool(ClockHandle*)> cond,
314
+ int& probe, int displacement);
315
+
316
+ // Number of hash bits used for table index.
317
+ // The size of the table is 1 << length_bits_.
318
+ int length_bits_;
319
+
320
+ const uint32_t length_bits_mask_;
321
+
322
+ // Number of elements in the table.
323
+ uint32_t occupancy_;
324
+
325
+ // Maximum number of elements the user can store in the table.
326
+ uint32_t occupancy_limit_;
327
+
328
+ std::unique_ptr<ClockHandle[]> array_;
329
+ }; // class ClockHandleTable
330
+
331
+ // A single shard of sharded cache.
332
+ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
333
+ public:
334
+ ClockCacheShard(size_t capacity, size_t estimated_value_size,
335
+ bool strict_capacity_limit,
336
+ CacheMetadataChargePolicy metadata_charge_policy);
337
+ ~ClockCacheShard() override = default;
338
+
339
+ // Separate from constructor so caller can easily make an array of ClockCache.
340
+ // If current usage is more than new capacity, the function will attempt to
341
+ // free the needed space.
342
+ void SetCapacity(size_t capacity) override;
343
+
344
+ // Set the flag to reject insertion if the cache is full.
345
+ void SetStrictCapacityLimit(bool strict_capacity_limit) override;
346
+
347
+ // Like Cache methods, but with an extra "hash" parameter.
348
+ // Insert an item into the hash table and, if handle is null, insert into
349
+ // the clock list. Older items are evicted as necessary. If the cache is full
350
+ // and free_handle_on_fail is true, the item is deleted and handle is set to
351
+ // nullptr.
352
+ Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
353
+ Cache::DeleterFn deleter, Cache::Handle** handle,
354
+ Cache::Priority priority) override;
355
+
356
+ Status Insert(const Slice& key, uint32_t hash, void* value,
357
+ const Cache::CacheItemHelper* helper, size_t charge,
358
+ Cache::Handle** handle, Cache::Priority priority) override {
359
+ return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
360
+ }
361
+
362
+ Cache::Handle* Lookup(const Slice& key, uint32_t hash,
363
+ const Cache::CacheItemHelper* /*helper*/,
364
+ const Cache::CreateCallback& /*create_cb*/,
365
+ Cache::Priority /*priority*/, bool /*wait*/,
366
+ Statistics* /*stats*/) override {
367
+ return Lookup(key, hash);
368
+ }
369
+ Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
370
+
371
+ bool Release(Cache::Handle* handle, bool /*useful*/,
372
+ bool erase_if_last_ref) override {
373
+ return Release(handle, erase_if_last_ref);
374
+ }
375
+ bool IsReady(Cache::Handle* /*handle*/) override { return true; }
376
+ void Wait(Cache::Handle* /*handle*/) override {}
377
+
378
+ bool Ref(Cache::Handle* handle) override;
379
+ bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
380
+ void Erase(const Slice& key, uint32_t hash) override;
381
+
382
+ size_t GetUsage() const override;
383
+ size_t GetPinnedUsage() const override;
384
+
385
+ void ApplyToSomeEntries(
386
+ const std::function<void(const Slice& key, void* value, size_t charge,
387
+ DeleterFn deleter)>& callback,
388
+ uint32_t average_entries_per_lock, uint32_t* state) override;
389
+
390
+ void EraseUnRefEntries() override;
391
+
392
+ std::string GetPrintableOptions() const override;
393
+
394
+ private:
395
+ friend class ClockCache;
396
+ void ClockRemove(ClockHandle* e);
397
+ void ClockInsert(ClockHandle* e);
398
+
399
+ // Free some space following strict clock policy until enough space
400
+ // to hold (usage_ + charge) is freed or the clock list is empty.
401
+ // This function is not thread safe - it needs to be executed while
402
+ // holding the mutex_.
403
+ void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);
404
+
405
+ // Returns the charge of a single handle.
406
+ static size_t CalcEstimatedHandleCharge(
407
+ size_t estimated_value_size,
408
+ CacheMetadataChargePolicy metadata_charge_policy);
409
+
410
+ // Returns the number of bits used to hash an element in the hash
411
+ // table.
412
+ static int CalcHashBits(size_t capacity, size_t estimated_value_size,
413
+ CacheMetadataChargePolicy metadata_charge_policy);
414
+
415
+ // Initialized before use.
416
+ size_t capacity_;
417
+
418
+ // Whether to reject insertion if cache reaches its full capacity.
419
+ bool strict_capacity_limit_;
420
+
421
+ uint32_t clock_pointer_;
422
+
423
+ // ------------^^^^^^^^^^^^^-----------
424
+ // Not frequently modified data members
425
+ // ------------------------------------
426
+ //
427
+ // We separate data members that are updated frequently from the ones that
428
+ // are not frequently updated so that they don't share the same cache line
429
+ // which would lead to false sharing.
430
+ //
431
+ // ------------------------------------
432
+ // Frequently modified data members
433
+ // ------------vvvvvvvvvvvvv-----------
434
+ ClockHandleTable table_;
435
+
436
+ // Memory size for entries residing in the cache.
437
+ size_t usage_;
438
+
439
+ // Memory size for unpinned entries in the clock list.
440
+ size_t clock_usage_;
441
+
442
+ // mutex_ protects the following state.
443
+ // We don't count mutex_ as the cache's internal state so semantically we
444
+ // don't mind mutex_ invoking the non-const actions.
445
+ mutable DMutex mutex_;
446
+ }; // class ClockCacheShard
447
+
448
+ class ClockCache
449
+ #ifdef NDEBUG
450
+ final
16
451
  #endif
452
+ : public ShardedCache {
453
+ public:
454
+ ClockCache(size_t capacity, size_t estimated_value_size, int num_shard_bits,
455
+ bool strict_capacity_limit,
456
+ CacheMetadataChargePolicy metadata_charge_policy =
457
+ kDontChargeCacheMetadata);
458
+ ~ClockCache() override;
459
+ const char* Name() const override { return "ClockCache"; }
460
+ CacheShard* GetShard(uint32_t shard) override;
461
+ const CacheShard* GetShard(uint32_t shard) const override;
462
+ void* Value(Handle* handle) override;
463
+ size_t GetCharge(Handle* handle) const override;
464
+ uint32_t GetHash(Handle* handle) const override;
465
+ DeleterFn GetDeleter(Handle* handle) const override;
466
+ void DisownData() override;
467
+
468
+ private:
469
+ ClockCacheShard* shards_ = nullptr;
470
+ int num_shards_ = 0;
471
+ }; // class ClockCache
472
+
473
+ } // namespace clock_cache
474
+
475
+ // Only for internal testing, temporarily replacing NewClockCache.
476
+ // TODO(Guido) Remove once NewClockCache constructs a ClockCache again.
477
+ extern std::shared_ptr<Cache> ExperimentalNewClockCache(
478
+ size_t capacity, size_t estimated_value_size, int num_shard_bits,
479
+ bool strict_capacity_limit,
480
+ CacheMetadataChargePolicy metadata_charge_policy);
481
+
482
+ } // namespace ROCKSDB_NAMESPACE
@@ -139,7 +139,7 @@ std::string CompressedSecondaryCache::GetPrintableOptions() const {
139
139
  snprintf(buffer, kBufferSize, " compression_type : %s\n",
140
140
  CompressionTypeToString(cache_options_.compression_type).c_str());
141
141
  ret.append(buffer);
142
- snprintf(buffer, kBufferSize, " compression_type : %d\n",
142
+ snprintf(buffer, kBufferSize, " compress_format_version : %d\n",
143
143
  cache_options_.compress_format_version);
144
144
  ret.append(buffer);
145
145
  return ret;
@@ -9,8 +9,6 @@
9
9
 
10
10
  #include "cache/fast_lru_cache.h"
11
11
 
12
- #include <math.h>
13
-
14
12
  #include <cassert>
15
13
  #include <cstdint>
16
14
  #include <cstdio>
@@ -21,39 +19,25 @@
21
19
  #include "port/lang.h"
22
20
  #include "util/distributed_mutex.h"
23
21
  #include "util/hash.h"
22
+ #include "util/math.h"
24
23
  #include "util/random.h"
25
24
 
26
25
  namespace ROCKSDB_NAMESPACE {
27
26
 
28
27
  namespace fast_lru_cache {
29
28
 
30
- namespace {
31
- // Returns x % 2^{bits}.
32
- inline uint32_t BinaryMod(uint32_t x, uint8_t bits) {
33
- assert(bits <= 32);
34
- return (x << (32 - bits)) >> (32 - bits);
35
- }
36
- } // anonymous namespace
37
-
38
- LRUHandleTable::LRUHandleTable(uint8_t hash_bits)
29
+ LRUHandleTable::LRUHandleTable(int hash_bits)
39
30
  : length_bits_(hash_bits),
31
+ length_bits_mask_((uint32_t{1} << length_bits_) - 1),
40
32
  occupancy_(0),
33
+ occupancy_limit_(static_cast<uint32_t>((uint32_t{1} << length_bits_) *
34
+ kStrictLoadFactor)),
41
35
  array_(new LRUHandle[size_t{1} << length_bits_]) {
42
36
  assert(hash_bits <= 32);
43
37
  }
44
38
 
45
39
  LRUHandleTable::~LRUHandleTable() {
46
- // TODO(Guido) If users still hold references to handles,
47
- // those will become invalidated. And if we choose not to
48
- // delete the data, it will become leaked.
49
- ApplyToEntriesRange(
50
- [](LRUHandle* h) {
51
- // TODO(Guido) Remove the HasRefs() check?
52
- if (!h->HasRefs()) {
53
- h->FreeData();
54
- }
55
- },
56
- 0, uint32_t{1} << length_bits_);
40
+ ApplyToEntriesRange([](LRUHandle* h) { h->FreeData(); }, 0, GetTableSize());
57
41
  }
58
42
 
59
43
  LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
@@ -161,11 +145,10 @@ int LRUHandleTable::FindVisibleElementOrAvailableSlot(const Slice& key,
161
145
  inline int LRUHandleTable::FindSlot(const Slice& key,
162
146
  std::function<bool(LRUHandle*)> cond,
163
147
  int& probe, int displacement) {
164
- uint32_t base =
165
- BinaryMod(Hash(key.data(), key.size(), kProbingSeed1), length_bits_);
166
- uint32_t increment = BinaryMod(
167
- (Hash(key.data(), key.size(), kProbingSeed2) << 1) | 1, length_bits_);
168
- uint32_t current = BinaryMod(base + probe * increment, length_bits_);
148
+ uint32_t base = ModTableSize(Hash(key.data(), key.size(), kProbingSeed1));
149
+ uint32_t increment =
150
+ ModTableSize((Hash(key.data(), key.size(), kProbingSeed2) << 1) | 1);
151
+ uint32_t current = ModTableSize(base + probe * increment);
169
152
  while (true) {
170
153
  LRUHandle* h = &array_[current];
171
154
  probe++;
@@ -182,7 +165,7 @@ inline int LRUHandleTable::FindSlot(const Slice& key,
182
165
  return -1;
183
166
  }
184
167
  h->displacements += displacement;
185
- current = BinaryMod(current + increment, length_bits_);
168
+ current = ModTableSize(current + increment);
186
169
  }
187
170
  }
188
171
 
@@ -233,7 +216,7 @@ void LRUCacheShard::ApplyToSomeEntries(
233
216
  // hash bits for table indexes.
234
217
  DMutexLock l(mutex_);
235
218
  uint32_t length_bits = table_.GetLengthBits();
236
- uint32_t length = uint32_t{1} << length_bits;
219
+ uint32_t length = table_.GetTableSize();
237
220
 
238
221
  assert(average_entries_per_lock > 0);
239
222
  // Assuming we are called with same average_entries_per_lock repeatedly,
@@ -302,22 +285,16 @@ size_t LRUCacheShard::CalcEstimatedHandleCharge(
302
285
  return h.total_charge;
303
286
  }
304
287
 
305
- uint8_t LRUCacheShard::CalcHashBits(
288
+ int LRUCacheShard::CalcHashBits(
306
289
  size_t capacity, size_t estimated_value_size,
307
290
  CacheMetadataChargePolicy metadata_charge_policy) {
308
291
  size_t handle_charge =
309
292
  CalcEstimatedHandleCharge(estimated_value_size, metadata_charge_policy);
310
- size_t num_entries =
311
- static_cast<size_t>(capacity / (kLoadFactor * handle_charge));
312
-
313
- // Compute the ceiling of log2(num_entries). If num_entries == 0, return 0.
314
- uint8_t num_hash_bits = 0;
315
- size_t num_entries_copy = num_entries;
316
- while (num_entries_copy >>= 1) {
317
- ++num_hash_bits;
318
- }
319
- num_hash_bits += size_t{1} << num_hash_bits < num_entries ? 1 : 0;
320
- return num_hash_bits;
293
+ assert(handle_charge > 0);
294
+ uint32_t num_entries =
295
+ static_cast<uint32_t>(capacity / (kLoadFactor * handle_charge)) + 1;
296
+ assert(num_entries <= uint32_t{1} << 31);
297
+ return FloorLog2((num_entries << 1) - 1);
321
298
  }
322
299
 
323
300
  void LRUCacheShard::SetCapacity(size_t capacity) {
@@ -362,33 +339,52 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
362
339
  autovector<LRUHandle> last_reference_list;
363
340
  {
364
341
  DMutexLock l(mutex_);
342
+ assert(table_.GetOccupancy() <= table_.GetOccupancyLimit());
365
343
 
366
344
  // Free the space following strict LRU policy until enough space
367
345
  // is freed or the lru list is empty.
368
346
  EvictFromLRU(tmp.total_charge, &last_reference_list);
369
347
  if ((usage_ + tmp.total_charge > capacity_ &&
370
348
  (strict_capacity_limit_ || handle == nullptr)) ||
371
- table_.GetOccupancy() == size_t{1} << table_.GetLengthBits()) {
372
- // Originally, when strict_capacity_limit_ == false and handle != nullptr
373
- // (i.e., the user wants to immediately get a reference to the new
374
- // handle), the insertion would proceed even if the total charge already
375
- // exceeds capacity. We can't do this now, because we can't physically
376
- // insert a new handle when the table is at maximum occupancy.
349
+ table_.GetOccupancy() == table_.GetOccupancyLimit()) {
350
+ // There are two measures of capacity:
351
+ // - Space (or charge) capacity: The maximum possible sum of the charges
352
+ // of the elements.
353
+ // - Table capacity: The number of slots in the hash table.
354
+ // These are incomparable, in the sense that one doesn't imply the other.
355
+ // Typically we will reach space capacity before table capacity---
356
+ // if the user always inserts values with size equal to
357
+ // estimated_value_size, then at most a kLoadFactor fraction of slots
358
+ // will ever be occupied. But in some cases we may reach table capacity
359
+ // before space capacity---if the user initially claims a very large
360
+ // estimated_value_size but then inserts tiny values, more elements than
361
+ // initially estimated will be inserted.
362
+
377
363
  // TODO(Guido) Some tests (at least two from cache_test, as well as the
378
- // stress tests) currently assume the old behavior.
364
+ // stress tests) currently assume the table capacity is unbounded.
379
365
  if (handle == nullptr) {
380
366
  // Don't insert the entry but still return ok, as if the entry inserted
381
367
  // into cache and get evicted immediately.
382
368
  last_reference_list.push_back(tmp);
383
369
  } else {
384
- s = Status::Incomplete("Insert failed due to LRU cache being full.");
370
+ if (table_.GetOccupancy() == table_.GetOccupancyLimit()) {
371
+ // TODO: Consider using a distinct status for this case, but usually
372
+ // it will be handled the same way as reaching charge capacity limit
373
+ s = Status::MemoryLimit(
374
+ "Insert failed because all slots in the hash table are full.");
375
+ } else {
376
+ s = Status::MemoryLimit(
377
+ "Insert failed because the total charge has exceeded the "
378
+ "capacity.");
379
+ }
385
380
  }
386
381
  } else {
387
382
  // Insert into the cache. Note that the cache might get larger than its
388
383
  // capacity if not enough space was freed up.
389
384
  LRUHandle* old;
390
385
  LRUHandle* h = table_.Insert(&tmp, &old);
391
- assert(h != nullptr); // Insertions should never fail.
386
+ assert(h != nullptr); // We're below occupancy, so this insertion should
387
+ // never fail.
392
388
  usage_ += h->total_charge;
393
389
  if (old != nullptr) {
394
390
  s = Status::OkOverwritten();
@@ -431,7 +427,8 @@ Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
431
427
  if (h != nullptr) {
432
428
  assert(h->IsVisible());
433
429
  if (!h->HasRefs()) {
434
- // The entry is in LRU since it's in hash and has no external references
430
+ // The entry is in LRU since it's in hash and has no external
431
+ // references.
435
432
  LRU_Remove(h);
436
433
  }
437
434
  h->Ref();
@@ -497,7 +494,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
497
494
  table_.Exclude(h);
498
495
  if (!h->HasRefs()) {
499
496
  // The entry is in LRU since it's in cache and has no external
500
- // references
497
+ // references.
501
498
  LRU_Remove(h);
502
499
  table_.Remove(h);
503
500
  assert(usage_ >= h->total_charge);
@@ -531,6 +528,8 @@ LRUCache::LRUCache(size_t capacity, size_t estimated_value_size,
531
528
  int num_shard_bits, bool strict_capacity_limit,
532
529
  CacheMetadataChargePolicy metadata_charge_policy)
533
530
  : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
531
+ assert(estimated_value_size > 0 ||
532
+ metadata_charge_policy != kDontChargeCacheMetadata);
534
533
  num_shards_ = 1 << num_shard_bits;
535
534
  shards_ = reinterpret_cast<LRUCacheShard*>(
536
535
  port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));