@nxtedition/rocksdb 7.0.4 → 7.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +320 -324
- package/chained-batch.js +6 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +8 -3
- package/deps/rocksdb/rocksdb/Makefile +10 -4
- package/deps/rocksdb/rocksdb/TARGETS +6 -4
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +14 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +8 -8
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +272 -174
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +201 -57
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +19 -19
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +170 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +95 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +298 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +172 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -3
- package/deps/rocksdb/rocksdb/db/column_family.h +6 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +10 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +38 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -5
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +4 -7
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -71
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +70 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +13 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +36 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +139 -91
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +48 -14
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +90 -55
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +9 -4
- package/deps/rocksdb/rocksdb/db/db_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -7
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +3 -1
- package/deps/rocksdb/rocksdb/db/dbformat.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/memtable.cc +1 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/repair.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +43 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +13 -5
- package/deps/rocksdb/rocksdb/db/version_edit.h +22 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +4 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +109 -41
- package/deps/rocksdb/rocksdb/db/version_set.h +36 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -4
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/version_util.h +1 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/write_batch.cc +34 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +2 -0
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +7 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +24 -3
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +10 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -3
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +8 -6
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -2
- package/deps/rocksdb/rocksdb/options/options_test.cc +1 -11
- package/deps/rocksdb/rocksdb/port/port_posix.h +7 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.h +11 -3
- package/deps/rocksdb/rocksdb/src.mk +6 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -33
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -118
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +6 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +10 -13
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +4 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -28
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -91
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -30
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -27
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +11 -13
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -40
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +22 -43
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +11 -22
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +24 -25
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +0 -1
- package/deps/rocksdb/rocksdb/table/get_context.h +0 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +3 -18
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +3 -16
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +0 -201
- package/deps/rocksdb/rocksdb/util/distributed_mutex.h +48 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +5 -11
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -21
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +45 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +21 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +10 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +3 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +9 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +3 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +5 -4
- package/deps/rocksdb/rocksdb.gyp +1 -1
- package/index.js +36 -14
- package/package-lock.json +2 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +0 -358
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +0 -127
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +0 -219
|
@@ -8,44 +8,131 @@
|
|
|
8
8
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
9
9
|
#pragma once
|
|
10
10
|
|
|
11
|
+
#include <array>
|
|
11
12
|
#include <memory>
|
|
12
13
|
#include <string>
|
|
13
14
|
|
|
15
|
+
#include "cache/cache_key.h"
|
|
14
16
|
#include "cache/sharded_cache.h"
|
|
15
17
|
#include "port/lang.h"
|
|
16
18
|
#include "port/malloc.h"
|
|
17
19
|
#include "port/port.h"
|
|
18
20
|
#include "rocksdb/secondary_cache.h"
|
|
19
21
|
#include "util/autovector.h"
|
|
22
|
+
#include "util/distributed_mutex.h"
|
|
20
23
|
|
|
21
24
|
namespace ROCKSDB_NAMESPACE {
|
|
22
25
|
namespace fast_lru_cache {
|
|
23
26
|
|
|
27
|
+
// LRU cache implementation using an open-address hash table.
|
|
28
|
+
|
|
29
|
+
// Every slot in the hash table is an LRUHandle. Because handles can be
|
|
30
|
+
// referenced externally, we can't discard them immediately once they are
|
|
31
|
+
// deleted (via a delete or an LRU eviction) or replaced by a new version
|
|
32
|
+
// (via an insert of the same key). The state of an element is defined by
|
|
33
|
+
// the following two properties:
|
|
34
|
+
// (R) Referenced: An element can be referenced externally (refs > 0), or not.
|
|
35
|
+
// Importantly, an element can be evicted if and only if it's not
|
|
36
|
+
// referenced. In particular, when an element becomes referenced, it's
|
|
37
|
+
// temporarily taken out of the LRU list until all references to it
|
|
38
|
+
// are dropped.
|
|
39
|
+
// (V) Visible: An element can be visible for lookups (IS_VISIBLE set), or not.
|
|
40
|
+
// Initially, every element is visible. An element that is not visible is
|
|
41
|
+
// called a ghost.
|
|
42
|
+
// These properties induce 4 different states, with transitions defined as
|
|
43
|
+
// follows:
|
|
44
|
+
// - V --> not V: When a visible element is deleted or replaced by a new
|
|
45
|
+
// version.
|
|
46
|
+
// - Not V --> V: This cannot happen. A ghost remains in that state until it's
|
|
47
|
+
// not referenced any more, at which point it's ready to be removed from the
|
|
48
|
+
// hash table. (A ghost simply waits to transition to the afterlife---it will
|
|
49
|
+
// never be visible again.)
|
|
50
|
+
// - R --> not R: When all references to an element are dropped.
|
|
51
|
+
// - Not R --> R: When an unreferenced element becomes referenced. This can only
|
|
52
|
+
// happen if the element is V, since references to an element can only be
|
|
53
|
+
// created when it's visible.
|
|
54
|
+
|
|
55
|
+
// Internally, the cache uses an open-addressed hash table to index the handles.
|
|
56
|
+
// We use tombstone counters to keep track of displacements.
|
|
57
|
+
// Because of the tombstones and the two possible visibility states of an
|
|
58
|
+
// element, the table slots can be in 4 different states:
|
|
59
|
+
// 1. Visible element (IS_ELEMENT set and IS_VISIBLE set): The slot contains a
|
|
60
|
+
// key-value element.
|
|
61
|
+
// 2. Ghost element (IS_ELEMENT set and IS_VISIBLE unset): The slot contains an
|
|
62
|
+
// element that has been removed, but it's still referenced. It's invisible
|
|
63
|
+
// to lookups.
|
|
64
|
+
// 3. Tombstone (IS_ELEMENT unset and displacements > 0): The slot contains a
|
|
65
|
+
// tombstone.
|
|
66
|
+
// 4. Empty (IS_ELEMENT unset and displacements == 0): The slot is unused.
|
|
67
|
+
// A slot that is an element can further have IS_VISIBLE set or not.
|
|
68
|
+
// When a ghost is removed from the table, it can either transition to being a
|
|
69
|
+
// tombstone or an empty slot, depending on the number of displacements of the
|
|
70
|
+
// slot. In any case, the slot becomes available. When a handle is inserted
|
|
71
|
+
// into that slot, it becomes a visible element again.
|
|
72
|
+
|
|
73
|
+
constexpr uint8_t kCacheKeySize =
|
|
74
|
+
static_cast<uint8_t>(sizeof(ROCKSDB_NAMESPACE::CacheKey));
|
|
75
|
+
|
|
76
|
+
// The load factor p is a real number in (0, 1) such that at all
|
|
77
|
+
// times at most a fraction p of all slots, without counting tombstones,
|
|
78
|
+
// are occupied by elements. This means that the probability that a
|
|
79
|
+
// random probe hits an occupied slot is at most p, and thus at most 1/(1-p)
|
|
80
|
+
// probes are required on average. We use p = 70%, so at most ~3.3 probes are
|
|
81
|
+
// needed on average.
|
|
82
|
+
// Because the size of the hash table is always rounded up to the next
|
|
83
|
+
// power of 2, p is really an upper bound on the actual load factor---the
|
|
84
|
+
// actual load factor is anywhere between p/2 and p. This is a bit wasteful,
|
|
85
|
+
// but bear in mind that slots only hold metadata, not actual values.
|
|
86
|
+
// Since space cost is dominated by the values (the LSM blocks),
|
|
87
|
+
// overprovisioning the table with metadata only increases the total cache space
|
|
88
|
+
// usage by a tiny fraction.
|
|
89
|
+
constexpr double kLoadFactor = 0.7;
|
|
90
|
+
|
|
91
|
+
// Arbitrary seeds.
|
|
92
|
+
constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
|
|
93
|
+
constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
|
|
94
|
+
|
|
24
95
|
// An experimental (under development!) alternative to LRUCache
|
|
25
96
|
|
|
26
97
|
struct LRUHandle {
|
|
27
98
|
void* value;
|
|
28
99
|
Cache::DeleterFn deleter;
|
|
29
|
-
LRUHandle* next_hash;
|
|
30
100
|
LRUHandle* next;
|
|
31
101
|
LRUHandle* prev;
|
|
32
102
|
size_t total_charge; // TODO(opt): Only allow uint32_t?
|
|
33
|
-
size_t key_length;
|
|
34
103
|
// The hash of key(). Used for fast sharding and comparisons.
|
|
35
104
|
uint32_t hash;
|
|
36
105
|
// The number of external refs to this entry. The cache itself is not counted.
|
|
37
106
|
uint32_t refs;
|
|
38
107
|
|
|
39
108
|
enum Flags : uint8_t {
|
|
40
|
-
// Whether
|
|
41
|
-
|
|
109
|
+
// Whether the handle is visible to Lookups.
|
|
110
|
+
IS_VISIBLE = (1 << 0),
|
|
111
|
+
// Whether the slot is in use by an element.
|
|
112
|
+
IS_ELEMENT = (1 << 1),
|
|
42
113
|
};
|
|
43
114
|
uint8_t flags;
|
|
44
115
|
|
|
45
|
-
//
|
|
46
|
-
|
|
116
|
+
// The number of elements that hash to this slot or a lower one,
|
|
117
|
+
// but wind up in a higher slot.
|
|
118
|
+
uint32_t displacements;
|
|
119
|
+
|
|
120
|
+
std::array<char, kCacheKeySize> key_data;
|
|
121
|
+
|
|
122
|
+
LRUHandle() {
|
|
123
|
+
value = nullptr;
|
|
124
|
+
deleter = nullptr;
|
|
125
|
+
next = nullptr;
|
|
126
|
+
prev = nullptr;
|
|
127
|
+
total_charge = 0;
|
|
128
|
+
hash = 0;
|
|
129
|
+
refs = 0;
|
|
130
|
+
flags = 0;
|
|
131
|
+
displacements = 0;
|
|
132
|
+
key_data.fill(0);
|
|
133
|
+
}
|
|
47
134
|
|
|
48
|
-
Slice key() const { return Slice(key_data,
|
|
135
|
+
Slice key() const { return Slice(key_data.data(), kCacheKeySize); }
|
|
49
136
|
|
|
50
137
|
// Increase the reference count by 1.
|
|
51
138
|
void Ref() { refs++; }
|
|
@@ -60,22 +147,31 @@ struct LRUHandle {
|
|
|
60
147
|
// Return true if there are external refs, false otherwise.
|
|
61
148
|
bool HasRefs() const { return refs > 0; }
|
|
62
149
|
|
|
63
|
-
bool
|
|
150
|
+
bool IsVisible() const { return flags & IS_VISIBLE; }
|
|
64
151
|
|
|
65
|
-
void
|
|
66
|
-
if (
|
|
67
|
-
flags |=
|
|
152
|
+
void SetIsVisible(bool is_visible) {
|
|
153
|
+
if (is_visible) {
|
|
154
|
+
flags |= IS_VISIBLE;
|
|
68
155
|
} else {
|
|
69
|
-
flags &= ~
|
|
156
|
+
flags &= ~IS_VISIBLE;
|
|
70
157
|
}
|
|
71
158
|
}
|
|
72
159
|
|
|
73
|
-
|
|
160
|
+
bool IsElement() const { return flags & IS_ELEMENT; }
|
|
161
|
+
|
|
162
|
+
void SetIsElement(bool is_element) {
|
|
163
|
+
if (is_element) {
|
|
164
|
+
flags |= IS_ELEMENT;
|
|
165
|
+
} else {
|
|
166
|
+
flags &= ~IS_ELEMENT;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
void FreeData() {
|
|
74
171
|
assert(refs == 0);
|
|
75
172
|
if (deleter) {
|
|
76
173
|
(*deleter)(key(), value);
|
|
77
174
|
}
|
|
78
|
-
delete[] reinterpret_cast<char*>(this);
|
|
79
175
|
}
|
|
80
176
|
|
|
81
177
|
// Calculate the memory usage by metadata.
|
|
@@ -84,13 +180,22 @@ struct LRUHandle {
|
|
|
84
180
|
if (metadata_charge_policy != kFullChargeCacheMetadata) {
|
|
85
181
|
return 0;
|
|
86
182
|
} else {
|
|
87
|
-
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
|
88
|
-
return malloc_usable_size(
|
|
89
|
-
|
|
90
|
-
#else
|
|
91
|
-
//
|
|
92
|
-
|
|
93
|
-
|
|
183
|
+
// #ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
|
184
|
+
// return malloc_usable_size(
|
|
185
|
+
// const_cast<void*>(static_cast<const void*>(this)));
|
|
186
|
+
// #else
|
|
187
|
+
// TODO(Guido) malloc_usable_size only works when we call it on
|
|
188
|
+
// a pointer allocated with malloc. Because our handles are all
|
|
189
|
+
// allocated in a single shot as an array, the user can't call
|
|
190
|
+
// CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
|
|
191
|
+
// pointer returned by the cache. Moreover, malloc_usable_size
|
|
192
|
+
// expects a heap-allocated handle, but sometimes in our code we
|
|
193
|
+
// wish to pass a stack-allocated handle (this is only a performance
|
|
194
|
+
// concern).
|
|
195
|
+
// What is the right way to compute metadata charges with pre-allocated
|
|
196
|
+
// handles?
|
|
197
|
+
return sizeof(LRUHandle);
|
|
198
|
+
// #endif
|
|
94
199
|
}
|
|
95
200
|
}
|
|
96
201
|
|
|
@@ -105,8 +210,23 @@ struct LRUHandle {
|
|
|
105
210
|
assert(total_charge >= meta_charge);
|
|
106
211
|
return total_charge - meta_charge;
|
|
107
212
|
}
|
|
213
|
+
|
|
214
|
+
inline bool IsEmpty() {
|
|
215
|
+
return !this->IsElement() && this->displacements == 0;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
inline bool IsTombstone() {
|
|
219
|
+
return !this->IsElement() && this->displacements > 0;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
inline bool Matches(const Slice& some_key, uint32_t some_hash) {
|
|
223
|
+
return this->IsElement() && this->hash == some_hash &&
|
|
224
|
+
this->key() == some_key;
|
|
225
|
+
}
|
|
108
226
|
};
|
|
109
227
|
|
|
228
|
+
// TODO(Guido) Update the following comment.
|
|
229
|
+
|
|
110
230
|
// We provide our own simple hash table since it removes a whole bunch
|
|
111
231
|
// of porting hacks and is also faster than some of the built-in hash
|
|
112
232
|
// table implementations in some of the compiler/runtime combinations
|
|
@@ -114,45 +234,72 @@ struct LRUHandle {
|
|
|
114
234
|
// 4.4.3's builtin hashtable.
|
|
115
235
|
class LRUHandleTable {
|
|
116
236
|
public:
|
|
117
|
-
explicit LRUHandleTable(
|
|
237
|
+
explicit LRUHandleTable(uint8_t hash_bits);
|
|
118
238
|
~LRUHandleTable();
|
|
119
239
|
|
|
240
|
+
// Returns a pointer to a visible element matching the key/hash, or
|
|
241
|
+
// nullptr if not present.
|
|
120
242
|
LRUHandle* Lookup(const Slice& key, uint32_t hash);
|
|
121
|
-
|
|
122
|
-
|
|
243
|
+
|
|
244
|
+
// Inserts a copy of h into the hash table.
|
|
245
|
+
// Returns a pointer to the inserted handle, or nullptr if no available
|
|
246
|
+
// slot was found. If an existing visible element matching the
|
|
247
|
+
// key/hash is already present in the hash table, the argument old
|
|
248
|
+
// is set to point to it; otherwise, it's set to nullptr.
|
|
249
|
+
LRUHandle* Insert(LRUHandle* h, LRUHandle** old);
|
|
250
|
+
|
|
251
|
+
// Removes h from the hash table. The handle must already be off
|
|
252
|
+
// the LRU list.
|
|
253
|
+
void Remove(LRUHandle* h);
|
|
254
|
+
|
|
255
|
+
// Turns a visible element h into a ghost (i.e., not visible).
|
|
256
|
+
void Exclude(LRUHandle* h);
|
|
257
|
+
|
|
258
|
+
// Assigns a copy of h to the given slot.
|
|
259
|
+
void Assign(int slot, LRUHandle* h);
|
|
123
260
|
|
|
124
261
|
template <typename T>
|
|
125
262
|
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
|
|
126
263
|
for (uint32_t i = index_begin; i < index_end; i++) {
|
|
127
|
-
LRUHandle* h =
|
|
128
|
-
|
|
129
|
-
auto n = h->next_hash;
|
|
130
|
-
assert(h->InCache());
|
|
264
|
+
LRUHandle* h = &array_[i];
|
|
265
|
+
if (h->IsVisible()) {
|
|
131
266
|
func(h);
|
|
132
|
-
h = n;
|
|
133
267
|
}
|
|
134
268
|
}
|
|
135
269
|
}
|
|
136
270
|
|
|
137
|
-
|
|
271
|
+
uint8_t GetLengthBits() const { return length_bits_; }
|
|
138
272
|
|
|
139
|
-
|
|
140
|
-
// by the hash.
|
|
141
|
-
inline LRUHandle** Head(uint32_t hash);
|
|
273
|
+
uint32_t GetOccupancy() const { return occupancy_; }
|
|
142
274
|
|
|
143
275
|
private:
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
//
|
|
154
|
-
//
|
|
155
|
-
|
|
276
|
+
int FindVisibleElement(const Slice& key, uint32_t hash, int& probe,
|
|
277
|
+
int displacement);
|
|
278
|
+
|
|
279
|
+
int FindAvailableSlot(const Slice& key, int& probe, int displacement);
|
|
280
|
+
|
|
281
|
+
int FindVisibleElementOrAvailableSlot(const Slice& key, uint32_t hash,
|
|
282
|
+
int& probe, int displacement);
|
|
283
|
+
|
|
284
|
+
// Returns the index of the first slot probed (hashing with
|
|
285
|
+
// the given key) with a handle e such that cond(e) is true.
|
|
286
|
+
// Otherwise, if no match is found, returns -1.
|
|
287
|
+
// For every handle e probed except the final slot, updates
|
|
288
|
+
// e->displacements += displacement.
|
|
289
|
+
// The argument probe is modified such that consecutive calls
|
|
290
|
+
// to FindSlot continue probing right after where the previous
|
|
291
|
+
// call left off.
|
|
292
|
+
int FindSlot(const Slice& key, std::function<bool(LRUHandle*)> cond,
|
|
293
|
+
int& probe, int displacement);
|
|
294
|
+
|
|
295
|
+
// Number of hash bits used for table index.
|
|
296
|
+
// The size of the table is 1 << length_bits_.
|
|
297
|
+
uint8_t length_bits_;
|
|
298
|
+
|
|
299
|
+
// Number of elements in the table.
|
|
300
|
+
uint32_t occupancy_;
|
|
301
|
+
|
|
302
|
+
std::unique_ptr<LRUHandle[]> array_;
|
|
156
303
|
};
|
|
157
304
|
|
|
158
305
|
// A single shard of sharded cache.
|
|
@@ -172,6 +319,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
172
319
|
void SetStrictCapacityLimit(bool strict_capacity_limit) override;
|
|
173
320
|
|
|
174
321
|
// Like Cache methods, but with an extra "hash" parameter.
|
|
322
|
+
// Insert an item into the hash table and, if handle is null, insert into
|
|
323
|
+
// the LRU list. Older items are evicted as necessary. If the cache is full
|
|
324
|
+
// and free_handle_on_fail is true, the item is deleted and handle is set to
|
|
325
|
+
// nullptr.
|
|
175
326
|
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
|
|
176
327
|
Cache::DeleterFn deleter, Cache::Handle** handle,
|
|
177
328
|
Cache::Priority priority) override;
|
|
@@ -216,13 +367,6 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
216
367
|
|
|
217
368
|
private:
|
|
218
369
|
friend class LRUCache;
|
|
219
|
-
// Insert an item into the hash table and, if handle is null, insert into
|
|
220
|
-
// the LRU list. Older items are evicted as necessary. If the cache is full
|
|
221
|
-
// and free_handle_on_fail is true, the item is deleted and handle is set to
|
|
222
|
-
// nullptr.
|
|
223
|
-
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
|
|
224
|
-
bool free_handle_on_fail);
|
|
225
|
-
|
|
226
370
|
void LRU_Remove(LRUHandle* e);
|
|
227
371
|
void LRU_Insert(LRUHandle* e);
|
|
228
372
|
|
|
@@ -230,12 +374,12 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
230
374
|
// to hold (usage_ + charge) is freed or the lru list is empty
|
|
231
375
|
// This function is not thread safe - it needs to be executed while
|
|
232
376
|
// holding the mutex_.
|
|
233
|
-
void EvictFromLRU(size_t charge, autovector<LRUHandle
|
|
377
|
+
void EvictFromLRU(size_t charge, autovector<LRUHandle>* deleted);
|
|
234
378
|
|
|
235
|
-
// Returns the number of bits used to hash an element in the
|
|
379
|
+
// Returns the number of bits used to hash an element in the hash
|
|
236
380
|
// table.
|
|
237
|
-
static
|
|
238
|
-
|
|
381
|
+
static uint8_t CalcHashBits(size_t capacity, size_t estimated_value_size,
|
|
382
|
+
CacheMetadataChargePolicy metadata_charge_policy);
|
|
239
383
|
|
|
240
384
|
// Initialized before use.
|
|
241
385
|
size_t capacity_;
|
|
@@ -273,7 +417,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
273
417
|
// mutex_ protects the following state.
|
|
274
418
|
// We don't count mutex_ as the cache's internal state so semantically we
|
|
275
419
|
// don't mind mutex_ invoking the non-const actions.
|
|
276
|
-
mutable
|
|
420
|
+
mutable DMutex mutex_;
|
|
277
421
|
};
|
|
278
422
|
|
|
279
423
|
class LRUCache
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
#include "monitoring/perf_context_imp.h"
|
|
17
17
|
#include "monitoring/statistics.h"
|
|
18
18
|
#include "port/lang.h"
|
|
19
|
-
#include "util/
|
|
19
|
+
#include "util/distributed_mutex.h"
|
|
20
20
|
|
|
21
21
|
namespace ROCKSDB_NAMESPACE {
|
|
22
22
|
namespace lru_cache {
|
|
@@ -135,7 +135,7 @@ LRUCacheShard::LRUCacheShard(
|
|
|
135
135
|
void LRUCacheShard::EraseUnRefEntries() {
|
|
136
136
|
autovector<LRUHandle*> last_reference_list;
|
|
137
137
|
{
|
|
138
|
-
|
|
138
|
+
DMutexLock l(mutex_);
|
|
139
139
|
while (lru_.next != &lru_) {
|
|
140
140
|
LRUHandle* old = lru_.next;
|
|
141
141
|
// LRU list contains only elements which can be evicted.
|
|
@@ -161,7 +161,7 @@ void LRUCacheShard::ApplyToSomeEntries(
|
|
|
161
161
|
// The state is essentially going to be the starting hash, which works
|
|
162
162
|
// nicely even if we resize between calls because we use upper-most
|
|
163
163
|
// hash bits for table indexes.
|
|
164
|
-
|
|
164
|
+
DMutexLock l(mutex_);
|
|
165
165
|
uint32_t length_bits = table_.GetLengthBits();
|
|
166
166
|
uint32_t length = uint32_t{1} << length_bits;
|
|
167
167
|
|
|
@@ -193,13 +193,13 @@ void LRUCacheShard::ApplyToSomeEntries(
|
|
|
193
193
|
}
|
|
194
194
|
|
|
195
195
|
void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
|
|
196
|
-
|
|
196
|
+
DMutexLock l(mutex_);
|
|
197
197
|
*lru = &lru_;
|
|
198
198
|
*lru_low_pri = lru_low_pri_;
|
|
199
199
|
}
|
|
200
200
|
|
|
201
201
|
size_t LRUCacheShard::TEST_GetLRUSize() {
|
|
202
|
-
|
|
202
|
+
DMutexLock l(mutex_);
|
|
203
203
|
LRUHandle* lru_handle = lru_.next;
|
|
204
204
|
size_t lru_size = 0;
|
|
205
205
|
while (lru_handle != &lru_) {
|
|
@@ -210,7 +210,7 @@ size_t LRUCacheShard::TEST_GetLRUSize() {
|
|
|
210
210
|
}
|
|
211
211
|
|
|
212
212
|
double LRUCacheShard::GetHighPriPoolRatio() {
|
|
213
|
-
|
|
213
|
+
DMutexLock l(mutex_);
|
|
214
214
|
return high_pri_pool_ratio_;
|
|
215
215
|
}
|
|
216
216
|
|
|
@@ -285,7 +285,7 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
|
|
|
285
285
|
void LRUCacheShard::SetCapacity(size_t capacity) {
|
|
286
286
|
autovector<LRUHandle*> last_reference_list;
|
|
287
287
|
{
|
|
288
|
-
|
|
288
|
+
DMutexLock l(mutex_);
|
|
289
289
|
capacity_ = capacity;
|
|
290
290
|
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
|
|
291
291
|
EvictFromLRU(0, &last_reference_list);
|
|
@@ -304,7 +304,7 @@ void LRUCacheShard::SetCapacity(size_t capacity) {
|
|
|
304
304
|
}
|
|
305
305
|
|
|
306
306
|
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
|
|
307
|
-
|
|
307
|
+
DMutexLock l(mutex_);
|
|
308
308
|
strict_capacity_limit_ = strict_capacity_limit;
|
|
309
309
|
}
|
|
310
310
|
|
|
@@ -314,7 +314,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
|
|
|
314
314
|
autovector<LRUHandle*> last_reference_list;
|
|
315
315
|
|
|
316
316
|
{
|
|
317
|
-
|
|
317
|
+
DMutexLock l(mutex_);
|
|
318
318
|
|
|
319
319
|
// Free the space following strict LRU policy until enough space
|
|
320
320
|
// is freed or the lru list is empty.
|
|
@@ -402,7 +402,7 @@ void LRUCacheShard::Promote(LRUHandle* e) {
|
|
|
402
402
|
} else {
|
|
403
403
|
// Since the secondary cache lookup failed, mark the item as not in cache
|
|
404
404
|
// Don't charge the cache as it's only metadata that'll shortly be released
|
|
405
|
-
|
|
405
|
+
DMutexLock l(mutex_);
|
|
406
406
|
// TODO
|
|
407
407
|
e->CalcTotalCharge(0, metadata_charge_policy_);
|
|
408
408
|
e->SetInCache(false);
|
|
@@ -416,7 +416,7 @@ Cache::Handle* LRUCacheShard::Lookup(
|
|
|
416
416
|
bool wait, Statistics* stats) {
|
|
417
417
|
LRUHandle* e = nullptr;
|
|
418
418
|
{
|
|
419
|
-
|
|
419
|
+
DMutexLock l(mutex_);
|
|
420
420
|
e = table_.Lookup(key, hash);
|
|
421
421
|
if (e != nullptr) {
|
|
422
422
|
assert(e->InCache());
|
|
@@ -489,7 +489,7 @@ Cache::Handle* LRUCacheShard::Lookup(
|
|
|
489
489
|
|
|
490
490
|
bool LRUCacheShard::Ref(Cache::Handle* h) {
|
|
491
491
|
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
|
|
492
|
-
|
|
492
|
+
DMutexLock l(mutex_);
|
|
493
493
|
// To create another reference - entry must be already externally referenced.
|
|
494
494
|
assert(e->HasRefs());
|
|
495
495
|
e->Ref();
|
|
@@ -497,7 +497,7 @@ bool LRUCacheShard::Ref(Cache::Handle* h) {
|
|
|
497
497
|
}
|
|
498
498
|
|
|
499
499
|
void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
|
|
500
|
-
|
|
500
|
+
DMutexLock l(mutex_);
|
|
501
501
|
high_pri_pool_ratio_ = high_pri_pool_ratio;
|
|
502
502
|
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
|
|
503
503
|
MaintainPoolSize();
|
|
@@ -510,7 +510,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
|
|
|
510
510
|
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
|
|
511
511
|
bool last_reference = false;
|
|
512
512
|
{
|
|
513
|
-
|
|
513
|
+
DMutexLock l(mutex_);
|
|
514
514
|
last_reference = e->Unref();
|
|
515
515
|
if (last_reference && e->InCache()) {
|
|
516
516
|
// The item is still in cache, and nobody else holds a reference to it.
|
|
@@ -582,7 +582,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
|
|
|
582
582
|
LRUHandle* e;
|
|
583
583
|
bool last_reference = false;
|
|
584
584
|
{
|
|
585
|
-
|
|
585
|
+
DMutexLock l(mutex_);
|
|
586
586
|
e = table_.Remove(key, hash);
|
|
587
587
|
if (e != nullptr) {
|
|
588
588
|
assert(e->InCache());
|
|
@@ -606,7 +606,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
|
|
|
606
606
|
|
|
607
607
|
bool LRUCacheShard::IsReady(Cache::Handle* handle) {
|
|
608
608
|
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
|
|
609
|
-
|
|
609
|
+
DMutexLock l(mutex_);
|
|
610
610
|
bool ready = true;
|
|
611
611
|
if (e->IsPending()) {
|
|
612
612
|
assert(secondary_cache_);
|
|
@@ -617,12 +617,12 @@ bool LRUCacheShard::IsReady(Cache::Handle* handle) {
|
|
|
617
617
|
}
|
|
618
618
|
|
|
619
619
|
size_t LRUCacheShard::GetUsage() const {
|
|
620
|
-
|
|
620
|
+
DMutexLock l(mutex_);
|
|
621
621
|
return usage_;
|
|
622
622
|
}
|
|
623
623
|
|
|
624
624
|
size_t LRUCacheShard::GetPinnedUsage() const {
|
|
625
|
-
|
|
625
|
+
DMutexLock l(mutex_);
|
|
626
626
|
assert(usage_ >= lru_usage_);
|
|
627
627
|
return usage_ - lru_usage_;
|
|
628
628
|
}
|
|
@@ -631,7 +631,7 @@ std::string LRUCacheShard::GetPrintableOptions() const {
|
|
|
631
631
|
const int kBufferSize = 200;
|
|
632
632
|
char buffer[kBufferSize];
|
|
633
633
|
{
|
|
634
|
-
|
|
634
|
+
DMutexLock l(mutex_);
|
|
635
635
|
snprintf(buffer, kBufferSize, " high_pri_pool_ratio: %.3lf\n",
|
|
636
636
|
high_pri_pool_ratio_);
|
|
637
637
|
}
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
#include "port/port.h"
|
|
18
18
|
#include "rocksdb/secondary_cache.h"
|
|
19
19
|
#include "util/autovector.h"
|
|
20
|
+
#include "util/distributed_mutex.h"
|
|
20
21
|
|
|
21
22
|
namespace ROCKSDB_NAMESPACE {
|
|
22
23
|
namespace lru_cache {
|
|
@@ -453,7 +454,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
|
|
|
453
454
|
// mutex_ protects the following state.
|
|
454
455
|
// We don't count mutex_ as the cache's internal state so semantically we
|
|
455
456
|
// don't mind mutex_ invoking the non-const actions.
|
|
456
|
-
mutable
|
|
457
|
+
mutable DMutex mutex_;
|
|
457
458
|
|
|
458
459
|
std::shared_ptr<SecondaryCache> secondary_cache_;
|
|
459
460
|
};
|