@nxtedition/rocksdb 8.2.0-alpha.1 → 8.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +11 -74
- package/binding.gyp +7 -5
- package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
- package/deps/rocksdb/rocksdb/TARGETS +7 -0
- package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
- package/deps/rocksdb/rocksdb/db/c.cc +90 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
- package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
- package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
- package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
- package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
- package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
- package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
- package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
- package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
- package/deps/rocksdb/rocksdb.gyp +6 -7
- package/index.js +0 -6
- package/package.json +1 -1
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/liburing/liburing.gyp +0 -20
- package/tmp/test.js +0 -7
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
// NOTE: in 'main' development branch, this should be the *next*
|
|
13
13
|
// minor or major version number planned for release.
|
|
14
14
|
#define ROCKSDB_MAJOR 8
|
|
15
|
-
#define ROCKSDB_MINOR
|
|
16
|
-
#define ROCKSDB_PATCH
|
|
15
|
+
#define ROCKSDB_MINOR 1
|
|
16
|
+
#define ROCKSDB_PATCH 1
|
|
17
17
|
|
|
18
18
|
// Do not use these. We made the mistake of declaring macros starting with
|
|
19
19
|
// double underscore. Now we have to live with our choice. We'll deprecate these
|
|
@@ -97,15 +97,22 @@ class PinnableWideColumns {
|
|
|
97
97
|
|
|
98
98
|
void SetPlainValue(const Slice& value);
|
|
99
99
|
void SetPlainValue(const Slice& value, Cleanable* cleanable);
|
|
100
|
+
void SetPlainValue(PinnableSlice&& value);
|
|
101
|
+
void SetPlainValue(std::string&& value);
|
|
100
102
|
|
|
101
103
|
Status SetWideColumnValue(const Slice& value);
|
|
102
104
|
Status SetWideColumnValue(const Slice& value, Cleanable* cleanable);
|
|
105
|
+
Status SetWideColumnValue(PinnableSlice&& value);
|
|
106
|
+
Status SetWideColumnValue(std::string&& value);
|
|
103
107
|
|
|
104
108
|
void Reset();
|
|
105
109
|
|
|
106
110
|
private:
|
|
107
111
|
void CopyValue(const Slice& value);
|
|
108
112
|
void PinOrCopyValue(const Slice& value, Cleanable* cleanable);
|
|
113
|
+
void MoveValue(PinnableSlice&& value);
|
|
114
|
+
void MoveValue(std::string&& value);
|
|
115
|
+
|
|
109
116
|
void CreateIndexForPlainValue();
|
|
110
117
|
Status CreateIndexForWideColumns();
|
|
111
118
|
|
|
@@ -127,6 +134,18 @@ inline void PinnableWideColumns::PinOrCopyValue(const Slice& value,
|
|
|
127
134
|
value_.PinSlice(value, cleanable);
|
|
128
135
|
}
|
|
129
136
|
|
|
137
|
+
inline void PinnableWideColumns::MoveValue(PinnableSlice&& value) {
|
|
138
|
+
value_ = std::move(value);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
inline void PinnableWideColumns::MoveValue(std::string&& value) {
|
|
142
|
+
std::string* const buf = value_.GetSelf();
|
|
143
|
+
assert(buf);
|
|
144
|
+
|
|
145
|
+
*buf = std::move(value);
|
|
146
|
+
value_.PinSelf();
|
|
147
|
+
}
|
|
148
|
+
|
|
130
149
|
inline void PinnableWideColumns::CreateIndexForPlainValue() {
|
|
131
150
|
columns_ = WideColumns{{kDefaultWideColumnName, value_}};
|
|
132
151
|
}
|
|
@@ -142,6 +161,16 @@ inline void PinnableWideColumns::SetPlainValue(const Slice& value,
|
|
|
142
161
|
CreateIndexForPlainValue();
|
|
143
162
|
}
|
|
144
163
|
|
|
164
|
+
inline void PinnableWideColumns::SetPlainValue(PinnableSlice&& value) {
|
|
165
|
+
MoveValue(std::move(value));
|
|
166
|
+
CreateIndexForPlainValue();
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
inline void PinnableWideColumns::SetPlainValue(std::string&& value) {
|
|
170
|
+
MoveValue(std::move(value));
|
|
171
|
+
CreateIndexForPlainValue();
|
|
172
|
+
}
|
|
173
|
+
|
|
145
174
|
inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value) {
|
|
146
175
|
CopyValue(value);
|
|
147
176
|
return CreateIndexForWideColumns();
|
|
@@ -153,6 +182,16 @@ inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value,
|
|
|
153
182
|
return CreateIndexForWideColumns();
|
|
154
183
|
}
|
|
155
184
|
|
|
185
|
+
inline Status PinnableWideColumns::SetWideColumnValue(PinnableSlice&& value) {
|
|
186
|
+
MoveValue(std::move(value));
|
|
187
|
+
return CreateIndexForWideColumns();
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
inline Status PinnableWideColumns::SetWideColumnValue(std::string&& value) {
|
|
191
|
+
MoveValue(std::move(value));
|
|
192
|
+
return CreateIndexForWideColumns();
|
|
193
|
+
}
|
|
194
|
+
|
|
156
195
|
inline void PinnableWideColumns::Reset() {
|
|
157
196
|
value_.Reset();
|
|
158
197
|
columns_.clear();
|
|
@@ -69,6 +69,7 @@ PerfContext::PerfContext(const PerfContext& other) {
|
|
|
69
69
|
internal_delete_skipped_count = other.internal_delete_skipped_count;
|
|
70
70
|
internal_recent_skipped_count = other.internal_recent_skipped_count;
|
|
71
71
|
internal_merge_count = other.internal_merge_count;
|
|
72
|
+
internal_merge_point_lookup_count = other.internal_merge_point_lookup_count;
|
|
72
73
|
internal_range_del_reseek_count = other.internal_range_del_reseek_count;
|
|
73
74
|
write_wal_time = other.write_wal_time;
|
|
74
75
|
get_snapshot_time = other.get_snapshot_time;
|
|
@@ -188,6 +189,7 @@ PerfContext::PerfContext(PerfContext&& other) noexcept {
|
|
|
188
189
|
internal_delete_skipped_count = other.internal_delete_skipped_count;
|
|
189
190
|
internal_recent_skipped_count = other.internal_recent_skipped_count;
|
|
190
191
|
internal_merge_count = other.internal_merge_count;
|
|
192
|
+
internal_merge_point_lookup_count = other.internal_merge_point_lookup_count;
|
|
191
193
|
internal_range_del_reseek_count = other.internal_range_del_reseek_count;
|
|
192
194
|
write_wal_time = other.write_wal_time;
|
|
193
195
|
get_snapshot_time = other.get_snapshot_time;
|
|
@@ -309,6 +311,7 @@ PerfContext& PerfContext::operator=(const PerfContext& other) {
|
|
|
309
311
|
internal_delete_skipped_count = other.internal_delete_skipped_count;
|
|
310
312
|
internal_recent_skipped_count = other.internal_recent_skipped_count;
|
|
311
313
|
internal_merge_count = other.internal_merge_count;
|
|
314
|
+
internal_merge_point_lookup_count = other.internal_merge_point_lookup_count;
|
|
312
315
|
internal_range_del_reseek_count = other.internal_range_del_reseek_count;
|
|
313
316
|
write_wal_time = other.write_wal_time;
|
|
314
317
|
get_snapshot_time = other.get_snapshot_time;
|
|
@@ -422,6 +425,7 @@ void PerfContext::Reset() {
|
|
|
422
425
|
internal_delete_skipped_count = 0;
|
|
423
426
|
internal_recent_skipped_count = 0;
|
|
424
427
|
internal_merge_count = 0;
|
|
428
|
+
internal_merge_point_lookup_count = 0;
|
|
425
429
|
internal_range_del_reseek_count = 0;
|
|
426
430
|
write_wal_time = 0;
|
|
427
431
|
|
|
@@ -556,6 +560,7 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
|
|
|
556
560
|
PERF_CONTEXT_OUTPUT(internal_delete_skipped_count);
|
|
557
561
|
PERF_CONTEXT_OUTPUT(internal_recent_skipped_count);
|
|
558
562
|
PERF_CONTEXT_OUTPUT(internal_merge_count);
|
|
563
|
+
PERF_CONTEXT_OUTPUT(internal_merge_point_lookup_count);
|
|
559
564
|
PERF_CONTEXT_OUTPUT(internal_range_del_reseek_count);
|
|
560
565
|
PERF_CONTEXT_OUTPUT(write_wal_time);
|
|
561
566
|
PERF_CONTEXT_OUTPUT(get_snapshot_time);
|
|
@@ -213,7 +213,13 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|
|
213
213
|
{BLOB_DB_CACHE_BYTES_READ, "rocksdb.blobdb.cache.bytes.read"},
|
|
214
214
|
{BLOB_DB_CACHE_BYTES_WRITE, "rocksdb.blobdb.cache.bytes.write"},
|
|
215
215
|
{READ_ASYNC_MICROS, "rocksdb.read.async.micros"},
|
|
216
|
-
{ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"}
|
|
216
|
+
{ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"},
|
|
217
|
+
{SECONDARY_CACHE_FILTER_HITS, "rocksdb.secondary.cache.filter.hits"},
|
|
218
|
+
{SECONDARY_CACHE_INDEX_HITS, "rocksdb.secondary.cache.index.hits"},
|
|
219
|
+
{SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"},
|
|
220
|
+
{TABLE_OPEN_PREFETCH_TAIL_MISS, "rocksdb.table.open.prefetch.tail.miss"},
|
|
221
|
+
{TABLE_OPEN_PREFETCH_TAIL_HIT, "rocksdb.table.open.prefetch.tail.hit"},
|
|
222
|
+
};
|
|
217
223
|
|
|
218
224
|
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
|
|
219
225
|
{DB_GET, "rocksdb.db.get.micros"},
|
|
@@ -269,6 +275,8 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
|
|
|
269
275
|
{MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
|
|
270
276
|
{NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
|
|
271
277
|
{ASYNC_PREFETCH_ABORT_MICROS, "rocksdb.async.prefetch.abort.micros"},
|
|
278
|
+
{TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
|
|
279
|
+
"rocksdb.table.open.prefetch.tail.read.bytes"},
|
|
272
280
|
};
|
|
273
281
|
|
|
274
282
|
std::shared_ptr<Statistics> CreateDBStatistics() {
|
|
@@ -1236,8 +1236,8 @@ class TestSecondaryCache : public SecondaryCache {
|
|
|
1236
1236
|
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
|
|
1237
1237
|
const Slice& /*key*/, const Cache::CacheItemHelper* /*helper*/,
|
|
1238
1238
|
Cache::CreateContext* /*create_context*/, bool /*wait*/,
|
|
1239
|
-
bool /*advise_erase*/, bool&
|
|
1240
|
-
|
|
1239
|
+
bool /*advise_erase*/, bool& kept_in_sec_cache) override {
|
|
1240
|
+
kept_in_sec_cache = true;
|
|
1241
1241
|
return nullptr;
|
|
1242
1242
|
}
|
|
1243
1243
|
|
|
@@ -141,20 +141,30 @@ void PrintStack(void* frames[], int num_frames) {
|
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
void PrintStack(int first_frames_to_skip) {
|
|
144
|
-
|
|
145
|
-
//
|
|
146
|
-
//
|
|
147
|
-
//
|
|
148
|
-
|
|
144
|
+
// Default to getting stack traces with GDB, at least on Linux where we
|
|
145
|
+
// know how to attach to a particular thread.
|
|
146
|
+
//
|
|
147
|
+
// * Address space layout randomization (ASLR) interferes with getting good
|
|
148
|
+
// stack information from backtrace+addr2line. This is more likely to show
|
|
149
|
+
// up with LIB_MODE=shared builds (when kernel.randomize_va_space >= 1)
|
|
150
|
+
// but can also show up with LIB_MODE=static builds (when
|
|
151
|
+
// kernel.randomize_va_space == 2).
|
|
152
|
+
// * It doesn't appear easy to detect when ASLR is in use.
|
|
153
|
+
// * With DEBUG_LEVEL < 2, backtrace() can skip frames that are not skipped
|
|
154
|
+
// in GDB.
|
|
155
|
+
#if defined(OS_LINUX)
|
|
156
|
+
// Default true, override with ROCKSDB_BACKTRACE_STACK=1
|
|
157
|
+
bool gdb_stack_trace = getenv("ROCKSDB_BACKTRACE_STACK") == nullptr;
|
|
149
158
|
#else
|
|
150
|
-
|
|
159
|
+
// Default false, override with ROCKSDB_GDB_STACK=1
|
|
160
|
+
bool gdb_stack_trace = getenv("ROCKSDB_GDB_STACK") != nullptr;
|
|
151
161
|
#endif
|
|
152
162
|
// Also support invoking interactive debugger on stack trace, with this
|
|
153
163
|
// envvar set to non-empty
|
|
154
164
|
char* debug_env = getenv("ROCKSDB_DEBUG");
|
|
155
165
|
bool debug = debug_env != nullptr && strlen(debug_env) > 0;
|
|
156
166
|
|
|
157
|
-
if (
|
|
167
|
+
if (gdb_stack_trace || debug) {
|
|
158
168
|
// Allow outside debugger to attach, even with Yama security restrictions
|
|
159
169
|
#ifdef PR_SET_PTRACER_ANY
|
|
160
170
|
(void)prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
|
|
@@ -227,6 +227,7 @@ class WinFileSystem : public FileSystem {
|
|
|
227
227
|
const FileOptions& file_options) const override;
|
|
228
228
|
FileOptions OptimizeForManifestWrite(
|
|
229
229
|
const FileOptions& file_options) const override;
|
|
230
|
+
bool use_async_io() override { return false; }
|
|
230
231
|
|
|
231
232
|
protected:
|
|
232
233
|
static uint64_t FileTimeToUnixTime(const FILETIME& ftTime);
|
|
@@ -10,6 +10,7 @@ LIB_SOURCES = \
|
|
|
10
10
|
cache/lru_cache.cc \
|
|
11
11
|
cache/compressed_secondary_cache.cc \
|
|
12
12
|
cache/secondary_cache.cc \
|
|
13
|
+
cache/secondary_cache_adapter.cc \
|
|
13
14
|
cache/sharded_cache.cc \
|
|
14
15
|
db/arena_wrapped_db_iter.cc \
|
|
15
16
|
db/blob/blob_contents.cc \
|
|
@@ -96,6 +97,7 @@ LIB_SOURCES = \
|
|
|
96
97
|
db/write_batch.cc \
|
|
97
98
|
db/write_batch_base.cc \
|
|
98
99
|
db/write_controller.cc \
|
|
100
|
+
db/write_stall_stats.cc \
|
|
99
101
|
db/write_thread.cc \
|
|
100
102
|
env/composite_env.cc \
|
|
101
103
|
env/env.cc \
|
|
@@ -198,6 +200,7 @@ LIB_SOURCES = \
|
|
|
198
200
|
table/get_context.cc \
|
|
199
201
|
table/iterator.cc \
|
|
200
202
|
table/merging_iterator.cc \
|
|
203
|
+
table/compaction_merging_iterator.cc \
|
|
201
204
|
table/meta_blocks.cc \
|
|
202
205
|
table/persistent_cache_helper.cc \
|
|
203
206
|
table/plain/plain_table_bloom.cc \
|
|
@@ -381,6 +384,7 @@ TEST_LIB_SOURCES = \
|
|
|
381
384
|
db/db_test_util.cc \
|
|
382
385
|
db/db_with_timestamp_test_util.cc \
|
|
383
386
|
test_util/mock_time_env.cc \
|
|
387
|
+
test_util/secondary_cache_test_util.cc \
|
|
384
388
|
test_util/testharness.cc \
|
|
385
389
|
test_util/testutil.cc \
|
|
386
390
|
utilities/agg_merge/test_agg_merge.cc \
|
|
@@ -94,7 +94,7 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
|
|
|
94
94
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict, \
|
|
95
95
|
CachableEntry<T>* out_parsed_block, GetContext* get_context, \
|
|
96
96
|
BlockCacheLookupContext* lookup_context, bool for_compaction, \
|
|
97
|
-
bool use_cache, bool
|
|
97
|
+
bool use_cache, bool async_read) const;
|
|
98
98
|
|
|
99
99
|
INSTANTIATE_RETRIEVE_BLOCK(ParsedFullFilterBlock);
|
|
100
100
|
INSTANTIATE_RETRIEVE_BLOCK(UncompressionDict);
|
|
@@ -591,7 +591,7 @@ Status BlockBasedTable::Open(
|
|
|
591
591
|
if (!ioptions.allow_mmap_reads) {
|
|
592
592
|
s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch,
|
|
593
593
|
tail_prefetch_stats, prefetch_all, preload_all,
|
|
594
|
-
&prefetch_buffer);
|
|
594
|
+
&prefetch_buffer, ioptions.stats);
|
|
595
595
|
// Return error in prefetch path to users.
|
|
596
596
|
if (!s.ok()) {
|
|
597
597
|
return s;
|
|
@@ -802,7 +802,7 @@ Status BlockBasedTable::PrefetchTail(
|
|
|
802
802
|
const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
|
|
803
803
|
bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
|
|
804
804
|
const bool prefetch_all, const bool preload_all,
|
|
805
|
-
std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer) {
|
|
805
|
+
std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats) {
|
|
806
806
|
size_t tail_prefetch_size = 0;
|
|
807
807
|
if (tail_prefetch_stats != nullptr) {
|
|
808
808
|
// Multiple threads may get a 0 (no history) when running in parallel,
|
|
@@ -842,9 +842,12 @@ Status BlockBasedTable::PrefetchTail(
|
|
|
842
842
|
}
|
|
843
843
|
|
|
844
844
|
// Use `FilePrefetchBuffer`
|
|
845
|
-
prefetch_buffer->reset(
|
|
846
|
-
|
|
847
|
-
|
|
845
|
+
prefetch_buffer->reset(new FilePrefetchBuffer(
|
|
846
|
+
0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */,
|
|
847
|
+
true /* track_min_offset */, false /* implicit_auto_readahead */,
|
|
848
|
+
0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */,
|
|
849
|
+
nullptr /* fs */, nullptr /* clock */, stats,
|
|
850
|
+
FilePrefetchBufferUsage::kTableOpenPrefetchTail));
|
|
848
851
|
|
|
849
852
|
IOOptions opts;
|
|
850
853
|
Status s = file->PrepareIOOptions(ro, opts);
|
|
@@ -1251,24 +1254,31 @@ Status BlockBasedTable::ReadMetaIndexBlock(
|
|
|
1251
1254
|
}
|
|
1252
1255
|
|
|
1253
1256
|
template <typename TBlocklike>
|
|
1254
|
-
|
|
1255
|
-
const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
|
|
1256
|
-
CachableEntry<TBlocklike>* out_parsed_block, const bool wait,
|
|
1257
|
-
GetContext* get_context) const {
|
|
1258
|
-
assert(out_parsed_block);
|
|
1259
|
-
assert(out_parsed_block->IsEmpty());
|
|
1257
|
+
Cache::Priority BlockBasedTable::GetCachePriority() const {
|
|
1260
1258
|
// Here we treat the legacy name "...index_and_filter_blocks..." to mean all
|
|
1261
1259
|
// metadata blocks that might go into block cache, EXCEPT only those needed
|
|
1262
1260
|
// for the read path (Get, etc.). TableProperties should not be needed on the
|
|
1263
1261
|
// read path (prefix extractor setting is an O(1) size special case that we
|
|
1264
1262
|
// are working not to require from TableProperties), so it is not given
|
|
1265
1263
|
// high-priority treatment if it should go into BlockCache.
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1264
|
+
if constexpr (TBlocklike::kBlockType == BlockType::kData ||
|
|
1265
|
+
TBlocklike::kBlockType == BlockType::kProperties) {
|
|
1266
|
+
return Cache::Priority::LOW;
|
|
1267
|
+
} else if (rep_->table_options
|
|
1268
|
+
.cache_index_and_filter_blocks_with_high_priority) {
|
|
1269
|
+
return Cache::Priority::HIGH;
|
|
1270
|
+
} else {
|
|
1271
|
+
return Cache::Priority::LOW;
|
|
1272
|
+
}
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1275
|
+
template <typename TBlocklike>
|
|
1276
|
+
WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
|
|
1277
|
+
const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
|
|
1278
|
+
CachableEntry<TBlocklike>* out_parsed_block,
|
|
1279
|
+
GetContext* get_context) const {
|
|
1280
|
+
assert(out_parsed_block);
|
|
1281
|
+
assert(out_parsed_block->IsEmpty());
|
|
1272
1282
|
|
|
1273
1283
|
Status s;
|
|
1274
1284
|
Statistics* statistics = rep_->ioptions.statistics.get();
|
|
@@ -1277,8 +1287,8 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
|
|
|
1277
1287
|
if (block_cache) {
|
|
1278
1288
|
assert(!cache_key.empty());
|
|
1279
1289
|
auto cache_handle = block_cache.LookupFull(
|
|
1280
|
-
cache_key, &rep_->create_context,
|
|
1281
|
-
rep_->ioptions.lowest_used_cache_tier);
|
|
1290
|
+
cache_key, &rep_->create_context, GetCachePriority<TBlocklike>(),
|
|
1291
|
+
statistics, rep_->ioptions.lowest_used_cache_tier);
|
|
1282
1292
|
|
|
1283
1293
|
// Avoid updating metrics here if the handle is not complete yet. This
|
|
1284
1294
|
// happens with MultiGet and secondary cache. So update the metrics only
|
|
@@ -1311,11 +1321,6 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
|
|
|
1311
1321
|
MemoryAllocator* memory_allocator, GetContext* get_context) const {
|
|
1312
1322
|
const ImmutableOptions& ioptions = rep_->ioptions;
|
|
1313
1323
|
const uint32_t format_version = rep_->table_options.format_version;
|
|
1314
|
-
const Cache::Priority priority =
|
|
1315
|
-
rep_->table_options.cache_index_and_filter_blocks_with_high_priority &&
|
|
1316
|
-
TBlocklike::kBlockType != BlockType::kData
|
|
1317
|
-
? Cache::Priority::HIGH
|
|
1318
|
-
: Cache::Priority::LOW;
|
|
1319
1324
|
assert(out_parsed_block);
|
|
1320
1325
|
assert(out_parsed_block->IsEmpty());
|
|
1321
1326
|
|
|
@@ -1346,7 +1351,7 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
|
|
|
1346
1351
|
size_t charge = block_holder->ApproximateMemoryUsage();
|
|
1347
1352
|
BlockCacheTypedHandle<TBlocklike>* cache_handle = nullptr;
|
|
1348
1353
|
s = block_cache.InsertFull(cache_key, block_holder.get(), charge,
|
|
1349
|
-
&cache_handle,
|
|
1354
|
+
&cache_handle, GetCachePriority<TBlocklike>(),
|
|
1350
1355
|
rep_->ioptions.lowest_used_cache_tier);
|
|
1351
1356
|
|
|
1352
1357
|
if (s.ok()) {
|
|
@@ -1443,10 +1448,9 @@ WithBlocklikeCheck<Status, TBlocklike>
|
|
|
1443
1448
|
BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
1444
1449
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
|
1445
1450
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
bool async_read) const {
|
|
1451
|
+
bool for_compaction, CachableEntry<TBlocklike>* out_parsed_block,
|
|
1452
|
+
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
1453
|
+
BlockContents* contents, bool async_read) const {
|
|
1450
1454
|
assert(out_parsed_block != nullptr);
|
|
1451
1455
|
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
|
1452
1456
|
BlockCacheInterface<TBlocklike> block_cache{
|
|
@@ -1465,7 +1469,7 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
|
1465
1469
|
key = key_data.AsSlice();
|
|
1466
1470
|
|
|
1467
1471
|
if (!contents) {
|
|
1468
|
-
s = GetDataBlockFromCache(key, block_cache, out_parsed_block,
|
|
1472
|
+
s = GetDataBlockFromCache(key, block_cache, out_parsed_block,
|
|
1469
1473
|
get_context);
|
|
1470
1474
|
// Value could still be null at this point, so check the cache handle
|
|
1471
1475
|
// and update the read pattern for prefetching
|
|
@@ -1626,15 +1630,15 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::RetrieveBlock(
|
|
|
1626
1630
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
1627
1631
|
CachableEntry<TBlocklike>* out_parsed_block, GetContext* get_context,
|
|
1628
1632
|
BlockCacheLookupContext* lookup_context, bool for_compaction,
|
|
1629
|
-
bool use_cache, bool
|
|
1633
|
+
bool use_cache, bool async_read) const {
|
|
1630
1634
|
assert(out_parsed_block);
|
|
1631
1635
|
assert(out_parsed_block->IsEmpty());
|
|
1632
1636
|
|
|
1633
1637
|
Status s;
|
|
1634
1638
|
if (use_cache) {
|
|
1635
1639
|
s = MaybeReadBlockAndLoadToCache(
|
|
1636
|
-
prefetch_buffer, ro, handle, uncompression_dict,
|
|
1637
|
-
|
|
1640
|
+
prefetch_buffer, ro, handle, uncompression_dict, for_compaction,
|
|
1641
|
+
out_parsed_block, get_context, lookup_context,
|
|
1638
1642
|
/*contents=*/nullptr, async_read);
|
|
1639
1643
|
|
|
1640
1644
|
if (!s.ok()) {
|
|
@@ -336,10 +336,9 @@ class BlockBasedTable : public TableReader {
|
|
|
336
336
|
WithBlocklikeCheck<Status, TBlocklike> MaybeReadBlockAndLoadToCache(
|
|
337
337
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
|
338
338
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
bool async_read) const;
|
|
339
|
+
bool for_compaction, CachableEntry<TBlocklike>* block_entry,
|
|
340
|
+
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
|
341
|
+
BlockContents* contents, bool async_read) const;
|
|
343
342
|
|
|
344
343
|
// Similar to the above, with one crucial difference: it will retrieve the
|
|
345
344
|
// block from the file even if there are no caches configured (assuming the
|
|
@@ -350,16 +349,14 @@ class BlockBasedTable : public TableReader {
|
|
|
350
349
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
|
351
350
|
CachableEntry<TBlocklike>* block_entry, GetContext* get_context,
|
|
352
351
|
BlockCacheLookupContext* lookup_context, bool for_compaction,
|
|
353
|
-
bool use_cache, bool
|
|
352
|
+
bool use_cache, bool async_read) const;
|
|
354
353
|
|
|
355
354
|
DECLARE_SYNC_AND_ASYNC_CONST(
|
|
356
355
|
void, RetrieveMultipleBlocks, const ReadOptions& options,
|
|
357
356
|
const MultiGetRange* batch,
|
|
358
357
|
const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
results,
|
|
362
|
-
char* scratch, const UncompressionDict& uncompression_dict);
|
|
358
|
+
Status* statuses, CachableEntry<Block>* results, char* scratch,
|
|
359
|
+
const UncompressionDict& uncompression_dict);
|
|
363
360
|
|
|
364
361
|
// Get the iterator from the index reader.
|
|
365
362
|
//
|
|
@@ -379,6 +376,9 @@ class BlockBasedTable : public TableReader {
|
|
|
379
376
|
IndexBlockIter* input_iter, GetContext* get_context,
|
|
380
377
|
BlockCacheLookupContext* lookup_context) const;
|
|
381
378
|
|
|
379
|
+
template <typename TBlocklike>
|
|
380
|
+
Cache::Priority GetCachePriority() const;
|
|
381
|
+
|
|
382
382
|
// Read block cache from block caches (if set): block_cache.
|
|
383
383
|
// On success, Status::OK will be returned and @block will be populated with
|
|
384
384
|
// pointer to the block as well as its block handle.
|
|
@@ -387,8 +387,7 @@ class BlockBasedTable : public TableReader {
|
|
|
387
387
|
template <typename TBlocklike>
|
|
388
388
|
WithBlocklikeCheck<Status, TBlocklike> GetDataBlockFromCache(
|
|
389
389
|
const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
|
|
390
|
-
CachableEntry<TBlocklike>* block,
|
|
391
|
-
GetContext* get_context) const;
|
|
390
|
+
CachableEntry<TBlocklike>* block, GetContext* get_context) const;
|
|
392
391
|
|
|
393
392
|
// Put a maybe compressed block to the corresponding block caches.
|
|
394
393
|
// This method will perform decompression against block_contents if needed
|
|
@@ -444,7 +443,7 @@ class BlockBasedTable : public TableReader {
|
|
|
444
443
|
const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
|
|
445
444
|
bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
|
|
446
445
|
const bool prefetch_all, const bool preload_all,
|
|
447
|
-
std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer);
|
|
446
|
+
std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats);
|
|
448
447
|
Status ReadMetaIndexBlock(const ReadOptions& ro,
|
|
449
448
|
FilePrefetchBuffer* prefetch_buffer,
|
|
450
449
|
std::unique_ptr<Block>* metaindex_block,
|
|
@@ -77,15 +77,15 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
|
|
|
77
77
|
const UncompressionDict& dict = uncompression_dict.GetValue()
|
|
78
78
|
? *uncompression_dict.GetValue()
|
|
79
79
|
: UncompressionDict::GetEmptyDict();
|
|
80
|
-
s = RetrieveBlock(
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
80
|
+
s = RetrieveBlock(prefetch_buffer, ro, handle, dict,
|
|
81
|
+
&block.As<IterBlocklike>(), get_context, lookup_context,
|
|
82
|
+
for_compaction,
|
|
83
|
+
/* use_cache */ true, async_read);
|
|
84
84
|
} else {
|
|
85
85
|
s = RetrieveBlock(
|
|
86
86
|
prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(),
|
|
87
87
|
&block.As<IterBlocklike>(), get_context, lookup_context, for_compaction,
|
|
88
|
-
/* use_cache */ true,
|
|
88
|
+
/* use_cache */ true, async_read);
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
if (s.IsTryAgain() && async_read) {
|