@nxtedition/rocksdb 8.2.0-alpha.1 → 8.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/binding.cc +11 -74
  2. package/binding.gyp +7 -5
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
  4. package/deps/rocksdb/rocksdb/TARGETS +7 -0
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
  10. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
  18. package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
  19. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
  20. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
  22. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
  23. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
  24. package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
  25. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
  29. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
  30. package/deps/rocksdb/rocksdb/db/c.cc +90 -1
  31. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
  32. package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
  33. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
  40. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
  41. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
  42. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
  43. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
  44. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
  51. package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
  52. package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
  53. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
  54. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
  55. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
  56. package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
  57. package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
  58. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
  60. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
  61. package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
  62. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  63. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
  64. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
  65. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
  66. package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
  67. package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
  68. package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
  69. package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
  70. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
  71. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
  72. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
  73. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
  74. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
  75. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
  76. package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
  77. package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
  78. package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
  79. package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
  80. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
  81. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
  89. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
  91. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
  92. package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
  93. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
  94. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
  95. package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
  96. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
  98. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
  99. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
  100. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
  102. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
  103. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
  104. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
  105. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
  107. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
  109. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  112. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
  113. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
  114. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
  115. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
  116. package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
  117. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
  118. package/deps/rocksdb/rocksdb/src.mk +4 -0
  119. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
  120. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
  121. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
  122. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
  123. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
  124. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
  125. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  127. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
  131. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
  133. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
  134. package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
  135. package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
  136. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
  137. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
  138. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
  139. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
  140. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
  141. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
  142. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
  143. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
  144. package/deps/rocksdb/rocksdb.gyp +6 -7
  145. package/index.js +0 -6
  146. package/package.json +1 -1
  147. package/prebuilds/linux-x64/node.napi.node +0 -0
  148. package/deps/liburing/liburing.gyp +0 -20
  149. package/tmp/test.js +0 -7
@@ -12,8 +12,8 @@
12
12
  // NOTE: in 'main' development branch, this should be the *next*
13
13
  // minor or major version number planned for release.
14
14
  #define ROCKSDB_MAJOR 8
15
- #define ROCKSDB_MINOR 0
16
- #define ROCKSDB_PATCH 0
15
+ #define ROCKSDB_MINOR 1
16
+ #define ROCKSDB_PATCH 1
17
17
 
18
18
  // Do not use these. We made the mistake of declaring macros starting with
19
19
  // double underscore. Now we have to live with our choice. We'll deprecate these
@@ -97,15 +97,22 @@ class PinnableWideColumns {
97
97
 
98
98
  void SetPlainValue(const Slice& value);
99
99
  void SetPlainValue(const Slice& value, Cleanable* cleanable);
100
+ void SetPlainValue(PinnableSlice&& value);
101
+ void SetPlainValue(std::string&& value);
100
102
 
101
103
  Status SetWideColumnValue(const Slice& value);
102
104
  Status SetWideColumnValue(const Slice& value, Cleanable* cleanable);
105
+ Status SetWideColumnValue(PinnableSlice&& value);
106
+ Status SetWideColumnValue(std::string&& value);
103
107
 
104
108
  void Reset();
105
109
 
106
110
  private:
107
111
  void CopyValue(const Slice& value);
108
112
  void PinOrCopyValue(const Slice& value, Cleanable* cleanable);
113
+ void MoveValue(PinnableSlice&& value);
114
+ void MoveValue(std::string&& value);
115
+
109
116
  void CreateIndexForPlainValue();
110
117
  Status CreateIndexForWideColumns();
111
118
 
@@ -127,6 +134,18 @@ inline void PinnableWideColumns::PinOrCopyValue(const Slice& value,
127
134
  value_.PinSlice(value, cleanable);
128
135
  }
129
136
 
137
+ inline void PinnableWideColumns::MoveValue(PinnableSlice&& value) {
138
+ value_ = std::move(value);
139
+ }
140
+
141
+ inline void PinnableWideColumns::MoveValue(std::string&& value) {
142
+ std::string* const buf = value_.GetSelf();
143
+ assert(buf);
144
+
145
+ *buf = std::move(value);
146
+ value_.PinSelf();
147
+ }
148
+
130
149
  inline void PinnableWideColumns::CreateIndexForPlainValue() {
131
150
  columns_ = WideColumns{{kDefaultWideColumnName, value_}};
132
151
  }
@@ -142,6 +161,16 @@ inline void PinnableWideColumns::SetPlainValue(const Slice& value,
142
161
  CreateIndexForPlainValue();
143
162
  }
144
163
 
164
+ inline void PinnableWideColumns::SetPlainValue(PinnableSlice&& value) {
165
+ MoveValue(std::move(value));
166
+ CreateIndexForPlainValue();
167
+ }
168
+
169
+ inline void PinnableWideColumns::SetPlainValue(std::string&& value) {
170
+ MoveValue(std::move(value));
171
+ CreateIndexForPlainValue();
172
+ }
173
+
145
174
  inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value) {
146
175
  CopyValue(value);
147
176
  return CreateIndexForWideColumns();
@@ -153,6 +182,16 @@ inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value,
153
182
  return CreateIndexForWideColumns();
154
183
  }
155
184
 
185
+ inline Status PinnableWideColumns::SetWideColumnValue(PinnableSlice&& value) {
186
+ MoveValue(std::move(value));
187
+ return CreateIndexForWideColumns();
188
+ }
189
+
190
+ inline Status PinnableWideColumns::SetWideColumnValue(std::string&& value) {
191
+ MoveValue(std::move(value));
192
+ return CreateIndexForWideColumns();
193
+ }
194
+
156
195
  inline void PinnableWideColumns::Reset() {
157
196
  value_.Reset();
158
197
  columns_.clear();
@@ -69,6 +69,7 @@ PerfContext::PerfContext(const PerfContext& other) {
69
69
  internal_delete_skipped_count = other.internal_delete_skipped_count;
70
70
  internal_recent_skipped_count = other.internal_recent_skipped_count;
71
71
  internal_merge_count = other.internal_merge_count;
72
+ internal_merge_point_lookup_count = other.internal_merge_point_lookup_count;
72
73
  internal_range_del_reseek_count = other.internal_range_del_reseek_count;
73
74
  write_wal_time = other.write_wal_time;
74
75
  get_snapshot_time = other.get_snapshot_time;
@@ -188,6 +189,7 @@ PerfContext::PerfContext(PerfContext&& other) noexcept {
188
189
  internal_delete_skipped_count = other.internal_delete_skipped_count;
189
190
  internal_recent_skipped_count = other.internal_recent_skipped_count;
190
191
  internal_merge_count = other.internal_merge_count;
192
+ internal_merge_point_lookup_count = other.internal_merge_point_lookup_count;
191
193
  internal_range_del_reseek_count = other.internal_range_del_reseek_count;
192
194
  write_wal_time = other.write_wal_time;
193
195
  get_snapshot_time = other.get_snapshot_time;
@@ -309,6 +311,7 @@ PerfContext& PerfContext::operator=(const PerfContext& other) {
309
311
  internal_delete_skipped_count = other.internal_delete_skipped_count;
310
312
  internal_recent_skipped_count = other.internal_recent_skipped_count;
311
313
  internal_merge_count = other.internal_merge_count;
314
+ internal_merge_point_lookup_count = other.internal_merge_point_lookup_count;
312
315
  internal_range_del_reseek_count = other.internal_range_del_reseek_count;
313
316
  write_wal_time = other.write_wal_time;
314
317
  get_snapshot_time = other.get_snapshot_time;
@@ -422,6 +425,7 @@ void PerfContext::Reset() {
422
425
  internal_delete_skipped_count = 0;
423
426
  internal_recent_skipped_count = 0;
424
427
  internal_merge_count = 0;
428
+ internal_merge_point_lookup_count = 0;
425
429
  internal_range_del_reseek_count = 0;
426
430
  write_wal_time = 0;
427
431
 
@@ -556,6 +560,7 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
556
560
  PERF_CONTEXT_OUTPUT(internal_delete_skipped_count);
557
561
  PERF_CONTEXT_OUTPUT(internal_recent_skipped_count);
558
562
  PERF_CONTEXT_OUTPUT(internal_merge_count);
563
+ PERF_CONTEXT_OUTPUT(internal_merge_point_lookup_count);
559
564
  PERF_CONTEXT_OUTPUT(internal_range_del_reseek_count);
560
565
  PERF_CONTEXT_OUTPUT(write_wal_time);
561
566
  PERF_CONTEXT_OUTPUT(get_snapshot_time);
@@ -213,7 +213,13 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
213
213
  {BLOB_DB_CACHE_BYTES_READ, "rocksdb.blobdb.cache.bytes.read"},
214
214
  {BLOB_DB_CACHE_BYTES_WRITE, "rocksdb.blobdb.cache.bytes.write"},
215
215
  {READ_ASYNC_MICROS, "rocksdb.read.async.micros"},
216
- {ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"}};
216
+ {ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"},
217
+ {SECONDARY_CACHE_FILTER_HITS, "rocksdb.secondary.cache.filter.hits"},
218
+ {SECONDARY_CACHE_INDEX_HITS, "rocksdb.secondary.cache.index.hits"},
219
+ {SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"},
220
+ {TABLE_OPEN_PREFETCH_TAIL_MISS, "rocksdb.table.open.prefetch.tail.miss"},
221
+ {TABLE_OPEN_PREFETCH_TAIL_HIT, "rocksdb.table.open.prefetch.tail.hit"},
222
+ };
217
223
 
218
224
  const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
219
225
  {DB_GET, "rocksdb.db.get.micros"},
@@ -269,6 +275,8 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
269
275
  {MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
270
276
  {NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
271
277
  {ASYNC_PREFETCH_ABORT_MICROS, "rocksdb.async.prefetch.abort.micros"},
278
+ {TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
279
+ "rocksdb.table.open.prefetch.tail.read.bytes"},
272
280
  };
273
281
 
274
282
  std::shared_ptr<Statistics> CreateDBStatistics() {
@@ -1236,8 +1236,8 @@ class TestSecondaryCache : public SecondaryCache {
1236
1236
  std::unique_ptr<SecondaryCacheResultHandle> Lookup(
1237
1237
  const Slice& /*key*/, const Cache::CacheItemHelper* /*helper*/,
1238
1238
  Cache::CreateContext* /*create_context*/, bool /*wait*/,
1239
- bool /*advise_erase*/, bool& is_in_sec_cache) override {
1240
- is_in_sec_cache = true;
1239
+ bool /*advise_erase*/, bool& kept_in_sec_cache) override {
1240
+ kept_in_sec_cache = true;
1241
1241
  return nullptr;
1242
1242
  }
1243
1243
 
@@ -141,20 +141,30 @@ void PrintStack(void* frames[], int num_frames) {
141
141
  }
142
142
 
143
143
  void PrintStack(int first_frames_to_skip) {
144
- #if defined(ROCKSDB_DLL) && defined(OS_LINUX)
145
- // LIB_MODE=shared build produces mediocre information from the above
146
- // backtrace+addr2line stack trace method. Try to use GDB in that case, but
147
- // only on Linux where we know how to attach to a particular thread.
148
- bool linux_dll = true;
144
+ // Default to getting stack traces with GDB, at least on Linux where we
145
+ // know how to attach to a particular thread.
146
+ //
147
+ // * Address space layout randomization (ASLR) interferes with getting good
148
+ // stack information from backtrace+addr2line. This is more likely to show
149
+ // up with LIB_MODE=shared builds (when kernel.randomize_va_space >= 1)
150
+ // but can also show up with LIB_MODE=static builds (when
151
+ // kernel.randomize_va_space == 2).
152
+ // * It doesn't appear easy to detect when ASLR is in use.
153
+ // * With DEBUG_LEVEL < 2, backtrace() can skip frames that are not skipped
154
+ // in GDB.
155
+ #if defined(OS_LINUX)
156
+ // Default true, override with ROCKSDB_BACKTRACE_STACK=1
157
+ bool gdb_stack_trace = getenv("ROCKSDB_BACKTRACE_STACK") == nullptr;
149
158
  #else
150
- bool linux_dll = false;
159
+ // Default false, override with ROCKSDB_GDB_STACK=1
160
+ bool gdb_stack_trace = getenv("ROCKSDB_GDB_STACK") != nullptr;
151
161
  #endif
152
162
  // Also support invoking interactive debugger on stack trace, with this
153
163
  // envvar set to non-empty
154
164
  char* debug_env = getenv("ROCKSDB_DEBUG");
155
165
  bool debug = debug_env != nullptr && strlen(debug_env) > 0;
156
166
 
157
- if (linux_dll || debug) {
167
+ if (gdb_stack_trace || debug) {
158
168
  // Allow outside debugger to attach, even with Yama security restrictions
159
169
  #ifdef PR_SET_PTRACER_ANY
160
170
  (void)prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
@@ -227,6 +227,7 @@ class WinFileSystem : public FileSystem {
227
227
  const FileOptions& file_options) const override;
228
228
  FileOptions OptimizeForManifestWrite(
229
229
  const FileOptions& file_options) const override;
230
+ bool use_async_io() override { return false; }
230
231
 
231
232
  protected:
232
233
  static uint64_t FileTimeToUnixTime(const FILETIME& ftTime);
@@ -10,6 +10,7 @@ LIB_SOURCES = \
10
10
  cache/lru_cache.cc \
11
11
  cache/compressed_secondary_cache.cc \
12
12
  cache/secondary_cache.cc \
13
+ cache/secondary_cache_adapter.cc \
13
14
  cache/sharded_cache.cc \
14
15
  db/arena_wrapped_db_iter.cc \
15
16
  db/blob/blob_contents.cc \
@@ -96,6 +97,7 @@ LIB_SOURCES = \
96
97
  db/write_batch.cc \
97
98
  db/write_batch_base.cc \
98
99
  db/write_controller.cc \
100
+ db/write_stall_stats.cc \
99
101
  db/write_thread.cc \
100
102
  env/composite_env.cc \
101
103
  env/env.cc \
@@ -198,6 +200,7 @@ LIB_SOURCES = \
198
200
  table/get_context.cc \
199
201
  table/iterator.cc \
200
202
  table/merging_iterator.cc \
203
+ table/compaction_merging_iterator.cc \
201
204
  table/meta_blocks.cc \
202
205
  table/persistent_cache_helper.cc \
203
206
  table/plain/plain_table_bloom.cc \
@@ -381,6 +384,7 @@ TEST_LIB_SOURCES = \
381
384
  db/db_test_util.cc \
382
385
  db/db_with_timestamp_test_util.cc \
383
386
  test_util/mock_time_env.cc \
387
+ test_util/secondary_cache_test_util.cc \
384
388
  test_util/testharness.cc \
385
389
  test_util/testutil.cc \
386
390
  utilities/agg_merge/test_agg_merge.cc \
@@ -94,7 +94,7 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
94
94
  const BlockHandle& handle, const UncompressionDict& uncompression_dict, \
95
95
  CachableEntry<T>* out_parsed_block, GetContext* get_context, \
96
96
  BlockCacheLookupContext* lookup_context, bool for_compaction, \
97
- bool use_cache, bool wait_for_cache, bool async_read) const;
97
+ bool use_cache, bool async_read) const;
98
98
 
99
99
  INSTANTIATE_RETRIEVE_BLOCK(ParsedFullFilterBlock);
100
100
  INSTANTIATE_RETRIEVE_BLOCK(UncompressionDict);
@@ -591,7 +591,7 @@ Status BlockBasedTable::Open(
591
591
  if (!ioptions.allow_mmap_reads) {
592
592
  s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch,
593
593
  tail_prefetch_stats, prefetch_all, preload_all,
594
- &prefetch_buffer);
594
+ &prefetch_buffer, ioptions.stats);
595
595
  // Return error in prefetch path to users.
596
596
  if (!s.ok()) {
597
597
  return s;
@@ -802,7 +802,7 @@ Status BlockBasedTable::PrefetchTail(
802
802
  const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
803
803
  bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
804
804
  const bool prefetch_all, const bool preload_all,
805
- std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer) {
805
+ std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats) {
806
806
  size_t tail_prefetch_size = 0;
807
807
  if (tail_prefetch_stats != nullptr) {
808
808
  // Multiple threads may get a 0 (no history) when running in parallel,
@@ -842,9 +842,12 @@ Status BlockBasedTable::PrefetchTail(
842
842
  }
843
843
 
844
844
  // Use `FilePrefetchBuffer`
845
- prefetch_buffer->reset(
846
- new FilePrefetchBuffer(0 /* readahead_size */, 0 /* max_readahead_size */,
847
- true /* enable */, true /* track_min_offset */));
845
+ prefetch_buffer->reset(new FilePrefetchBuffer(
846
+ 0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */,
847
+ true /* track_min_offset */, false /* implicit_auto_readahead */,
848
+ 0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */,
849
+ nullptr /* fs */, nullptr /* clock */, stats,
850
+ FilePrefetchBufferUsage::kTableOpenPrefetchTail));
848
851
 
849
852
  IOOptions opts;
850
853
  Status s = file->PrepareIOOptions(ro, opts);
@@ -1251,24 +1254,31 @@ Status BlockBasedTable::ReadMetaIndexBlock(
1251
1254
  }
1252
1255
 
1253
1256
  template <typename TBlocklike>
1254
- WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
1255
- const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
1256
- CachableEntry<TBlocklike>* out_parsed_block, const bool wait,
1257
- GetContext* get_context) const {
1258
- assert(out_parsed_block);
1259
- assert(out_parsed_block->IsEmpty());
1257
+ Cache::Priority BlockBasedTable::GetCachePriority() const {
1260
1258
  // Here we treat the legacy name "...index_and_filter_blocks..." to mean all
1261
1259
  // metadata blocks that might go into block cache, EXCEPT only those needed
1262
1260
  // for the read path (Get, etc.). TableProperties should not be needed on the
1263
1261
  // read path (prefix extractor setting is an O(1) size special case that we
1264
1262
  // are working not to require from TableProperties), so it is not given
1265
1263
  // high-priority treatment if it should go into BlockCache.
1266
- const Cache::Priority priority =
1267
- rep_->table_options.cache_index_and_filter_blocks_with_high_priority &&
1268
- TBlocklike::kBlockType != BlockType::kData &&
1269
- TBlocklike::kBlockType != BlockType::kProperties
1270
- ? Cache::Priority::HIGH
1271
- : Cache::Priority::LOW;
1264
+ if constexpr (TBlocklike::kBlockType == BlockType::kData ||
1265
+ TBlocklike::kBlockType == BlockType::kProperties) {
1266
+ return Cache::Priority::LOW;
1267
+ } else if (rep_->table_options
1268
+ .cache_index_and_filter_blocks_with_high_priority) {
1269
+ return Cache::Priority::HIGH;
1270
+ } else {
1271
+ return Cache::Priority::LOW;
1272
+ }
1273
+ }
1274
+
1275
+ template <typename TBlocklike>
1276
+ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
1277
+ const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
1278
+ CachableEntry<TBlocklike>* out_parsed_block,
1279
+ GetContext* get_context) const {
1280
+ assert(out_parsed_block);
1281
+ assert(out_parsed_block->IsEmpty());
1272
1282
 
1273
1283
  Status s;
1274
1284
  Statistics* statistics = rep_->ioptions.statistics.get();
@@ -1277,8 +1287,8 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::GetDataBlockFromCache(
1277
1287
  if (block_cache) {
1278
1288
  assert(!cache_key.empty());
1279
1289
  auto cache_handle = block_cache.LookupFull(
1280
- cache_key, &rep_->create_context, priority, wait, statistics,
1281
- rep_->ioptions.lowest_used_cache_tier);
1290
+ cache_key, &rep_->create_context, GetCachePriority<TBlocklike>(),
1291
+ statistics, rep_->ioptions.lowest_used_cache_tier);
1282
1292
 
1283
1293
  // Avoid updating metrics here if the handle is not complete yet. This
1284
1294
  // happens with MultiGet and secondary cache. So update the metrics only
@@ -1311,11 +1321,6 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
1311
1321
  MemoryAllocator* memory_allocator, GetContext* get_context) const {
1312
1322
  const ImmutableOptions& ioptions = rep_->ioptions;
1313
1323
  const uint32_t format_version = rep_->table_options.format_version;
1314
- const Cache::Priority priority =
1315
- rep_->table_options.cache_index_and_filter_blocks_with_high_priority &&
1316
- TBlocklike::kBlockType != BlockType::kData
1317
- ? Cache::Priority::HIGH
1318
- : Cache::Priority::LOW;
1319
1324
  assert(out_parsed_block);
1320
1325
  assert(out_parsed_block->IsEmpty());
1321
1326
 
@@ -1346,7 +1351,7 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::PutDataBlockToCache(
1346
1351
  size_t charge = block_holder->ApproximateMemoryUsage();
1347
1352
  BlockCacheTypedHandle<TBlocklike>* cache_handle = nullptr;
1348
1353
  s = block_cache.InsertFull(cache_key, block_holder.get(), charge,
1349
- &cache_handle, priority,
1354
+ &cache_handle, GetCachePriority<TBlocklike>(),
1350
1355
  rep_->ioptions.lowest_used_cache_tier);
1351
1356
 
1352
1357
  if (s.ok()) {
@@ -1443,10 +1448,9 @@ WithBlocklikeCheck<Status, TBlocklike>
1443
1448
  BlockBasedTable::MaybeReadBlockAndLoadToCache(
1444
1449
  FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1445
1450
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1446
- const bool wait, const bool for_compaction,
1447
- CachableEntry<TBlocklike>* out_parsed_block, GetContext* get_context,
1448
- BlockCacheLookupContext* lookup_context, BlockContents* contents,
1449
- bool async_read) const {
1451
+ bool for_compaction, CachableEntry<TBlocklike>* out_parsed_block,
1452
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
1453
+ BlockContents* contents, bool async_read) const {
1450
1454
  assert(out_parsed_block != nullptr);
1451
1455
  const bool no_io = (ro.read_tier == kBlockCacheTier);
1452
1456
  BlockCacheInterface<TBlocklike> block_cache{
@@ -1465,7 +1469,7 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
1465
1469
  key = key_data.AsSlice();
1466
1470
 
1467
1471
  if (!contents) {
1468
- s = GetDataBlockFromCache(key, block_cache, out_parsed_block, wait,
1472
+ s = GetDataBlockFromCache(key, block_cache, out_parsed_block,
1469
1473
  get_context);
1470
1474
  // Value could still be null at this point, so check the cache handle
1471
1475
  // and update the read pattern for prefetching
@@ -1626,15 +1630,15 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::RetrieveBlock(
1626
1630
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1627
1631
  CachableEntry<TBlocklike>* out_parsed_block, GetContext* get_context,
1628
1632
  BlockCacheLookupContext* lookup_context, bool for_compaction,
1629
- bool use_cache, bool wait_for_cache, bool async_read) const {
1633
+ bool use_cache, bool async_read) const {
1630
1634
  assert(out_parsed_block);
1631
1635
  assert(out_parsed_block->IsEmpty());
1632
1636
 
1633
1637
  Status s;
1634
1638
  if (use_cache) {
1635
1639
  s = MaybeReadBlockAndLoadToCache(
1636
- prefetch_buffer, ro, handle, uncompression_dict, wait_for_cache,
1637
- for_compaction, out_parsed_block, get_context, lookup_context,
1640
+ prefetch_buffer, ro, handle, uncompression_dict, for_compaction,
1641
+ out_parsed_block, get_context, lookup_context,
1638
1642
  /*contents=*/nullptr, async_read);
1639
1643
 
1640
1644
  if (!s.ok()) {
@@ -336,10 +336,9 @@ class BlockBasedTable : public TableReader {
336
336
  WithBlocklikeCheck<Status, TBlocklike> MaybeReadBlockAndLoadToCache(
337
337
  FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
338
338
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
339
- const bool wait, const bool for_compaction,
340
- CachableEntry<TBlocklike>* block_entry, GetContext* get_context,
341
- BlockCacheLookupContext* lookup_context, BlockContents* contents,
342
- bool async_read) const;
339
+ bool for_compaction, CachableEntry<TBlocklike>* block_entry,
340
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
341
+ BlockContents* contents, bool async_read) const;
343
342
 
344
343
  // Similar to the above, with one crucial difference: it will retrieve the
345
344
  // block from the file even if there are no caches configured (assuming the
@@ -350,16 +349,14 @@ class BlockBasedTable : public TableReader {
350
349
  const BlockHandle& handle, const UncompressionDict& uncompression_dict,
351
350
  CachableEntry<TBlocklike>* block_entry, GetContext* get_context,
352
351
  BlockCacheLookupContext* lookup_context, bool for_compaction,
353
- bool use_cache, bool wait_for_cache, bool async_read) const;
352
+ bool use_cache, bool async_read) const;
354
353
 
355
354
  DECLARE_SYNC_AND_ASYNC_CONST(
356
355
  void, RetrieveMultipleBlocks, const ReadOptions& options,
357
356
  const MultiGetRange* batch,
358
357
  const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
359
- autovector<Status, MultiGetContext::MAX_BATCH_SIZE>* statuses,
360
- autovector<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE>*
361
- results,
362
- char* scratch, const UncompressionDict& uncompression_dict);
358
+ Status* statuses, CachableEntry<Block>* results, char* scratch,
359
+ const UncompressionDict& uncompression_dict);
363
360
 
364
361
  // Get the iterator from the index reader.
365
362
  //
@@ -379,6 +376,9 @@ class BlockBasedTable : public TableReader {
379
376
  IndexBlockIter* input_iter, GetContext* get_context,
380
377
  BlockCacheLookupContext* lookup_context) const;
381
378
 
379
+ template <typename TBlocklike>
380
+ Cache::Priority GetCachePriority() const;
381
+
382
382
  // Read block cache from block caches (if set): block_cache.
383
383
  // On success, Status::OK will be returned and @block will be populated with
384
384
  // pointer to the block as well as its block handle.
@@ -387,8 +387,7 @@ class BlockBasedTable : public TableReader {
387
387
  template <typename TBlocklike>
388
388
  WithBlocklikeCheck<Status, TBlocklike> GetDataBlockFromCache(
389
389
  const Slice& cache_key, BlockCacheInterface<TBlocklike> block_cache,
390
- CachableEntry<TBlocklike>* block, const bool wait,
391
- GetContext* get_context) const;
390
+ CachableEntry<TBlocklike>* block, GetContext* get_context) const;
392
391
 
393
392
  // Put a maybe compressed block to the corresponding block caches.
394
393
  // This method will perform decompression against block_contents if needed
@@ -444,7 +443,7 @@ class BlockBasedTable : public TableReader {
444
443
  const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
445
444
  bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
446
445
  const bool prefetch_all, const bool preload_all,
447
- std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer);
446
+ std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats);
448
447
  Status ReadMetaIndexBlock(const ReadOptions& ro,
449
448
  FilePrefetchBuffer* prefetch_buffer,
450
449
  std::unique_ptr<Block>* metaindex_block,
@@ -77,15 +77,15 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
77
77
  const UncompressionDict& dict = uncompression_dict.GetValue()
78
78
  ? *uncompression_dict.GetValue()
79
79
  : UncompressionDict::GetEmptyDict();
80
- s = RetrieveBlock(
81
- prefetch_buffer, ro, handle, dict, &block.As<IterBlocklike>(),
82
- get_context, lookup_context, for_compaction,
83
- /* use_cache */ true, /* wait_for_cache */ true, async_read);
80
+ s = RetrieveBlock(prefetch_buffer, ro, handle, dict,
81
+ &block.As<IterBlocklike>(), get_context, lookup_context,
82
+ for_compaction,
83
+ /* use_cache */ true, async_read);
84
84
  } else {
85
85
  s = RetrieveBlock(
86
86
  prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(),
87
87
  &block.As<IterBlocklike>(), get_context, lookup_context, for_compaction,
88
- /* use_cache */ true, /* wait_for_cache */ true, async_read);
88
+ /* use_cache */ true, async_read);
89
89
  }
90
90
 
91
91
  if (s.IsTryAgain() && async_read) {