@nxtedition/rocksdb 8.1.3 → 8.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -1
  2. package/deps/rocksdb/rocksdb/Makefile +2 -2
  3. package/deps/rocksdb/rocksdb/TARGETS +4 -2
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +0 -11
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  32. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  35. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  36. package/deps/rocksdb/rocksdb/db/builder.cc +17 -12
  37. package/deps/rocksdb/rocksdb/db/column_family.cc +0 -1
  38. package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +0 -5
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +3 -0
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +0 -2
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +28 -27
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -17
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +254 -139
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -5
  47. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +0 -5
  48. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  49. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  50. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +125 -0
  51. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  52. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +1 -1
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +27 -15
  54. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -49
  55. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +34 -24
  56. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  57. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  58. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  59. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  60. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +117 -210
  61. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  62. package/deps/rocksdb/rocksdb/db/db_test_util.h +36 -24
  63. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  64. package/deps/rocksdb/rocksdb/db/flush_job.cc +6 -6
  65. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  66. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  67. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +0 -4
  68. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  69. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  70. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  71. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  72. package/deps/rocksdb/rocksdb/db/memtable.cc +30 -5
  73. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -33
  74. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  75. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  76. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  77. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  78. package/deps/rocksdb/rocksdb/db/version_builder.cc +12 -9
  79. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -0
  80. package/deps/rocksdb/rocksdb/db/version_set.cc +20 -28
  81. package/deps/rocksdb/rocksdb/db/version_set.h +2 -2
  82. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  83. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  84. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  85. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +358 -214
  86. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  87. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  88. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  89. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  90. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  91. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  92. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  93. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  94. package/deps/rocksdb/rocksdb/src.mk +2 -1
  95. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  99. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +156 -223
  100. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  101. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +46 -18
  102. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  103. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  104. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  105. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  106. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  107. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  108. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  109. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  110. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  111. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  112. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  113. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  114. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  115. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  116. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  117. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  118. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  119. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +92 -7
  120. package/deps/rocksdb/rocksdb/table/merging_iterator.h +0 -80
  121. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  122. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  123. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  124. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  125. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  126. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  127. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  128. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  129. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  130. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  131. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  132. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  133. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  134. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  135. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  136. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  137. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +52 -0
  138. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  139. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  140. package/deps/rocksdb/rocksdb.gyp +0 -3
  141. package/index.js +2 -2
  142. package/package.json +1 -1
  143. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  144. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
  145. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +0 -142
  146. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +0 -241
@@ -56,13 +56,12 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
56
56
  }
57
57
  }
58
58
 
59
- Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
60
- const Slice& key, const Slice* value,
61
- const std::vector<Slice>& operands,
62
- std::string* result, Logger* logger,
63
- Statistics* statistics, SystemClock* clock,
64
- Slice* result_operand,
65
- bool update_num_ops_stats) {
59
+ Status MergeHelper::TimedFullMerge(
60
+ const MergeOperator* merge_operator, const Slice& key, const Slice* value,
61
+ const std::vector<Slice>& operands, std::string* result, Logger* logger,
62
+ Statistics* statistics, SystemClock* clock, Slice* result_operand,
63
+ bool update_num_ops_stats,
64
+ MergeOperator::OpFailureScope* op_failure_scope) {
66
65
  assert(merge_operator != nullptr);
67
66
 
68
67
  if (operands.empty()) {
@@ -104,6 +103,14 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
104
103
  statistics ? timer.ElapsedNanos() : 0);
105
104
  }
106
105
 
106
+ if (op_failure_scope != nullptr) {
107
+ *op_failure_scope = merge_out.op_failure_scope;
108
+ // Apply default per merge_operator.h
109
+ if (*op_failure_scope == MergeOperator::OpFailureScope::kDefault) {
110
+ *op_failure_scope = MergeOperator::OpFailureScope::kTryMerge;
111
+ }
112
+ }
113
+
107
114
  if (!success) {
108
115
  RecordTick(statistics, NUMBER_MERGE_FAILURES);
109
116
  return Status::Corruption("Error: Could not perform merge.");
@@ -115,7 +122,8 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
115
122
  Status MergeHelper::TimedFullMergeWithEntity(
116
123
  const MergeOperator* merge_operator, const Slice& key, Slice base_entity,
117
124
  const std::vector<Slice>& operands, std::string* result, Logger* logger,
118
- Statistics* statistics, SystemClock* clock, bool update_num_ops_stats) {
125
+ Statistics* statistics, SystemClock* clock, bool update_num_ops_stats,
126
+ MergeOperator::OpFailureScope* op_failure_scope) {
119
127
  WideColumns base_columns;
120
128
 
121
129
  {
@@ -137,11 +145,10 @@ Status MergeHelper::TimedFullMergeWithEntity(
137
145
  std::string merge_result;
138
146
 
139
147
  {
140
- constexpr Slice* result_operand = nullptr;
141
-
142
- const Status s = TimedFullMerge(
143
- merge_operator, key, &value_of_default, operands, &merge_result, logger,
144
- statistics, clock, result_operand, update_num_ops_stats);
148
+ const Status s = TimedFullMerge(merge_operator, key, &value_of_default,
149
+ operands, &merge_result, logger, statistics,
150
+ clock, nullptr /* result_operand */,
151
+ update_num_ops_stats, op_failure_scope);
145
152
  if (!s.ok()) {
146
153
  return s;
147
154
  }
@@ -224,10 +231,6 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
224
231
  s = Status::ShutdownInProgress();
225
232
  return s;
226
233
  }
227
- // Skip range tombstones emitted by the compaction iterator.
228
- if (iter->IsDeleteRangeSentinelKey()) {
229
- continue;
230
- }
231
234
 
232
235
  ParsedInternalKey ikey;
233
236
  assert(keys_.size() == merge_context_.GetNumOperands());
@@ -290,11 +293,10 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
290
293
  return s;
291
294
  }
292
295
 
293
- // TODO(noetzli) If the merge operator returns false, we are currently
294
- // (almost) silently dropping the put/delete. That's probably not what we
295
- // want. Also if we're in compaction and it's a put, it would be nice to
296
- // run compaction filter on it.
296
+ // TODO: if we're in compaction and it's a put, it would be nice to run
297
+ // compaction filter on it.
297
298
  std::string merge_result;
299
+ MergeOperator::OpFailureScope op_failure_scope;
298
300
 
299
301
  if (range_del_agg &&
300
302
  range_del_agg->ShouldDelete(
@@ -303,7 +305,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
303
305
  merge_context_.GetOperands(), &merge_result, logger_,
304
306
  stats_, clock_,
305
307
  /* result_operand */ nullptr,
306
- /* update_num_ops_stats */ false);
308
+ /* update_num_ops_stats */ false, &op_failure_scope);
307
309
  } else if (ikey.type == kTypeValue) {
308
310
  const Slice val = iter->value();
309
311
 
@@ -311,7 +313,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
311
313
  merge_context_.GetOperands(), &merge_result, logger_,
312
314
  stats_, clock_,
313
315
  /* result_operand */ nullptr,
314
- /* update_num_ops_stats */ false);
316
+ /* update_num_ops_stats */ false, &op_failure_scope);
315
317
  } else if (ikey.type == kTypeBlobIndex) {
316
318
  BlobIndex blob_index;
317
319
 
@@ -345,18 +347,18 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
345
347
  merge_context_.GetOperands(), &merge_result, logger_,
346
348
  stats_, clock_,
347
349
  /* result_operand */ nullptr,
348
- /* update_num_ops_stats */ false);
350
+ /* update_num_ops_stats */ false, &op_failure_scope);
349
351
  } else if (ikey.type == kTypeWideColumnEntity) {
350
352
  s = TimedFullMergeWithEntity(
351
353
  user_merge_operator_, ikey.user_key, iter->value(),
352
354
  merge_context_.GetOperands(), &merge_result, logger_, stats_,
353
- clock_, /* update_num_ops_stats */ false);
355
+ clock_, /* update_num_ops_stats */ false, &op_failure_scope);
354
356
  } else {
355
357
  s = TimedFullMerge(user_merge_operator_, ikey.user_key, nullptr,
356
358
  merge_context_.GetOperands(), &merge_result, logger_,
357
359
  stats_, clock_,
358
360
  /* result_operand */ nullptr,
359
- /* update_num_ops_stats */ false);
361
+ /* update_num_ops_stats */ false, &op_failure_scope);
360
362
  }
361
363
 
362
364
  // We store the result in keys_.back() and operands_.back()
@@ -372,10 +374,16 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
372
374
  merge_context_.Clear();
373
375
  keys_.emplace_front(std::move(original_key));
374
376
  merge_context_.PushOperand(merge_result);
375
- }
376
377
 
377
- // move iter to the next entry
378
- iter->Next();
378
+ // move iter to the next entry
379
+ iter->Next();
380
+ } else if (op_failure_scope ==
381
+ MergeOperator::OpFailureScope::kMustMerge) {
382
+ // Change to `Status::MergeInProgress()` to denote output consists of
383
+ // merge operands only. Leave `iter` at the non-merge entry so it will
384
+ // be output after.
385
+ s = Status::MergeInProgress();
386
+ }
379
387
  return s;
380
388
  } else {
381
389
  // hit a merge
@@ -486,10 +494,12 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
486
494
  assert(merge_context_.GetNumOperands() >= 1);
487
495
  assert(merge_context_.GetNumOperands() == keys_.size());
488
496
  std::string merge_result;
489
- s = TimedFullMerge(
490
- user_merge_operator_, orig_ikey.user_key, nullptr,
491
- merge_context_.GetOperands(), &merge_result, logger_, stats_, clock_,
492
- /* result_operand */ nullptr, /* update_num_ops_stats */ false);
497
+ MergeOperator::OpFailureScope op_failure_scope;
498
+ s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr,
499
+ merge_context_.GetOperands(), &merge_result, logger_,
500
+ stats_, clock_,
501
+ /* result_operand */ nullptr,
502
+ /* update_num_ops_stats */ false, &op_failure_scope);
493
503
  if (s.ok()) {
494
504
  // The original key encountered
495
505
  // We are certain that keys_ is not empty here (see assertions couple of
@@ -501,6 +511,10 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
501
511
  merge_context_.Clear();
502
512
  keys_.emplace_front(std::move(original_key));
503
513
  merge_context_.PushOperand(merge_result);
514
+ } else if (op_failure_scope == MergeOperator::OpFailureScope::kMustMerge) {
515
+ // Change to `Status::MergeInProgress()` to denote output consists of
516
+ // merge operands only.
517
+ s = Status::MergeInProgress();
504
518
  }
505
519
  } else {
506
520
  // We haven't seen the beginning of the key nor a Put/Delete.
@@ -48,19 +48,22 @@ class MergeHelper {
48
48
  // the latency is sensitive.
49
49
  // Returns one of the following statuses:
50
50
  // - OK: Entries were successfully merged.
51
- // - Corruption: Merge operator reported unsuccessful merge.
51
+ // - Corruption: Merge operator reported unsuccessful merge. The scope of the
52
+ // damage will be stored in `*op_failure_scope` when `op_failure_scope` is
53
+ // not nullptr
52
54
  static Status TimedFullMerge(const MergeOperator* merge_operator,
53
55
  const Slice& key, const Slice* value,
54
56
  const std::vector<Slice>& operands,
55
57
  std::string* result, Logger* logger,
56
58
  Statistics* statistics, SystemClock* clock,
57
- Slice* result_operand,
58
- bool update_num_ops_stats);
59
+ Slice* result_operand, bool update_num_ops_stats,
60
+ MergeOperator::OpFailureScope* op_failure_scope);
59
61
 
60
62
  static Status TimedFullMergeWithEntity(
61
63
  const MergeOperator* merge_operator, const Slice& key, Slice base_entity,
62
64
  const std::vector<Slice>& operands, std::string* result, Logger* logger,
63
- Statistics* statistics, SystemClock* clock, bool update_num_ops_stats);
65
+ Statistics* statistics, SystemClock* clock, bool update_num_ops_stats,
66
+ MergeOperator::OpFailureScope* op_failure_scope);
64
67
 
65
68
  // During compaction, merge entries until we hit
66
69
  // - a corrupted key
@@ -69,6 +72,12 @@ class MergeHelper {
69
72
  // - a specific sequence number (snapshot boundary),
70
73
  // - REMOVE_AND_SKIP_UNTIL returned from compaction filter,
71
74
  // or - the end of iteration
75
+ //
76
+ // The result(s) of the merge can be accessed in `MergeHelper::keys()` and
77
+ // `MergeHelper::values()`, which are invalidated the next time `MergeUntil()`
78
+ // is called. `MergeOutputIterator` is specially designed to iterate the
79
+ // results of a `MergeHelper`'s most recent `MergeUntil()`.
80
+ //
72
81
  // iter: (IN) points to the first merge type entry
73
82
  // (OUT) points to the first entry not included in the merge process
74
83
  // range_del_agg: (IN) filters merge operands covered by range tombstones.
@@ -85,8 +94,7 @@ class MergeHelper {
85
94
  //
86
95
  // Returns one of the following statuses:
87
96
  // - OK: Entries were successfully merged.
88
- // - MergeInProgress: Put/Delete not encountered, and didn't reach the start
89
- // of key's history. Output consists of merge operands only.
97
+ // - MergeInProgress: Output consists of merge operands only.
90
98
  // - Corruption: Merge operator reported unsuccessful merge or a corrupted
91
99
  // key has been encountered and not expected (applies only when compiling
92
100
  // with asserts removed).
@@ -31,16 +31,6 @@
31
31
  #include "util/coding.h"
32
32
  #include "util/stop_watch.h"
33
33
 
34
- namespace ROCKSDB_NAMESPACE {
35
- namespace {
36
- template <class T>
37
- static void DeleteEntry(const Slice& /*key*/, void* value) {
38
- T* typed_value = reinterpret_cast<T*>(value);
39
- delete typed_value;
40
- }
41
- } // anonymous namespace
42
- } // namespace ROCKSDB_NAMESPACE
43
-
44
34
  // Generate the regular and coroutine versions of some methods by
45
35
  // including table_cache_sync_and_async.h twice
46
36
  // Macros in the header will expand differently based on whether
@@ -58,12 +48,6 @@ namespace ROCKSDB_NAMESPACE {
58
48
 
59
49
  namespace {
60
50
 
61
- static void UnrefEntry(void* arg1, void* arg2) {
62
- Cache* cache = reinterpret_cast<Cache*>(arg1);
63
- Cache::Handle* h = reinterpret_cast<Cache::Handle*>(arg2);
64
- cache->Release(h);
65
- }
66
-
67
51
  static Slice GetSliceForFileNumber(const uint64_t* file_number) {
68
52
  return Slice(reinterpret_cast<const char*>(file_number),
69
53
  sizeof(*file_number));
@@ -105,14 +89,6 @@ TableCache::TableCache(const ImmutableOptions& ioptions,
105
89
 
106
90
  TableCache::~TableCache() {}
107
91
 
108
- TableReader* TableCache::GetTableReaderFromHandle(Cache::Handle* handle) {
109
- return reinterpret_cast<TableReader*>(cache_->Value(handle));
110
- }
111
-
112
- void TableCache::ReleaseHandle(Cache::Handle* handle) {
113
- cache_->Release(handle);
114
- }
115
-
116
92
  Status TableCache::GetTableReader(
117
93
  const ReadOptions& ro, const FileOptions& file_options,
118
94
  const InternalKeyComparator& internal_comparator,
@@ -178,17 +154,10 @@ Status TableCache::GetTableReader(
178
154
  return s;
179
155
  }
180
156
 
181
- void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) {
182
- ReleaseHandle(handle);
183
- uint64_t number = fd.GetNumber();
184
- Slice key = GetSliceForFileNumber(&number);
185
- cache_->Erase(key);
186
- }
187
-
188
157
  Status TableCache::FindTable(
189
158
  const ReadOptions& ro, const FileOptions& file_options,
190
159
  const InternalKeyComparator& internal_comparator,
191
- const FileMetaData& file_meta, Cache::Handle** handle,
160
+ const FileMetaData& file_meta, TypedHandle** handle,
192
161
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
193
162
  const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
194
163
  bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
@@ -196,7 +165,7 @@ Status TableCache::FindTable(
196
165
  PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
197
166
  uint64_t number = file_meta.fd.GetNumber();
198
167
  Slice key = GetSliceForFileNumber(&number);
199
- *handle = cache_->Lookup(key);
168
+ *handle = cache_.Lookup(key);
200
169
  TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0",
201
170
  const_cast<bool*>(&no_io));
202
171
 
@@ -206,7 +175,7 @@ Status TableCache::FindTable(
206
175
  }
207
176
  MutexLock load_lock(loader_mutex_.get(key));
208
177
  // We check the cache again under loading mutex
209
- *handle = cache_->Lookup(key);
178
+ *handle = cache_.Lookup(key);
210
179
  if (*handle != nullptr) {
211
180
  return Status::OK();
212
181
  }
@@ -224,8 +193,7 @@ Status TableCache::FindTable(
224
193
  // We do not cache error results so that if the error is transient,
225
194
  // or somebody repairs the file, we recover automatically.
226
195
  } else {
227
- s = cache_->Insert(key, table_reader.get(), 1, &DeleteEntry<TableReader>,
228
- handle);
196
+ s = cache_.Insert(key, table_reader.get(), 1, handle);
229
197
  if (s.ok()) {
230
198
  // Release ownership of table reader.
231
199
  table_reader.release();
@@ -251,7 +219,7 @@ InternalIterator* TableCache::NewIterator(
251
219
 
252
220
  Status s;
253
221
  TableReader* table_reader = nullptr;
254
- Cache::Handle* handle = nullptr;
222
+ TypedHandle* handle = nullptr;
255
223
  if (table_reader_ptr != nullptr) {
256
224
  *table_reader_ptr = nullptr;
257
225
  }
@@ -266,7 +234,7 @@ InternalIterator* TableCache::NewIterator(
266
234
  level, true /* prefetch_index_and_filter_in_cache */,
267
235
  max_file_size_for_l0_meta_pin, file_meta.temperature);
268
236
  if (s.ok()) {
269
- table_reader = GetTableReaderFromHandle(handle);
237
+ table_reader = cache_.Value(handle);
270
238
  }
271
239
  }
272
240
  InternalIterator* result = nullptr;
@@ -280,7 +248,7 @@ InternalIterator* TableCache::NewIterator(
280
248
  file_options.compaction_readahead_size, allow_unprepared_value);
281
249
  }
282
250
  if (handle != nullptr) {
283
- result->RegisterCleanup(&UnrefEntry, cache_, handle);
251
+ cache_.RegisterReleaseAsCleanup(handle, *result);
284
252
  handle = nullptr; // prevent from releasing below
285
253
  }
286
254
 
@@ -330,7 +298,7 @@ InternalIterator* TableCache::NewIterator(
330
298
  }
331
299
 
332
300
  if (handle != nullptr) {
333
- ReleaseHandle(handle);
301
+ cache_.Release(handle);
334
302
  }
335
303
  if (!s.ok()) {
336
304
  assert(result == nullptr);
@@ -348,12 +316,12 @@ Status TableCache::GetRangeTombstoneIterator(
348
316
  const FileDescriptor& fd = file_meta.fd;
349
317
  Status s;
350
318
  TableReader* t = fd.table_reader;
351
- Cache::Handle* handle = nullptr;
319
+ TypedHandle* handle = nullptr;
352
320
  if (t == nullptr) {
353
321
  s = FindTable(options, file_options_, internal_comparator, file_meta,
354
322
  &handle);
355
323
  if (s.ok()) {
356
- t = GetTableReaderFromHandle(handle);
324
+ t = cache_.Value(handle);
357
325
  }
358
326
  }
359
327
  if (s.ok()) {
@@ -362,9 +330,9 @@ Status TableCache::GetRangeTombstoneIterator(
362
330
  }
363
331
  if (handle) {
364
332
  if (*out_iter) {
365
- (*out_iter)->RegisterCleanup(&UnrefEntry, cache_, handle);
333
+ cache_.RegisterReleaseAsCleanup(handle, **out_iter);
366
334
  } else {
367
- ReleaseHandle(handle);
335
+ cache_.Release(handle);
368
336
  }
369
337
  }
370
338
  return s;
@@ -411,16 +379,10 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key,
411
379
  bool found = false;
412
380
 
413
381
  row_cache_key.TrimAppend(prefix_size, user_key.data(), user_key.size());
414
- if (auto row_handle =
415
- ioptions_.row_cache->Lookup(row_cache_key.GetUserKey())) {
382
+ RowCacheInterface row_cache{ioptions_.row_cache.get()};
383
+ if (auto row_handle = row_cache.Lookup(row_cache_key.GetUserKey())) {
416
384
  // Cleanable routine to release the cache entry
417
385
  Cleanable value_pinner;
418
- auto release_cache_entry_func = [](void* cache_to_clean,
419
- void* cache_handle) {
420
- ((Cache*)cache_to_clean)->Release((Cache::Handle*)cache_handle);
421
- };
422
- auto found_row_cache_entry =
423
- static_cast<const std::string*>(ioptions_.row_cache->Value(row_handle));
424
386
  // If it comes here value is located on the cache.
425
387
  // found_row_cache_entry points to the value on cache,
426
388
  // and value_pinner has cleanup procedure for the cached entry.
@@ -429,9 +391,8 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key,
429
391
  // cleanup routine under value_pinner will be delegated to
430
392
  // get_context.pinnable_slice_. Cache entry is released when
431
393
  // get_context.pinnable_slice_ is reset.
432
- value_pinner.RegisterCleanup(release_cache_entry_func,
433
- ioptions_.row_cache.get(), row_handle);
434
- replayGetContextLog(*found_row_cache_entry, user_key, get_context,
394
+ row_cache.RegisterReleaseAsCleanup(row_handle, value_pinner);
395
+ replayGetContextLog(*row_cache.Value(row_handle), user_key, get_context,
435
396
  &value_pinner);
436
397
  RecordTick(ioptions_.stats, ROW_CACHE_HIT);
437
398
  found = true;
@@ -470,7 +431,7 @@ Status TableCache::Get(
470
431
  #endif // ROCKSDB_LITE
471
432
  Status s;
472
433
  TableReader* t = fd.table_reader;
473
- Cache::Handle* handle = nullptr;
434
+ TypedHandle* handle = nullptr;
474
435
  if (!done) {
475
436
  assert(s.ok());
476
437
  if (t == nullptr) {
@@ -481,7 +442,7 @@ Status TableCache::Get(
481
442
  level, true /* prefetch_index_and_filter_in_cache */,
482
443
  max_file_size_for_l0_meta_pin, file_meta.temperature);
483
444
  if (s.ok()) {
484
- t = GetTableReaderFromHandle(handle);
445
+ t = cache_.Value(handle);
485
446
  }
486
447
  }
487
448
  SequenceNumber* max_covering_tombstone_seq =
@@ -517,18 +478,17 @@ Status TableCache::Get(
517
478
  #ifndef ROCKSDB_LITE
518
479
  // Put the replay log in row cache only if something was found.
519
480
  if (!done && s.ok() && row_cache_entry && !row_cache_entry->empty()) {
481
+ RowCacheInterface row_cache{ioptions_.row_cache.get()};
520
482
  size_t charge = row_cache_entry->capacity() + sizeof(std::string);
521
- void* row_ptr = new std::string(std::move(*row_cache_entry));
483
+ auto row_ptr = new std::string(std::move(*row_cache_entry));
522
484
  // If row cache is full, it's OK to continue.
523
- ioptions_.row_cache
524
- ->Insert(row_cache_key.GetUserKey(), row_ptr, charge,
525
- &DeleteEntry<std::string>)
485
+ row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge)
526
486
  .PermitUncheckedError();
527
487
  }
528
488
  #endif // ROCKSDB_LITE
529
489
 
530
490
  if (handle != nullptr) {
531
- ReleaseHandle(handle);
491
+ cache_.Release(handle);
532
492
  }
533
493
  return s;
534
494
  }
@@ -561,7 +521,7 @@ Status TableCache::MultiGetFilter(
561
521
  const FileMetaData& file_meta,
562
522
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
563
523
  HistogramImpl* file_read_hist, int level,
564
- MultiGetContext::Range* mget_range, Cache::Handle** table_handle) {
524
+ MultiGetContext::Range* mget_range, TypedHandle** table_handle) {
565
525
  auto& fd = file_meta.fd;
566
526
  #ifndef ROCKSDB_LITE
567
527
  IterKey row_cache_key;
@@ -577,7 +537,7 @@ Status TableCache::MultiGetFilter(
577
537
  #endif // ROCKSDB_LITE
578
538
  Status s;
579
539
  TableReader* t = fd.table_reader;
580
- Cache::Handle* handle = nullptr;
540
+ TypedHandle* handle = nullptr;
581
541
  MultiGetContext::Range tombstone_range(*mget_range, mget_range->begin(),
582
542
  mget_range->end());
583
543
  if (t == nullptr) {
@@ -588,7 +548,7 @@ Status TableCache::MultiGetFilter(
588
548
  level, true /* prefetch_index_and_filter_in_cache */,
589
549
  /*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
590
550
  if (s.ok()) {
591
- t = GetTableReaderFromHandle(handle);
551
+ t = cache_.Value(handle);
592
552
  }
593
553
  *table_handle = handle;
594
554
  }
@@ -602,7 +562,7 @@ Status TableCache::MultiGetFilter(
602
562
  UpdateRangeTombstoneSeqnums(options, t, tombstone_range);
603
563
  }
604
564
  if (mget_range->empty() && handle) {
605
- ReleaseHandle(handle);
565
+ cache_.Release(handle);
606
566
  *table_handle = nullptr;
607
567
  }
608
568
 
@@ -623,16 +583,16 @@ Status TableCache::GetTableProperties(
623
583
  return Status::OK();
624
584
  }
625
585
 
626
- Cache::Handle* table_handle = nullptr;
586
+ TypedHandle* table_handle = nullptr;
627
587
  Status s = FindTable(ReadOptions(), file_options, internal_comparator,
628
588
  file_meta, &table_handle, prefix_extractor, no_io);
629
589
  if (!s.ok()) {
630
590
  return s;
631
591
  }
632
592
  assert(table_handle);
633
- auto table = GetTableReaderFromHandle(table_handle);
593
+ auto table = cache_.Value(table_handle);
634
594
  *properties = table->GetTableProperties();
635
- ReleaseHandle(table_handle);
595
+ cache_.Release(table_handle);
636
596
  return s;
637
597
  }
638
598
 
@@ -641,18 +601,18 @@ Status TableCache::ApproximateKeyAnchors(
641
601
  const FileMetaData& file_meta, std::vector<TableReader::Anchor>& anchors) {
642
602
  Status s;
643
603
  TableReader* t = file_meta.fd.table_reader;
644
- Cache::Handle* handle = nullptr;
604
+ TypedHandle* handle = nullptr;
645
605
  if (t == nullptr) {
646
606
  s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle);
647
607
  if (s.ok()) {
648
- t = GetTableReaderFromHandle(handle);
608
+ t = cache_.Value(handle);
649
609
  }
650
610
  }
651
611
  if (s.ok() && t != nullptr) {
652
612
  s = t->ApproximateKeyAnchors(ro, anchors);
653
613
  }
654
614
  if (handle != nullptr) {
655
- ReleaseHandle(handle);
615
+ cache_.Release(handle);
656
616
  }
657
617
  return s;
658
618
  }
@@ -668,29 +628,19 @@ size_t TableCache::GetMemoryUsageByTableReader(
668
628
  return table_reader->ApproximateMemoryUsage();
669
629
  }
670
630
 
671
- Cache::Handle* table_handle = nullptr;
631
+ TypedHandle* table_handle = nullptr;
672
632
  Status s = FindTable(ReadOptions(), file_options, internal_comparator,
673
633
  file_meta, &table_handle, prefix_extractor, true);
674
634
  if (!s.ok()) {
675
635
  return 0;
676
636
  }
677
637
  assert(table_handle);
678
- auto table = GetTableReaderFromHandle(table_handle);
638
+ auto table = cache_.Value(table_handle);
679
639
  auto ret = table->ApproximateMemoryUsage();
680
- ReleaseHandle(table_handle);
640
+ cache_.Release(table_handle);
681
641
  return ret;
682
642
  }
683
643
 
684
- bool TableCache::HasEntry(Cache* cache, uint64_t file_number) {
685
- Cache::Handle* handle = cache->Lookup(GetSliceForFileNumber(&file_number));
686
- if (handle) {
687
- cache->Release(handle);
688
- return true;
689
- } else {
690
- return false;
691
- }
692
- }
693
-
694
644
  void TableCache::Evict(Cache* cache, uint64_t file_number) {
695
645
  cache->Erase(GetSliceForFileNumber(&file_number));
696
646
  }
@@ -701,7 +651,7 @@ uint64_t TableCache::ApproximateOffsetOf(
701
651
  const std::shared_ptr<const SliceTransform>& prefix_extractor) {
702
652
  uint64_t result = 0;
703
653
  TableReader* table_reader = file_meta.fd.table_reader;
704
- Cache::Handle* table_handle = nullptr;
654
+ TypedHandle* table_handle = nullptr;
705
655
  if (table_reader == nullptr) {
706
656
  const bool for_compaction = (caller == TableReaderCaller::kCompaction);
707
657
  Status s =
@@ -709,7 +659,7 @@ uint64_t TableCache::ApproximateOffsetOf(
709
659
  &table_handle, prefix_extractor, false /* no_io */,
710
660
  !for_compaction /* record_read_stats */);
711
661
  if (s.ok()) {
712
- table_reader = GetTableReaderFromHandle(table_handle);
662
+ table_reader = cache_.Value(table_handle);
713
663
  }
714
664
  }
715
665
 
@@ -717,7 +667,7 @@ uint64_t TableCache::ApproximateOffsetOf(
717
667
  result = table_reader->ApproximateOffsetOf(key, caller);
718
668
  }
719
669
  if (table_handle != nullptr) {
720
- ReleaseHandle(table_handle);
670
+ cache_.Release(table_handle);
721
671
  }
722
672
 
723
673
  return result;
@@ -729,7 +679,7 @@ uint64_t TableCache::ApproximateSize(
729
679
  const std::shared_ptr<const SliceTransform>& prefix_extractor) {
730
680
  uint64_t result = 0;
731
681
  TableReader* table_reader = file_meta.fd.table_reader;
732
- Cache::Handle* table_handle = nullptr;
682
+ TypedHandle* table_handle = nullptr;
733
683
  if (table_reader == nullptr) {
734
684
  const bool for_compaction = (caller == TableReaderCaller::kCompaction);
735
685
  Status s =
@@ -737,7 +687,7 @@ uint64_t TableCache::ApproximateSize(
737
687
  &table_handle, prefix_extractor, false /* no_io */,
738
688
  !for_compaction /* record_read_stats */);
739
689
  if (s.ok()) {
740
- table_reader = GetTableReaderFromHandle(table_handle);
690
+ table_reader = cache_.Value(table_handle);
741
691
  }
742
692
  }
743
693
 
@@ -745,7 +695,7 @@ uint64_t TableCache::ApproximateSize(
745
695
  result = table_reader->ApproximateSize(start, end, caller);
746
696
  }
747
697
  if (table_handle != nullptr) {
748
- ReleaseHandle(table_handle);
698
+ cache_.Release(table_handle);
749
699
  }
750
700
 
751
701
  return result;