@nxtedition/rocksdb 8.2.0-alpha.1 → 8.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/binding.cc +11 -74
  2. package/binding.gyp +7 -5
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
  4. package/deps/rocksdb/rocksdb/TARGETS +7 -0
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
  10. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
  18. package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
  19. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
  20. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
  22. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
  23. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
  24. package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
  25. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
  29. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
  30. package/deps/rocksdb/rocksdb/db/c.cc +90 -1
  31. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
  32. package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
  33. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
  40. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
  41. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
  42. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
  43. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
  44. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
  51. package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
  52. package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
  53. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
  54. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
  55. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
  56. package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
  57. package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
  58. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
  60. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
  61. package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
  62. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  63. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
  64. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
  65. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
  66. package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
  67. package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
  68. package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
  69. package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
  70. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
  71. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
  72. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
  73. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
  74. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
  75. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
  76. package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
  77. package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
  78. package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
  79. package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
  80. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
  81. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
  89. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
  91. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
  92. package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
  93. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
  94. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
  95. package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
  96. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
  98. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
  99. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
  100. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
  102. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
  103. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
  104. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
  105. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
  107. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
  109. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  112. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
  113. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
  114. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
  115. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
  116. package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
  117. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
  118. package/deps/rocksdb/rocksdb/src.mk +4 -0
  119. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
  120. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
  121. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
  122. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
  123. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
  124. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
  125. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  127. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
  131. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
  133. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
  134. package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
  135. package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
  136. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
  137. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
  138. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
  139. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
  140. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
  141. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
  142. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
  143. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
  144. package/deps/rocksdb/rocksdb.gyp +6 -7
  145. package/index.js +0 -6
  146. package/package.json +1 -1
  147. package/prebuilds/linux-x64/node.napi.node +0 -0
  148. package/deps/liburing/liburing.gyp +0 -20
  149. package/tmp/test.js +0 -7
@@ -32,9 +32,8 @@ namespace ROCKSDB_NAMESPACE {
32
32
  DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
33
33
  (const ReadOptions& options, const MultiGetRange* batch,
34
34
  const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
35
- autovector<Status, MultiGetContext::MAX_BATCH_SIZE>* statuses,
36
- autovector<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE>* results,
37
- char* scratch, const UncompressionDict& uncompression_dict) const {
35
+ Status* statuses, CachableEntry<Block>* results, char* scratch,
36
+ const UncompressionDict& uncompression_dict) const {
38
37
  RandomAccessFileReader* file = rep_->file.get();
39
38
  const Footer& footer = rep_->footer;
40
39
  const ImmutableOptions& ioptions = rep_->ioptions;
@@ -52,13 +51,14 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
52
51
  continue;
53
52
  }
54
53
 
55
- (*statuses)[idx_in_batch] =
54
+ statuses[idx_in_batch] =
56
55
  RetrieveBlock(nullptr, options, handle, uncompression_dict,
57
- &(*results)[idx_in_batch].As<Block_kData>(),
56
+ &results[idx_in_batch].As<Block_kData>(),
58
57
  mget_iter->get_context, &lookup_data_block_context,
59
58
  /* for_compaction */ false, /* use_cache */ true,
60
- /* wait_for_cache */ true, /* async_read */ false);
59
+ /* async_read */ false);
61
60
  }
61
+ assert(idx_in_batch == handles->size());
62
62
  CO_RETURN;
63
63
  }
64
64
 
@@ -261,12 +261,12 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
261
261
  if (options.fill_cache) {
262
262
  BlockCacheLookupContext lookup_data_block_context(
263
263
  TableReaderCaller::kUserMultiGet);
264
- CachableEntry<Block>* block_entry = &(*results)[idx_in_batch];
264
+ CachableEntry<Block>* block_entry = &results[idx_in_batch];
265
265
  // MaybeReadBlockAndLoadToCache will insert into the block caches if
266
266
  // necessary. Since we're passing the serialized block contents, it
267
267
  // will avoid looking up the block cache
268
268
  s = MaybeReadBlockAndLoadToCache(
269
- nullptr, options, handle, uncompression_dict, /*wait=*/true,
269
+ nullptr, options, handle, uncompression_dict,
270
270
  /*for_compaction=*/false, &block_entry->As<Block_kData>(),
271
271
  mget_iter->get_context, &lookup_data_block_context,
272
272
  &serialized_block, /*async_read=*/false);
@@ -301,11 +301,11 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
301
301
  contents = std::move(serialized_block);
302
302
  }
303
303
  if (s.ok()) {
304
- (*results)[idx_in_batch].SetOwnedValue(std::make_unique<Block>(
304
+ results[idx_in_batch].SetOwnedValue(std::make_unique<Block>(
305
305
  std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
306
306
  }
307
307
  }
308
- (*statuses)[idx_in_batch] = s;
308
+ statuses[idx_in_batch] = s;
309
309
  }
310
310
  }
311
311
 
@@ -355,152 +355,147 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
355
355
 
356
356
  uint64_t prev_offset = std::numeric_limits<uint64_t>::max();
357
357
  autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE> block_handles;
358
- autovector<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE> results;
359
- autovector<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
358
+ std::array<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE> results;
359
+ std::array<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
360
360
  MultiGetContext::Mask reused_mask = 0;
361
361
  char stack_buf[kMultiGetReadStackBufSize];
362
362
  std::unique_ptr<char[]> block_buf;
363
363
  {
364
364
  MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(),
365
365
  sst_file_range.end());
366
- std::vector<Cache::Handle*> cache_handles;
367
- bool wait_for_cache_results = false;
368
-
369
366
  CachableEntry<UncompressionDict> uncompression_dict;
370
367
  Status uncompression_dict_status;
371
368
  uncompression_dict_status.PermitUncheckedError();
372
369
  bool uncompression_dict_inited = false;
373
370
  size_t total_len = 0;
374
- ReadOptions ro = read_options;
375
- ro.read_tier = kBlockCacheTier;
376
-
377
- for (auto miter = data_block_range.begin();
378
- miter != data_block_range.end(); ++miter) {
379
- const Slice& key = miter->ikey;
380
- iiter->Seek(miter->ikey);
381
371
 
382
- IndexValue v;
383
- if (iiter->Valid()) {
384
- v = iiter->value();
385
- }
386
- if (!iiter->Valid() ||
387
- (!v.first_internal_key.empty() && !skip_filters &&
388
- UserComparatorWrapper(rep_->internal_comparator.user_comparator())
389
- .CompareWithoutTimestamp(
390
- ExtractUserKey(key),
391
- ExtractUserKey(v.first_internal_key)) < 0)) {
392
- // The requested key falls between highest key in previous block and
393
- // lowest key in current block.
394
- if (!iiter->status().IsNotFound()) {
395
- *(miter->s) = iiter->status();
372
+ // GetContext for any key will do, as the stats will be aggregated
373
+ // anyway
374
+ GetContext* get_context = sst_file_range.begin()->get_context;
375
+
376
+ {
377
+ using BCI = BlockCacheInterface<Block_kData>;
378
+ BCI block_cache{rep_->table_options.block_cache.get()};
379
+ std::array<BCI::TypedAsyncLookupHandle, MultiGetContext::MAX_BATCH_SIZE>
380
+ async_handles;
381
+ std::array<CacheKey, MultiGetContext::MAX_BATCH_SIZE> cache_keys;
382
+ size_t cache_lookup_count = 0;
383
+
384
+ for (auto miter = data_block_range.begin();
385
+ miter != data_block_range.end(); ++miter) {
386
+ const Slice& key = miter->ikey;
387
+ iiter->Seek(miter->ikey);
388
+
389
+ IndexValue v;
390
+ if (iiter->Valid()) {
391
+ v = iiter->value();
392
+ }
393
+ if (!iiter->Valid() ||
394
+ (!v.first_internal_key.empty() && !skip_filters &&
395
+ UserComparatorWrapper(
396
+ rep_->internal_comparator.user_comparator())
397
+ .CompareWithoutTimestamp(
398
+ ExtractUserKey(key),
399
+ ExtractUserKey(v.first_internal_key)) < 0)) {
400
+ // The requested key falls between highest key in previous block and
401
+ // lowest key in current block.
402
+ if (!iiter->status().IsNotFound()) {
403
+ *(miter->s) = iiter->status();
404
+ }
405
+ data_block_range.SkipKey(miter);
406
+ sst_file_range.SkipKey(miter);
407
+ continue;
396
408
  }
397
- data_block_range.SkipKey(miter);
398
- sst_file_range.SkipKey(miter);
399
- continue;
400
- }
401
409
 
402
- if (!uncompression_dict_inited && rep_->uncompression_dict_reader) {
403
- uncompression_dict_status =
404
- rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
405
- nullptr /* prefetch_buffer */, no_io,
406
- read_options.verify_checksums,
407
- sst_file_range.begin()->get_context, &lookup_context,
408
- &uncompression_dict);
409
- uncompression_dict_inited = true;
410
- }
410
+ if (!uncompression_dict_inited && rep_->uncompression_dict_reader) {
411
+ uncompression_dict_status =
412
+ rep_->uncompression_dict_reader
413
+ ->GetOrReadUncompressionDictionary(
414
+ nullptr /* prefetch_buffer */, no_io,
415
+ read_options.verify_checksums, get_context,
416
+ &lookup_context, &uncompression_dict);
417
+ uncompression_dict_inited = true;
418
+ }
411
419
 
412
- if (!uncompression_dict_status.ok()) {
413
- assert(!uncompression_dict_status.IsNotFound());
414
- *(miter->s) = uncompression_dict_status;
415
- data_block_range.SkipKey(miter);
416
- sst_file_range.SkipKey(miter);
417
- continue;
418
- }
420
+ if (!uncompression_dict_status.ok()) {
421
+ assert(!uncompression_dict_status.IsNotFound());
422
+ *(miter->s) = uncompression_dict_status;
423
+ data_block_range.SkipKey(miter);
424
+ sst_file_range.SkipKey(miter);
425
+ continue;
426
+ }
419
427
 
420
- statuses.emplace_back();
421
- results.emplace_back();
422
- if (v.handle.offset() == prev_offset) {
423
- // This key can reuse the previous block (later on).
424
- // Mark previous as "reused"
425
- reused_mask |= MultiGetContext::Mask{1} << (block_handles.size() - 1);
426
- // Use null handle to indicate this one reuses same block as
427
- // previous.
428
- block_handles.emplace_back(BlockHandle::NullBlockHandle());
429
- continue;
430
- }
431
- // Lookup the cache for the given data block referenced by an index
432
- // iterator value (i.e BlockHandle). If it exists in the cache,
433
- // initialize block to the contents of the data block.
434
- prev_offset = v.handle.offset();
435
- BlockHandle handle = v.handle;
436
- BlockCacheLookupContext lookup_data_block_context(
437
- TableReaderCaller::kUserMultiGet);
438
- const UncompressionDict& dict = uncompression_dict.GetValue()
439
- ? *uncompression_dict.GetValue()
440
- : UncompressionDict::GetEmptyDict();
441
- Status s = RetrieveBlock(
442
- nullptr, ro, handle, dict, &(results.back()).As<Block_kData>(),
443
- miter->get_context, &lookup_data_block_context,
444
- /* for_compaction */ false, /* use_cache */ true,
445
- /* wait_for_cache */ false, /* async_read */ false);
446
- if (s.IsIncomplete()) {
447
- s = Status::OK();
448
- }
449
- if (s.ok() && !results.back().IsEmpty()) {
450
- // Since we have a valid handle, check the value. If its nullptr,
451
- // it means the cache is waiting for the final result and we're
452
- // supposed to call WaitAll() to wait for the result.
453
- if (results.back().GetValue() != nullptr) {
454
- // Found it in the cache. Add NULL handle to indicate there is
455
- // nothing to read from disk.
456
- if (results.back().GetCacheHandle()) {
457
- results.back().UpdateCachedValue();
458
- }
428
+ if (v.handle.offset() == prev_offset) {
429
+ // This key can reuse the previous block (later on).
430
+ // Mark previous as "reused"
431
+ reused_mask |= MultiGetContext::Mask{1}
432
+ << (block_handles.size() - 1);
433
+ // Use null handle to indicate this one reuses same block as
434
+ // previous.
459
435
  block_handles.emplace_back(BlockHandle::NullBlockHandle());
460
- } else {
461
- // We have to wait for the cache lookup to finish in the
462
- // background, and then we may have to read the block from disk
463
- // anyway
464
- assert(results.back().GetCacheHandle());
465
- wait_for_cache_results = true;
466
- block_handles.emplace_back(handle);
467
- cache_handles.emplace_back(results.back().GetCacheHandle());
436
+ continue;
437
+ }
438
+ prev_offset = v.handle.offset();
439
+ block_handles.emplace_back(v.handle);
440
+
441
+ if (block_cache) {
442
+ // Lookup the cache for the given data block referenced by an index
443
+ // iterator value (i.e BlockHandle). If it exists in the cache,
444
+ // initialize block to the contents of the data block.
445
+ // TODO?
446
+ // BlockCacheLookupContext lookup_data_block_context(
447
+ // TableReaderCaller::kUserMultiGet);
448
+
449
+ // An async version of MaybeReadBlockAndLoadToCache /
450
+ // GetDataBlockFromCache
451
+ BCI::TypedAsyncLookupHandle& async_handle =
452
+ async_handles[cache_lookup_count];
453
+ cache_keys[cache_lookup_count] =
454
+ GetCacheKey(rep_->base_cache_key, v.handle);
455
+ async_handle.key = cache_keys[cache_lookup_count].AsSlice();
456
+ // NB: StartAsyncLookupFull populates async_handle.helper
457
+ async_handle.create_context = &rep_->create_context;
458
+ async_handle.priority = GetCachePriority<Block_kData>();
459
+ async_handle.stats = rep_->ioptions.statistics.get();
460
+
461
+ block_cache.StartAsyncLookupFull(
462
+ async_handle, rep_->ioptions.lowest_used_cache_tier);
463
+ ++cache_lookup_count;
464
+ // TODO: stats?
468
465
  }
469
- } else {
470
- block_handles.emplace_back(handle);
471
- total_len += BlockSizeWithTrailer(handle);
472
466
  }
473
- }
474
467
 
475
- if (wait_for_cache_results) {
476
- Cache* block_cache = rep_->table_options.block_cache.get();
477
- block_cache->WaitAll(cache_handles);
468
+ if (block_cache) {
469
+ block_cache.get()->WaitAll(&async_handles[0], cache_lookup_count);
470
+ }
471
+ size_t lookup_idx = 0;
478
472
  for (size_t i = 0; i < block_handles.size(); ++i) {
479
473
  // If this block was a success or failure or not needed because
480
474
  // the corresponding key is in the same block as a prior key, skip
481
- if (block_handles[i] == BlockHandle::NullBlockHandle() ||
482
- results[i].IsEmpty()) {
475
+ if (block_handles[i] == BlockHandle::NullBlockHandle()) {
483
476
  continue;
484
477
  }
485
- results[i].UpdateCachedValue();
486
- void* val = results[i].GetValue();
487
- Cache::Handle* handle = results[i].GetCacheHandle();
488
- // GetContext for any key will do, as the stats will be aggregated
489
- // anyway
490
- GetContext* get_context = sst_file_range.begin()->get_context;
491
- if (!val) {
492
- // The async cache lookup failed - could be due to an error
493
- // or a false positive. We need to read the data block from
494
- // the SST file
495
- results[i].Reset();
478
+ if (!block_cache) {
496
479
  total_len += BlockSizeWithTrailer(block_handles[i]);
497
- UpdateCacheMissMetrics(BlockType::kData, get_context);
498
480
  } else {
499
- block_handles[i] = BlockHandle::NullBlockHandle();
500
- UpdateCacheHitMetrics(BlockType::kData, get_context,
501
- block_cache->GetUsage(handle));
481
+ BCI::TypedHandle* h = async_handles[lookup_idx].Result();
482
+ if (h) {
483
+ // Cache hit
484
+ results[i].SetCachedValue(block_cache.Value(h), block_cache.get(),
485
+ h);
486
+ // Don't need to fetch
487
+ block_handles[i] = BlockHandle::NullBlockHandle();
488
+ UpdateCacheHitMetrics(BlockType::kData, get_context,
489
+ block_cache.get()->GetUsage(h));
490
+ } else {
491
+ // Cache miss
492
+ total_len += BlockSizeWithTrailer(block_handles[i]);
493
+ UpdateCacheMissMetrics(BlockType::kData, get_context);
494
+ }
495
+ ++lookup_idx;
502
496
  }
503
497
  }
498
+ assert(lookup_idx == cache_lookup_count);
504
499
  }
505
500
 
506
501
  if (total_len) {
@@ -530,11 +525,10 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
530
525
  }
531
526
  }
532
527
  CO_AWAIT(RetrieveMultipleBlocks)
533
- (read_options, &data_block_range, &block_handles, &statuses, &results,
534
- scratch, dict);
535
- if (sst_file_range.begin()->get_context) {
536
- ++(sst_file_range.begin()
537
- ->get_context->get_context_stats_.num_sst_read);
528
+ (read_options, &data_block_range, &block_handles, &statuses[0],
529
+ &results[0], scratch, dict);
530
+ if (get_context) {
531
+ ++(get_context->get_context_stats_.num_sst_read);
538
532
  }
539
533
  }
540
534
  }
@@ -50,36 +50,36 @@ namespace {
50
50
  // For getting SecondaryCache-compatible helpers from a BlockType. This is
51
51
  // useful for accessing block cache in untyped contexts, such as for generic
52
52
  // cache warming in table builder.
53
- constexpr std::array<const Cache::CacheItemHelper*,
54
- static_cast<unsigned>(BlockType::kInvalid) + 1>
53
+ const std::array<const Cache::CacheItemHelper*,
54
+ static_cast<unsigned>(BlockType::kInvalid) + 1>
55
55
  kCacheItemFullHelperForBlockType{{
56
- &BlockCacheInterface<Block_kData>::kFullHelper,
57
- &BlockCacheInterface<ParsedFullFilterBlock>::kFullHelper,
58
- &BlockCacheInterface<Block_kFilterPartitionIndex>::kFullHelper,
56
+ BlockCacheInterface<Block_kData>::GetFullHelper(),
57
+ BlockCacheInterface<ParsedFullFilterBlock>::GetFullHelper(),
58
+ BlockCacheInterface<Block_kFilterPartitionIndex>::GetFullHelper(),
59
59
  nullptr, // kProperties
60
- &BlockCacheInterface<UncompressionDict>::kFullHelper,
61
- &BlockCacheInterface<Block_kRangeDeletion>::kFullHelper,
60
+ BlockCacheInterface<UncompressionDict>::GetFullHelper(),
61
+ BlockCacheInterface<Block_kRangeDeletion>::GetFullHelper(),
62
62
  nullptr, // kHashIndexPrefixes
63
63
  nullptr, // kHashIndexMetadata
64
64
  nullptr, // kMetaIndex (not yet stored in block cache)
65
- &BlockCacheInterface<Block_kIndex>::kFullHelper,
65
+ BlockCacheInterface<Block_kIndex>::GetFullHelper(),
66
66
  nullptr, // kInvalid
67
67
  }};
68
68
 
69
69
  // For getting basic helpers from a BlockType (no SecondaryCache support)
70
- constexpr std::array<const Cache::CacheItemHelper*,
71
- static_cast<unsigned>(BlockType::kInvalid) + 1>
70
+ const std::array<const Cache::CacheItemHelper*,
71
+ static_cast<unsigned>(BlockType::kInvalid) + 1>
72
72
  kCacheItemBasicHelperForBlockType{{
73
- &BlockCacheInterface<Block_kData>::kBasicHelper,
74
- &BlockCacheInterface<ParsedFullFilterBlock>::kBasicHelper,
75
- &BlockCacheInterface<Block_kFilterPartitionIndex>::kBasicHelper,
73
+ BlockCacheInterface<Block_kData>::GetBasicHelper(),
74
+ BlockCacheInterface<ParsedFullFilterBlock>::GetBasicHelper(),
75
+ BlockCacheInterface<Block_kFilterPartitionIndex>::GetBasicHelper(),
76
76
  nullptr, // kProperties
77
- &BlockCacheInterface<UncompressionDict>::kBasicHelper,
78
- &BlockCacheInterface<Block_kRangeDeletion>::kBasicHelper,
77
+ BlockCacheInterface<UncompressionDict>::GetBasicHelper(),
78
+ BlockCacheInterface<Block_kRangeDeletion>::GetBasicHelper(),
79
79
  nullptr, // kHashIndexPrefixes
80
80
  nullptr, // kHashIndexMetadata
81
81
  nullptr, // kMetaIndex (not yet stored in block cache)
82
- &BlockCacheInterface<Block_kIndex>::kBasicHelper,
82
+ BlockCacheInterface<Block_kIndex>::GetBasicHelper(),
83
83
  nullptr, // kInvalid
84
84
  }};
85
85
  } // namespace
@@ -176,22 +176,6 @@ class CachableEntry {
176
176
  assert(!own_value_);
177
177
  }
178
178
 
179
- void UpdateCachedValue() {
180
- assert(cache_ != nullptr);
181
- assert(cache_handle_ != nullptr);
182
-
183
- value_ = static_cast<T*>(cache_->Value(cache_handle_));
184
- }
185
-
186
- bool IsReady() {
187
- if (!own_value_) {
188
- assert(cache_ != nullptr);
189
- assert(cache_handle_ != nullptr);
190
- return cache_->IsReady(cache_handle_);
191
- }
192
- return true;
193
- }
194
-
195
179
  // Since this class is essentially an elaborate pointer, it's sometimes
196
180
  // useful to be able to upcast or downcast the base type of the pointer,
197
181
  // especially when interacting with typed_cache.h.
@@ -33,7 +33,7 @@ Status FilterBlockReaderCommon<TBlocklike>::ReadFilterBlock(
33
33
  UncompressionDict::GetEmptyDict(), filter_block,
34
34
  get_context, lookup_context,
35
35
  /* for_compaction */ false, use_cache,
36
- /* wait_for_cache */ true, /* async_read */ false);
36
+ /* async_read */ false);
37
37
 
38
38
  return s;
39
39
  }
@@ -29,7 +29,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
29
29
  prefetch_buffer, read_options, rep->footer.index_handle(),
30
30
  UncompressionDict::GetEmptyDict(), &index_block->As<Block_kIndex>(),
31
31
  get_context, lookup_context, /* for_compaction */ false, use_cache,
32
- /* wait_for_cache */ true, /* async_read */ false);
32
+ /* async_read */ false);
33
33
 
34
34
  return s;
35
35
  }
@@ -318,7 +318,7 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock(
318
318
  UncompressionDict::GetEmptyDict(), filter_block,
319
319
  get_context, lookup_context,
320
320
  /* for_compaction */ false, /* use_cache */ true,
321
- /* wait_for_cache */ true, /* async_read */ false);
321
+ /* async_read */ false);
322
322
 
323
323
  return s;
324
324
  }
@@ -518,9 +518,8 @@ Status PartitionedFilterBlockReader::CacheDependencies(const ReadOptions& ro,
518
518
  // filter blocks
519
519
  s = table()->MaybeReadBlockAndLoadToCache(
520
520
  prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(),
521
- /* wait */ true, /* for_compaction */ false, &block,
522
- nullptr /* get_context */, &lookup_context, nullptr /* contents */,
523
- false);
521
+ /* for_compaction */ false, &block, nullptr /* get_context */,
522
+ &lookup_context, nullptr /* contents */, false);
524
523
  if (!s.ok()) {
525
524
  return s;
526
525
  }
@@ -192,7 +192,7 @@ Status PartitionIndexReader::CacheDependencies(const ReadOptions& ro,
192
192
  // filter blocks
193
193
  Status s = table()->MaybeReadBlockAndLoadToCache(
194
194
  prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(),
195
- /*wait=*/true, /*for_compaction=*/false, &block.As<Block_kIndex>(),
195
+ /*for_compaction=*/false, &block.As<Block_kIndex>(),
196
196
  /*get_context=*/nullptr, &lookup_context, /*contents=*/nullptr,
197
197
  /*async_read=*/false);
198
198
 
@@ -62,7 +62,7 @@ Status UncompressionDictReader::ReadUncompressionDictionary(
62
62
  prefetch_buffer, read_options, rep->compression_dict_handle,
63
63
  UncompressionDict::GetEmptyDict(), uncompression_dict, get_context,
64
64
  lookup_context,
65
- /* for_compaction */ false, use_cache, /* wait_for_cache */ true,
65
+ /* for_compaction */ false, use_cache,
66
66
  /* async_read */ false);
67
67
 
68
68
  if (!s.ok()) {