@nxtedition/rocksdb 7.0.23 → 7.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/binding.cc +3 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
  3. package/deps/rocksdb/rocksdb/Makefile +6 -2
  4. package/deps/rocksdb/rocksdb/TARGETS +14 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
  6. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
  21. package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
  25. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
  26. package/deps/rocksdb/rocksdb/db/c.cc +68 -0
  27. package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
  51. package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
  52. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
  55. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
  61. package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
  62. package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
  63. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
  64. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
  65. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
  66. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  67. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
  68. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
  69. package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
  70. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
  71. package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
  72. package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
  73. package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
  74. package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
  75. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
  78. package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
  79. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  80. package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
  82. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
  93. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
  94. package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
  95. package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
  96. package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
  97. package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
  98. package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
  99. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
  100. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
  101. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
  102. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
  103. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
  104. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
  107. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
  108. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
  111. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
  113. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
  115. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
  116. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
  117. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
  118. package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
  119. package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
  120. package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
  121. package/deps/rocksdb/rocksdb/src.mk +5 -0
  122. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
  127. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
  128. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  131. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
  132. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
  133. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
  134. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
  135. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  136. package/deps/rocksdb/rocksdb/util/compression.h +2 -0
  137. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
  138. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
  139. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
  140. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
  143. package/deps/rocksdb/rocksdb.gyp +5 -1
  144. package/package.json +1 -1
  145. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  146. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -194,21 +194,21 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
194
194
  // MultiGetBlob
195
195
  bytes_read = 0;
196
196
  size_t total_size = 0;
197
- autovector<std::reference_wrapper<const Slice>> key_refs;
198
- for (const auto& key_ref : keys) {
199
- key_refs.emplace_back(std::cref(key_ref));
200
- }
201
- autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
202
- blob_offsets[2]};
203
- autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
197
+
204
198
  std::array<Status, num_blobs> statuses_buf;
205
- autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
206
- &statuses_buf[2]};
207
199
  std::array<PinnableSlice, num_blobs> value_buf;
208
- autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
209
- &value_buf[2]};
210
- reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
211
- values, &bytes_read);
200
+ std::array<BlobReadRequest, num_blobs> requests_buf;
201
+ autovector<BlobReadRequest*> blob_reqs;
202
+
203
+ for (size_t i = 0; i < num_blobs; ++i) {
204
+ requests_buf[i] =
205
+ BlobReadRequest(keys[i], blob_offsets[i], blob_sizes[i],
206
+ kNoCompression, &value_buf[i], &statuses_buf[i]);
207
+ blob_reqs.push_back(&requests_buf[i]);
208
+ }
209
+
210
+ reader->MultiGetBlob(read_options, blob_reqs, &bytes_read);
211
+
212
212
  for (size_t i = 0; i < num_blobs; ++i) {
213
213
  ASSERT_OK(statuses_buf[i]);
214
214
  ASSERT_EQ(value_buf[i], blobs[i]);
@@ -300,15 +300,21 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
300
300
  blob_offsets[0],
301
301
  blob_offsets[1] - (keys[1].size() - key_refs[1].get().size()),
302
302
  blob_offsets[2]};
303
- autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
303
+
304
304
  std::array<Status, num_blobs> statuses_buf;
305
- autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
306
- &statuses_buf[2]};
307
305
  std::array<PinnableSlice, num_blobs> value_buf;
308
- autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
309
- &value_buf[2]};
310
- reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
311
- values, &bytes_read);
306
+ std::array<BlobReadRequest, num_blobs> requests_buf;
307
+ autovector<BlobReadRequest*> blob_reqs;
308
+
309
+ for (size_t i = 0; i < num_blobs; ++i) {
310
+ requests_buf[i] =
311
+ BlobReadRequest(key_refs[i], offsets[i], blob_sizes[i],
312
+ kNoCompression, &value_buf[i], &statuses_buf[i]);
313
+ blob_reqs.push_back(&requests_buf[i]);
314
+ }
315
+
316
+ reader->MultiGetBlob(read_options, blob_reqs, &bytes_read);
317
+
312
318
  for (size_t i = 0; i < num_blobs; ++i) {
313
319
  if (i == 1) {
314
320
  ASSERT_TRUE(statuses_buf[i].IsCorruption());
@@ -339,17 +345,21 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
339
345
  Slice wrong_key_slice(incorrect_key, sizeof(incorrect_key) - 1);
340
346
  key_refs[2] = std::cref(wrong_key_slice);
341
347
 
342
- autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
343
- blob_offsets[2]};
344
- autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
345
348
  std::array<Status, num_blobs> statuses_buf;
346
- autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
347
- &statuses_buf[2]};
348
349
  std::array<PinnableSlice, num_blobs> value_buf;
349
- autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
350
- &value_buf[2]};
351
- reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
352
- values, &bytes_read);
350
+ std::array<BlobReadRequest, num_blobs> requests_buf;
351
+
352
+ for (size_t i = 0; i < num_blobs; ++i) {
353
+ requests_buf[i] =
354
+ BlobReadRequest(key_refs[i], blob_offsets[i], blob_sizes[i],
355
+ kNoCompression, &value_buf[i], &statuses_buf[i]);
356
+ }
357
+
358
+ autovector<BlobReadRequest*> blob_reqs = {
359
+ &requests_buf[0], &requests_buf[1], &requests_buf[2]};
360
+
361
+ reader->MultiGetBlob(read_options, blob_reqs, &bytes_read);
362
+
353
363
  for (size_t i = 0; i < num_blobs; ++i) {
354
364
  if (i == num_blobs - 1) {
355
365
  ASSERT_TRUE(statuses_buf[i].IsCorruption());
@@ -376,17 +386,26 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
376
386
  for (const auto& key_ref : keys) {
377
387
  key_refs.emplace_back(std::cref(key_ref));
378
388
  }
379
- autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
380
- blob_offsets[2]};
381
- autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1] + 1, blob_sizes[2]};
389
+
382
390
  std::array<Status, num_blobs> statuses_buf;
383
- autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
384
- &statuses_buf[2]};
385
391
  std::array<PinnableSlice, num_blobs> value_buf;
386
- autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
387
- &value_buf[2]};
388
- reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
389
- values, &bytes_read);
392
+ std::array<BlobReadRequest, num_blobs> requests_buf;
393
+
394
+ requests_buf[0] =
395
+ BlobReadRequest(key_refs[0], blob_offsets[0], blob_sizes[0],
396
+ kNoCompression, &value_buf[0], &statuses_buf[0]);
397
+ requests_buf[1] =
398
+ BlobReadRequest(key_refs[1], blob_offsets[1], blob_sizes[1] + 1,
399
+ kNoCompression, &value_buf[1], &statuses_buf[1]);
400
+ requests_buf[2] =
401
+ BlobReadRequest(key_refs[2], blob_offsets[2], blob_sizes[2],
402
+ kNoCompression, &value_buf[2], &statuses_buf[2]);
403
+
404
+ autovector<BlobReadRequest*> blob_reqs = {
405
+ &requests_buf[0], &requests_buf[1], &requests_buf[2]};
406
+
407
+ reader->MultiGetBlob(read_options, blob_reqs, &bytes_read);
408
+
390
409
  for (size_t i = 0; i < num_blobs; ++i) {
391
410
  if (i != 1) {
392
411
  ASSERT_OK(statuses_buf[i]);
@@ -0,0 +1,58 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #pragma once
7
+
8
+ #include <cinttypes>
9
+
10
+ #include "rocksdb/compression_type.h"
11
+ #include "rocksdb/slice.h"
12
+ #include "rocksdb/status.h"
13
+ #include "util/autovector.h"
14
+
15
+ namespace ROCKSDB_NAMESPACE {
16
+
17
+ // A read Blob request structure for use in BlobSource::MultiGetBlob and
18
+ // BlobFileReader::MultiGetBlob.
19
+ struct BlobReadRequest {
20
+ // User key to lookup the paired blob
21
+ const Slice* user_key = nullptr;
22
+
23
+ // File offset in bytes
24
+ uint64_t offset = 0;
25
+
26
+ // Length to read in bytes
27
+ size_t len = 0;
28
+
29
+ // Blob compression type
30
+ CompressionType compression = kNoCompression;
31
+
32
+ // Output parameter set by MultiGetBlob() to point to the data buffer, and
33
+ // the number of valid bytes
34
+ PinnableSlice* result = nullptr;
35
+
36
+ // Status of read
37
+ Status* status = nullptr;
38
+
39
+ BlobReadRequest(const Slice& _user_key, uint64_t _offset, size_t _len,
40
+ CompressionType _compression, PinnableSlice* _result,
41
+ Status* _status)
42
+ : user_key(&_user_key),
43
+ offset(_offset),
44
+ len(_len),
45
+ compression(_compression),
46
+ result(_result),
47
+ status(_status) {}
48
+
49
+ BlobReadRequest() = default;
50
+ BlobReadRequest(const BlobReadRequest& other) = default;
51
+ BlobReadRequest& operator=(const BlobReadRequest& other) = default;
52
+ };
53
+
54
+ using BlobFileReadRequests =
55
+ std::tuple<uint64_t /* file_number */, uint64_t /* file_size */,
56
+ autovector<BlobReadRequest>>;
57
+
58
+ } // namespace ROCKSDB_NAMESPACE
@@ -10,7 +10,9 @@
10
10
 
11
11
  #include "db/blob/blob_file_reader.h"
12
12
  #include "db/blob/blob_log_format.h"
13
+ #include "monitoring/statistics.h"
13
14
  #include "options/cf_options.h"
15
+ #include "table/get_context.h"
14
16
  #include "table/multiget_context.h"
15
17
 
16
18
  namespace ROCKSDB_NAMESPACE {
@@ -28,7 +30,7 @@ BlobSource::BlobSource(const ImmutableOptions* immutable_options,
28
30
  BlobSource::~BlobSource() = default;
29
31
 
30
32
  Status BlobSource::GetBlobFromCache(const Slice& cache_key,
31
- CachableEntry<std::string>* blob) const {
33
+ CacheHandleGuard<std::string>* blob) const {
32
34
  assert(blob);
33
35
  assert(blob->IsEmpty());
34
36
  assert(blob_cache_);
@@ -37,9 +39,7 @@ Status BlobSource::GetBlobFromCache(const Slice& cache_key,
37
39
  Cache::Handle* cache_handle = nullptr;
38
40
  cache_handle = GetEntryFromCache(cache_key);
39
41
  if (cache_handle != nullptr) {
40
- blob->SetCachedValue(
41
- static_cast<std::string*>(blob_cache_->Value(cache_handle)),
42
- blob_cache_.get(), cache_handle);
42
+ *blob = CacheHandleGuard<std::string>(blob_cache_.get(), cache_handle);
43
43
  return Status::OK();
44
44
  }
45
45
 
@@ -49,7 +49,7 @@ Status BlobSource::GetBlobFromCache(const Slice& cache_key,
49
49
  }
50
50
 
51
51
  Status BlobSource::PutBlobIntoCache(const Slice& cache_key,
52
- CachableEntry<std::string>* cached_blob,
52
+ CacheHandleGuard<std::string>* cached_blob,
53
53
  PinnableSlice* blob) const {
54
54
  assert(blob);
55
55
  assert(!cache_key.empty());
@@ -72,12 +72,45 @@ Status BlobSource::PutBlobIntoCache(const Slice& cache_key,
72
72
  priority);
73
73
  if (s.ok()) {
74
74
  assert(cache_handle != nullptr);
75
- cached_blob->SetCachedValue(buf, blob_cache_.get(), cache_handle);
75
+ *cached_blob =
76
+ CacheHandleGuard<std::string>(blob_cache_.get(), cache_handle);
76
77
  }
77
78
 
78
79
  return s;
79
80
  }
80
81
 
82
+ Cache::Handle* BlobSource::GetEntryFromCache(const Slice& key) const {
83
+ Cache::Handle* cache_handle = nullptr;
84
+ cache_handle = blob_cache_->Lookup(key, statistics_);
85
+ if (cache_handle != nullptr) {
86
+ PERF_COUNTER_ADD(blob_cache_hit_count, 1);
87
+ RecordTick(statistics_, BLOB_DB_CACHE_HIT);
88
+ RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ,
89
+ blob_cache_->GetUsage(cache_handle));
90
+ } else {
91
+ RecordTick(statistics_, BLOB_DB_CACHE_MISS);
92
+ }
93
+ return cache_handle;
94
+ }
95
+
96
+ Status BlobSource::InsertEntryIntoCache(const Slice& key, std::string* value,
97
+ size_t charge,
98
+ Cache::Handle** cache_handle,
99
+ Cache::Priority priority) const {
100
+ const Status s =
101
+ blob_cache_->Insert(key, value, charge, &DeleteCacheEntry<std::string>,
102
+ cache_handle, priority);
103
+ if (s.ok()) {
104
+ assert(*cache_handle != nullptr);
105
+ RecordTick(statistics_, BLOB_DB_CACHE_ADD);
106
+ RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE,
107
+ blob_cache_->GetUsage(*cache_handle));
108
+ } else {
109
+ RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
110
+ }
111
+ return s;
112
+ }
113
+
81
114
  Status BlobSource::GetBlob(const ReadOptions& read_options,
82
115
  const Slice& user_key, uint64_t file_number,
83
116
  uint64_t offset, uint64_t file_size,
@@ -91,32 +124,51 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
91
124
 
92
125
  const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
93
126
 
94
- CachableEntry<std::string> blob_entry;
127
+ CacheHandleGuard<std::string> blob_handle;
95
128
 
96
129
  // First, try to get the blob from the cache
97
130
  //
98
131
  // If blob cache is enabled, we'll try to read from it.
99
132
  if (blob_cache_) {
100
133
  Slice key = cache_key.AsSlice();
101
- s = GetBlobFromCache(key, &blob_entry);
102
- if (s.ok() && blob_entry.GetValue()) {
134
+ s = GetBlobFromCache(key, &blob_handle);
135
+ if (s.ok() && blob_handle.GetValue()) {
136
+ {
137
+ value->Reset();
138
+ // To avoid copying the cached blob into the buffer provided by the
139
+ // application, we can simply transfer ownership of the cache handle to
140
+ // the target PinnableSlice. This has the potential to save a lot of
141
+ // CPU, especially with large blob values.
142
+ value->PinSlice(
143
+ *blob_handle.GetValue(),
144
+ [](void* arg1, void* arg2) {
145
+ Cache* const cache = static_cast<Cache*>(arg1);
146
+ Cache::Handle* const handle = static_cast<Cache::Handle*>(arg2);
147
+ cache->Release(handle);
148
+ },
149
+ blob_handle.GetCache(), blob_handle.GetCacheHandle());
150
+ // Make the CacheHandleGuard relinquish ownership of the handle.
151
+ blob_handle.TransferTo(nullptr);
152
+ }
153
+
103
154
  // For consistency, the size of on-disk (possibly compressed) blob record
104
155
  // is assigned to bytes_read.
156
+ uint64_t adjustment =
157
+ read_options.verify_checksums
158
+ ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
159
+ user_key.size())
160
+ : 0;
161
+ assert(offset >= adjustment);
162
+
163
+ uint64_t record_size = value_size + adjustment;
105
164
  if (bytes_read) {
106
- uint64_t adjustment =
107
- read_options.verify_checksums
108
- ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
109
- user_key.size())
110
- : 0;
111
- assert(offset >= adjustment);
112
- *bytes_read = value_size + adjustment;
165
+ *bytes_read = record_size;
113
166
  }
114
- value->PinSelf(*blob_entry.GetValue());
115
167
  return s;
116
168
  }
117
169
  }
118
170
 
119
- assert(blob_entry.IsEmpty());
171
+ assert(blob_handle.IsEmpty());
120
172
 
121
173
  const bool no_io = read_options.read_tier == kBlockCacheTier;
122
174
  if (no_io) {
@@ -139,19 +191,23 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
139
191
  return Status::Corruption("Compression type mismatch when reading blob");
140
192
  }
141
193
 
194
+ uint64_t read_size = 0;
142
195
  s = blob_file_reader.GetValue()->GetBlob(
143
196
  read_options, user_key, offset, value_size, compression_type,
144
- prefetch_buffer, value, bytes_read);
197
+ prefetch_buffer, value, &read_size);
145
198
  if (!s.ok()) {
146
199
  return s;
147
200
  }
201
+ if (bytes_read) {
202
+ *bytes_read = read_size;
203
+ }
148
204
  }
149
205
 
150
206
  if (blob_cache_ && read_options.fill_cache) {
151
207
  // If filling cache is allowed and a cache is configured, try to put the
152
208
  // blob to the cache.
153
209
  Slice key = cache_key.AsSlice();
154
- s = PutBlobIntoCache(key, &blob_entry, value);
210
+ s = PutBlobIntoCache(key, &blob_handle, value);
155
211
  if (!s.ok()) {
156
212
  return s;
157
213
  }
@@ -161,31 +217,51 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
161
217
  return s;
162
218
  }
163
219
 
164
- void BlobSource::MultiGetBlob(
165
- const ReadOptions& read_options,
166
- const autovector<std::reference_wrapper<const Slice>>& user_keys,
167
- uint64_t file_number, uint64_t file_size,
168
- const autovector<uint64_t>& offsets,
169
- const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
170
- autovector<PinnableSlice*>& blobs, uint64_t* bytes_read) {
171
- size_t num_blobs = user_keys.size();
220
+ void BlobSource::MultiGetBlob(const ReadOptions& read_options,
221
+ autovector<BlobFileReadRequests>& blob_reqs,
222
+ uint64_t* bytes_read) {
223
+ assert(blob_reqs.size() > 0);
224
+
225
+ uint64_t total_bytes_read = 0;
226
+ uint64_t bytes_read_in_file = 0;
227
+
228
+ for (auto& [file_number, file_size, blob_reqs_in_file] : blob_reqs) {
229
+ // sort blob_reqs_in_file by file offset.
230
+ std::sort(
231
+ blob_reqs_in_file.begin(), blob_reqs_in_file.end(),
232
+ [](const BlobReadRequest& lhs, const BlobReadRequest& rhs) -> bool {
233
+ return lhs.offset < rhs.offset;
234
+ });
235
+
236
+ MultiGetBlobFromOneFile(read_options, file_number, file_size,
237
+ blob_reqs_in_file, &bytes_read_in_file);
238
+
239
+ total_bytes_read += bytes_read_in_file;
240
+ }
241
+
242
+ if (bytes_read) {
243
+ *bytes_read = total_bytes_read;
244
+ }
245
+ }
246
+
247
+ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options,
248
+ uint64_t file_number,
249
+ uint64_t file_size,
250
+ autovector<BlobReadRequest>& blob_reqs,
251
+ uint64_t* bytes_read) {
252
+ const size_t num_blobs = blob_reqs.size();
172
253
  assert(num_blobs > 0);
173
254
  assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
174
- assert(num_blobs == offsets.size());
175
- assert(num_blobs == value_sizes.size());
176
- assert(num_blobs == statuses.size());
177
- assert(num_blobs == blobs.size());
178
255
 
179
256
  #ifndef NDEBUG
180
- for (size_t i = 0; i < offsets.size() - 1; ++i) {
181
- assert(offsets[i] <= offsets[i + 1]);
257
+ for (size_t i = 0; i < num_blobs - 1; ++i) {
258
+ assert(blob_reqs[i].offset <= blob_reqs[i + 1].offset);
182
259
  }
183
260
  #endif // !NDEBUG
184
261
 
185
262
  using Mask = uint64_t;
186
263
  Mask cache_hit_mask = 0;
187
264
 
188
- Status s;
189
265
  uint64_t total_bytes = 0;
190
266
  const OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number,
191
267
  file_size);
@@ -193,27 +269,48 @@ void BlobSource::MultiGetBlob(
193
269
  if (blob_cache_) {
194
270
  size_t cached_blob_count = 0;
195
271
  for (size_t i = 0; i < num_blobs; ++i) {
196
- CachableEntry<std::string> blob_entry;
197
- const CacheKey cache_key = base_cache_key.WithOffset(offsets[i]);
272
+ auto& req = blob_reqs[i];
273
+
274
+ CacheHandleGuard<std::string> blob_handle;
275
+ const CacheKey cache_key = base_cache_key.WithOffset(req.offset);
198
276
  const Slice key = cache_key.AsSlice();
199
277
 
200
- s = GetBlobFromCache(key, &blob_entry);
201
- if (s.ok() && blob_entry.GetValue()) {
202
- assert(statuses[i]);
203
- *statuses[i] = s;
204
- blobs[i]->PinSelf(*blob_entry.GetValue());
278
+ const Status s = GetBlobFromCache(key, &blob_handle);
279
+
280
+ if (s.ok() && blob_handle.GetValue()) {
281
+ assert(req.status);
282
+ *req.status = s;
283
+
284
+ {
285
+ req.result->Reset();
286
+ // To avoid copying the cached blob into the buffer provided by the
287
+ // application, we can simply transfer ownership of the cache handle
288
+ // to the target PinnableSlice. This has the potential to save a lot
289
+ // of CPU, especially with large blob values.
290
+ req.result->PinSlice(
291
+ *blob_handle.GetValue(),
292
+ [](void* arg1, void* arg2) {
293
+ Cache* const cache = static_cast<Cache*>(arg1);
294
+ Cache::Handle* const handle = static_cast<Cache::Handle*>(arg2);
295
+ cache->Release(handle);
296
+ },
297
+ blob_handle.GetCache(), blob_handle.GetCacheHandle());
298
+ // Make the CacheHandleGuard relinquish ownership of the handle.
299
+ blob_handle.TransferTo(nullptr);
300
+ }
205
301
 
206
302
  // Update the counter for the number of valid blobs read from the cache.
207
303
  ++cached_blob_count;
304
+
208
305
  // For consistency, the size of each on-disk (possibly compressed) blob
209
306
  // record is accumulated to total_bytes.
210
307
  uint64_t adjustment =
211
308
  read_options.verify_checksums
212
309
  ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
213
- user_keys[i].get().size())
310
+ req.user_key->size())
214
311
  : 0;
215
- assert(offsets[i] >= adjustment);
216
- total_bytes += value_sizes[i] + adjustment;
312
+ assert(req.offset >= adjustment);
313
+ total_bytes += req.len + adjustment;
217
314
  cache_hit_mask |= (Mask{1} << i); // cache hit
218
315
  }
219
316
  }
@@ -231,8 +328,8 @@ void BlobSource::MultiGetBlob(
231
328
  if (no_io) {
232
329
  for (size_t i = 0; i < num_blobs; ++i) {
233
330
  if (!(cache_hit_mask & (Mask{1} << i))) {
234
- assert(statuses[i]);
235
- *statuses[i] =
331
+ assert(blob_reqs[i].status);
332
+ *blob_reqs[i].status =
236
333
  Status::Incomplete("Cannot read blob(s): no disk I/O allowed");
237
334
  }
238
335
  }
@@ -241,50 +338,43 @@ void BlobSource::MultiGetBlob(
241
338
 
242
339
  {
243
340
  // Find the rest of blobs from the file since I/O is allowed.
244
- autovector<std::reference_wrapper<const Slice>> _user_keys;
245
- autovector<uint64_t> _offsets;
246
- autovector<uint64_t> _value_sizes;
247
- autovector<Status*> _statuses;
248
- autovector<PinnableSlice*> _blobs;
341
+ autovector<BlobReadRequest*> _blob_reqs;
249
342
  uint64_t _bytes_read = 0;
250
343
 
251
344
  for (size_t i = 0; i < num_blobs; ++i) {
252
345
  if (!(cache_hit_mask & (Mask{1} << i))) {
253
- _user_keys.emplace_back(user_keys[i]);
254
- _offsets.push_back(offsets[i]);
255
- _value_sizes.push_back(value_sizes[i]);
256
- _statuses.push_back(statuses[i]);
257
- _blobs.push_back(blobs[i]);
346
+ _blob_reqs.push_back(&blob_reqs[i]);
258
347
  }
259
348
  }
260
349
 
261
350
  CacheHandleGuard<BlobFileReader> blob_file_reader;
262
- s = blob_file_cache_->GetBlobFileReader(file_number, &blob_file_reader);
351
+ Status s =
352
+ blob_file_cache_->GetBlobFileReader(file_number, &blob_file_reader);
263
353
  if (!s.ok()) {
264
- for (size_t i = 0; i < _blobs.size(); ++i) {
265
- assert(_statuses[i]);
266
- *_statuses[i] = s;
354
+ for (size_t i = 0; i < _blob_reqs.size(); ++i) {
355
+ assert(_blob_reqs[i]->status);
356
+ *_blob_reqs[i]->status = s;
267
357
  }
268
358
  return;
269
359
  }
270
360
 
271
361
  assert(blob_file_reader.GetValue());
272
362
 
273
- blob_file_reader.GetValue()->MultiGetBlob(read_options, _user_keys,
274
- _offsets, _value_sizes, _statuses,
275
- _blobs, &_bytes_read);
363
+ blob_file_reader.GetValue()->MultiGetBlob(read_options, _blob_reqs,
364
+ &_bytes_read);
276
365
 
277
- if (read_options.fill_cache) {
366
+ if (blob_cache_ && read_options.fill_cache) {
278
367
  // If filling cache is allowed and a cache is configured, try to put
279
368
  // the blob(s) to the cache.
280
- for (size_t i = 0; i < _blobs.size(); ++i) {
281
- if (_statuses[i]->ok()) {
282
- CachableEntry<std::string> blob_entry;
283
- const CacheKey cache_key = base_cache_key.WithOffset(_offsets[i]);
369
+ for (size_t i = 0; i < _blob_reqs.size(); ++i) {
370
+ if (_blob_reqs[i]->status->ok()) {
371
+ CacheHandleGuard<std::string> blob_handle;
372
+ const CacheKey cache_key =
373
+ base_cache_key.WithOffset(_blob_reqs[i]->offset);
284
374
  const Slice key = cache_key.AsSlice();
285
- s = PutBlobIntoCache(key, &blob_entry, _blobs[i]);
375
+ s = PutBlobIntoCache(key, &blob_handle, _blob_reqs[i]->result);
286
376
  if (!s.ok()) {
287
- *_statuses[i] = s;
377
+ *_blob_reqs[i]->status = s;
288
378
  }
289
379
  }
290
380
  }
@@ -302,10 +392,10 @@ bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
302
392
  const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
303
393
  const Slice key = cache_key.AsSlice();
304
394
 
305
- CachableEntry<std::string> blob_entry;
306
- const Status s = GetBlobFromCache(key, &blob_entry);
395
+ CacheHandleGuard<std::string> blob_handle;
396
+ const Status s = GetBlobFromCache(key, &blob_handle);
307
397
 
308
- if (s.ok() && blob_entry.GetValue() != nullptr) {
398
+ if (s.ok() && blob_handle.GetValue() != nullptr) {
309
399
  return true;
310
400
  }
311
401