@nxtedition/rocksdb 7.1.14 → 7.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/binding.cc +1 -0
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
  3. package/deps/rocksdb/rocksdb/Makefile +91 -11
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -4
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
  14. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
  19. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
  20. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
  21. package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
  22. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
  23. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
  24. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
  27. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
  30. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
  31. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
  33. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
  34. package/deps/rocksdb/rocksdb/db/c.cc +29 -0
  35. package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
  36. package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
  46. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
  47. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
  48. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
  50. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
  51. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
  54. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
  55. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
  63. package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
  64. package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
  65. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
  66. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
  67. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
  68. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
  69. package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
  70. package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
  71. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
  72. package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
  73. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
  74. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
  75. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
  76. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
  77. package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
  80. package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
  82. package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
  83. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
  84. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
  85. package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
  86. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
  87. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
  88. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
  89. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
  90. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
  91. package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
  92. package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
  93. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
  94. package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
  95. package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
  96. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  97. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
  98. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
  99. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  100. package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
  101. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
  102. package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
  103. package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
  104. package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
  105. package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
  106. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
  107. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
  108. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
  109. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
  111. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
  112. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
  113. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
  114. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
  115. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
  118. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
  119. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
  120. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
  121. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  122. package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
  123. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
  124. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
  125. package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
  126. package/deps/rocksdb/rocksdb/file/filename.h +4 -2
  127. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
  128. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
  129. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
  130. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
  131. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
  132. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
  134. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
  135. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
  136. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
  137. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
  139. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
  140. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
  142. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
  143. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
  144. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  145. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
  148. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
  149. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
  150. package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
  151. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
  152. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
  153. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
  154. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
  155. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
  156. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
  157. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
  158. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
  159. package/deps/rocksdb/rocksdb/options/options.cc +2 -2
  160. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
  161. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
  162. package/deps/rocksdb/rocksdb/src.mk +4 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
  170. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
  171. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
  172. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
  173. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
  174. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
  175. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  176. package/deps/rocksdb/rocksdb/table/format.cc +26 -29
  177. package/deps/rocksdb/rocksdb/table/format.h +44 -26
  178. package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
  179. package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
  180. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
  181. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
  182. package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
  183. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
  184. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
  185. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
  186. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
  187. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
  188. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
  189. package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
  190. package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
  191. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
  192. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
  193. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
  194. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
  195. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
  196. package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
  197. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
  198. package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
  199. package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
  200. package/deps/rocksdb/rocksdb/util/timer.h +2 -3
  201. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
  202. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
  203. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
  204. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
  205. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
  206. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
  207. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
  208. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
  209. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
  210. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
  211. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
  212. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
  213. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  214. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
  217. package/deps/rocksdb/rocksdb.gyp +0 -1
  218. package/index.js +6 -10
  219. package/package.json +1 -1
  220. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  221. package/prebuilds/linux-x64/node.napi.node +0 -0
  222. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
@@ -20,7 +20,8 @@ namespace ROCKSDB_NAMESPACE {
20
20
  // Single cache shard interface.
21
21
  class CacheShard {
22
22
  public:
23
- CacheShard() = default;
23
+ explicit CacheShard(CacheMetadataChargePolicy metadata_charge_policy)
24
+ : metadata_charge_policy_(metadata_charge_policy) {}
24
25
  virtual ~CacheShard() = default;
25
26
 
26
27
  using DeleterFn = Cache::DeleterFn;
@@ -47,6 +48,8 @@ class CacheShard {
47
48
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
48
49
  virtual size_t GetUsage() const = 0;
49
50
  virtual size_t GetPinnedUsage() const = 0;
51
+ virtual size_t GetOccupancyCount() const = 0;
52
+ virtual size_t GetTableAddressCount() const = 0;
50
53
  // Handles iterating over roughly `average_entries_per_lock` entries, using
51
54
  // `state` to somehow record where it last ended up. Caller initially uses
52
55
  // *state == 0 and implementation sets *state = UINT32_MAX to indicate
@@ -57,13 +60,9 @@ class CacheShard {
57
60
  uint32_t average_entries_per_lock, uint32_t* state) = 0;
58
61
  virtual void EraseUnRefEntries() = 0;
59
62
  virtual std::string GetPrintableOptions() const { return ""; }
60
- void set_metadata_charge_policy(
61
- CacheMetadataChargePolicy metadata_charge_policy) {
62
- metadata_charge_policy_ = metadata_charge_policy;
63
- }
64
63
 
65
64
  protected:
66
- CacheMetadataChargePolicy metadata_charge_policy_ = kDontChargeCacheMetadata;
65
+ const CacheMetadataChargePolicy metadata_charge_policy_;
67
66
  };
68
67
 
69
68
  // Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
@@ -106,6 +105,8 @@ class ShardedCache : public Cache {
106
105
  virtual size_t GetUsage() const override;
107
106
  virtual size_t GetUsage(Handle* handle) const override;
108
107
  virtual size_t GetPinnedUsage() const override;
108
+ virtual size_t GetOccupancyCount() const override;
109
+ virtual size_t GetTableAddressCount() const override;
109
110
  virtual void ApplyToAllEntries(
110
111
  const std::function<void(const Slice& key, void* value, size_t charge,
111
112
  DeleterFn deleter)>& callback,
@@ -127,6 +128,8 @@ class ShardedCache : public Cache {
127
128
  std::atomic<uint64_t> last_id_;
128
129
  };
129
130
 
130
- extern int GetDefaultCacheShardBits(size_t capacity);
131
+ // 512KB is traditional minimum shard size.
132
+ int GetDefaultCacheShardBits(size_t capacity,
133
+ size_t min_shard_size = 512U * 1024U);
131
134
 
132
135
  } // namespace ROCKSDB_NAMESPACE
@@ -78,7 +78,7 @@ blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
78
78
  $(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
79
79
 
80
80
  blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
81
- $(CRASHTEST_PY) --enable_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS)
81
+ $(CRASHTEST_PY) --test_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS)
82
82
 
83
83
  ifeq ($(CRASH_TEST_KILL_ODD),)
84
84
  CRASH_TEST_KILL_ODD=888887
@@ -103,5 +103,5 @@ whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
103
103
  $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
104
104
 
105
105
  whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
106
- $(CRASHTEST_PY) --enable_tiered_storage whitebox --random_kill_odd \
106
+ $(CRASHTEST_PY) --test_tiered_storage whitebox --random_kill_odd \
107
107
  $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
@@ -58,40 +58,60 @@ Status ArenaWrappedDBIter::Refresh() {
58
58
  uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
59
59
  TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
60
60
  TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
61
- while (true) {
62
- if (sv_number_ != cur_sv_number) {
63
- Env* env = db_iter_->env();
64
- db_iter_->~DBIter();
65
- arena_.~Arena();
66
- new (&arena_) Arena();
61
+ auto reinit_internal_iter = [&]() {
62
+ Env* env = db_iter_->env();
63
+ db_iter_->~DBIter();
64
+ arena_.~Arena();
65
+ new (&arena_) Arena();
67
66
 
68
- SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
69
- SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
70
- if (read_callback_) {
71
- read_callback_->Refresh(latest_seq);
72
- }
73
- Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
74
- sv->current, latest_seq,
75
- sv->mutable_cf_options.max_sequential_skip_in_iterations,
76
- cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
77
- allow_refresh_);
67
+ SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
68
+ SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
69
+ if (read_callback_) {
70
+ read_callback_->Refresh(latest_seq);
71
+ }
72
+ Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
73
+ sv->current, latest_seq,
74
+ sv->mutable_cf_options.max_sequential_skip_in_iterations,
75
+ cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
76
+ allow_refresh_);
78
77
 
79
- InternalIterator* internal_iter = db_impl_->NewInternalIterator(
80
- read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
81
- latest_seq, /* allow_unprepared_value */ true);
82
- SetIterUnderDBIter(internal_iter);
78
+ InternalIterator* internal_iter = db_impl_->NewInternalIterator(
79
+ read_options_, cfd_, sv, &arena_, latest_seq,
80
+ /* allow_unprepared_value */ true, /* db_iter */ this);
81
+ SetIterUnderDBIter(internal_iter);
82
+ };
83
+ while (true) {
84
+ if (sv_number_ != cur_sv_number) {
85
+ reinit_internal_iter();
83
86
  break;
84
87
  } else {
85
88
  SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
86
89
  // Refresh range-tombstones in MemTable
87
90
  if (!read_options_.ignore_range_deletions) {
88
91
  SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
89
- ReadRangeDelAggregator* range_del_agg =
90
- db_iter_->GetRangeDelAggregator();
91
- std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
92
- range_del_iter.reset(sv->mem->NewRangeTombstoneIterator(
93
- read_options_, latest_seq, false /* immutable_memtable */));
94
- range_del_agg->AddTombstones(std::move(range_del_iter));
92
+ auto t = sv->mem->NewRangeTombstoneIterator(
93
+ read_options_, latest_seq, false /* immutable_memtable */);
94
+ if (!t || t->empty()) {
95
+ if (memtable_range_tombstone_iter_) {
96
+ delete *memtable_range_tombstone_iter_;
97
+ *memtable_range_tombstone_iter_ = nullptr;
98
+ }
99
+ delete t;
100
+ } else { // current mutable memtable has range tombstones
101
+ if (!memtable_range_tombstone_iter_) {
102
+ delete t;
103
+ cfd_->ReturnThreadLocalSuperVersion(sv);
104
+ // The memtable under DBIter did not have range tombstone before
105
+ // refresh.
106
+ reinit_internal_iter();
107
+ break;
108
+ } else {
109
+ delete *memtable_range_tombstone_iter_;
110
+ *memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator(
111
+ std::unique_ptr<FragmentedRangeTombstoneIterator>(t),
112
+ &cfd_->internal_comparator(), nullptr, nullptr);
113
+ }
114
+ }
95
115
  cfd_->ReturnThreadLocalSuperVersion(sv);
96
116
  }
97
117
  // Refresh latest sequence number
@@ -44,9 +44,7 @@ class ArenaWrappedDBIter : public Iterator {
44
44
  // Get the arena to be used to allocate memory for DBIter to be wrapped,
45
45
  // as well as child iterators in it.
46
46
  virtual Arena* GetArena() { return &arena_; }
47
- virtual ReadRangeDelAggregator* GetRangeDelAggregator() {
48
- return db_iter_->GetRangeDelAggregator();
49
- }
47
+
50
48
  const ReadOptions& GetReadOptions() { return read_options_; }
51
49
 
52
50
  // Set the internal iterator wrapped inside the DB Iterator. Usually it is
@@ -55,6 +53,10 @@ class ArenaWrappedDBIter : public Iterator {
55
53
  db_iter_->SetIter(iter);
56
54
  }
57
55
 
56
+ void SetMemtableRangetombstoneIter(TruncatedRangeDelIterator** iter) {
57
+ memtable_range_tombstone_iter_ = iter;
58
+ }
59
+
58
60
  bool Valid() const override { return db_iter_->Valid(); }
59
61
  void SeekToFirst() override { db_iter_->SeekToFirst(); }
60
62
  void SeekToLast() override { db_iter_->SeekToLast(); }
@@ -68,6 +70,7 @@ class ArenaWrappedDBIter : public Iterator {
68
70
  void Prev() override { db_iter_->Prev(); }
69
71
  Slice key() const override { return db_iter_->key(); }
70
72
  Slice value() const override { return db_iter_->value(); }
73
+ const WideColumns& columns() const override { return db_iter_->columns(); }
71
74
  Status status() const override { return db_iter_->status(); }
72
75
  Slice timestamp() const override { return db_iter_->timestamp(); }
73
76
  bool IsBlob() const { return db_iter_->IsBlob(); }
@@ -104,6 +107,9 @@ class ArenaWrappedDBIter : public Iterator {
104
107
  ReadCallback* read_callback_;
105
108
  bool expose_blob_index_ = false;
106
109
  bool allow_refresh_ = true;
110
+ // If this is nullptr, it means the mutable memtable does not contain range
111
+ // tombstone when added under this DBIter.
112
+ TruncatedRangeDelIterator** memtable_range_tombstone_iter_ = nullptr;
107
113
  };
108
114
 
109
115
  // Generate the arena wrapped iterator class.
@@ -0,0 +1,90 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "db/blob/blob_contents.h"
7
+
8
+ #include <cassert>
9
+
10
+ #include "cache/cache_entry_roles.h"
11
+ #include "cache/cache_helpers.h"
12
+ #include "port/malloc.h"
13
+
14
+ namespace ROCKSDB_NAMESPACE {
15
+
16
+ std::unique_ptr<BlobContents> BlobContents::Create(
17
+ CacheAllocationPtr&& allocation, size_t size) {
18
+ return std::unique_ptr<BlobContents>(
19
+ new BlobContents(std::move(allocation), size));
20
+ }
21
+
22
+ size_t BlobContents::ApproximateMemoryUsage() const {
23
+ size_t usage = 0;
24
+
25
+ if (allocation_) {
26
+ MemoryAllocator* const allocator = allocation_.get_deleter().allocator;
27
+
28
+ if (allocator) {
29
+ usage += allocator->UsableSize(allocation_.get(), data_.size());
30
+ } else {
31
+ #ifdef ROCKSDB_MALLOC_USABLE_SIZE
32
+ usage += malloc_usable_size(allocation_.get());
33
+ #else
34
+ usage += data_.size();
35
+ #endif
36
+ }
37
+ }
38
+
39
+ #ifdef ROCKSDB_MALLOC_USABLE_SIZE
40
+ usage += malloc_usable_size(const_cast<BlobContents*>(this));
41
+ #else
42
+ usage += sizeof(*this);
43
+ #endif
44
+
45
+ return usage;
46
+ }
47
+
48
+ size_t BlobContents::SizeCallback(void* obj) {
49
+ assert(obj);
50
+
51
+ return static_cast<const BlobContents*>(obj)->size();
52
+ }
53
+
54
+ Status BlobContents::SaveToCallback(void* from_obj, size_t from_offset,
55
+ size_t length, void* out) {
56
+ assert(from_obj);
57
+
58
+ const BlobContents* buf = static_cast<const BlobContents*>(from_obj);
59
+ assert(buf->size() >= from_offset + length);
60
+
61
+ memcpy(out, buf->data().data() + from_offset, length);
62
+
63
+ return Status::OK();
64
+ }
65
+
66
+ Cache::CacheItemHelper* BlobContents::GetCacheItemHelper() {
67
+ static Cache::CacheItemHelper cache_helper(
68
+ &SizeCallback, &SaveToCallback,
69
+ GetCacheEntryDeleterForRole<BlobContents, CacheEntryRole::kBlobValue>());
70
+
71
+ return &cache_helper;
72
+ }
73
+
74
+ Status BlobContents::CreateCallback(CacheAllocationPtr&& allocation,
75
+ const void* buf, size_t size,
76
+ void** out_obj, size_t* charge) {
77
+ assert(allocation);
78
+
79
+ memcpy(allocation.get(), buf, size);
80
+
81
+ std::unique_ptr<BlobContents> obj = Create(std::move(allocation), size);
82
+ BlobContents* const contents = obj.release();
83
+
84
+ *out_obj = contents;
85
+ *charge = contents->ApproximateMemoryUsage();
86
+
87
+ return Status::OK();
88
+ }
89
+
90
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,56 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #pragma once
7
+
8
+ #include <memory>
9
+
10
+ #include "memory/memory_allocator.h"
11
+ #include "rocksdb/cache.h"
12
+ #include "rocksdb/rocksdb_namespace.h"
13
+ #include "rocksdb/slice.h"
14
+ #include "rocksdb/status.h"
15
+
16
+ namespace ROCKSDB_NAMESPACE {
17
+
18
+ // A class representing a single uncompressed value read from a blob file.
19
+ class BlobContents {
20
+ public:
21
+ static std::unique_ptr<BlobContents> Create(CacheAllocationPtr&& allocation,
22
+ size_t size);
23
+
24
+ BlobContents(const BlobContents&) = delete;
25
+ BlobContents& operator=(const BlobContents&) = delete;
26
+
27
+ BlobContents(BlobContents&&) = default;
28
+ BlobContents& operator=(BlobContents&&) = default;
29
+
30
+ ~BlobContents() = default;
31
+
32
+ const Slice& data() const { return data_; }
33
+ size_t size() const { return data_.size(); }
34
+
35
+ size_t ApproximateMemoryUsage() const;
36
+
37
+ // Callbacks for secondary cache
38
+ static size_t SizeCallback(void* obj);
39
+
40
+ static Status SaveToCallback(void* from_obj, size_t from_offset,
41
+ size_t length, void* out);
42
+
43
+ static Cache::CacheItemHelper* GetCacheItemHelper();
44
+
45
+ static Status CreateCallback(CacheAllocationPtr&& allocation, const void* buf,
46
+ size_t size, void** out_obj, size_t* charge);
47
+
48
+ private:
49
+ BlobContents(CacheAllocationPtr&& allocation, size_t size)
50
+ : allocation_(std::move(allocation)), data_(allocation_.get(), size) {}
51
+
52
+ CacheAllocationPtr allocation_;
53
+ Slice data_;
54
+ };
55
+
56
+ } // namespace ROCKSDB_NAMESPACE
@@ -7,6 +7,7 @@
7
7
 
8
8
  #include <cassert>
9
9
 
10
+ #include "db/blob/blob_contents.h"
10
11
  #include "db/blob/blob_file_addition.h"
11
12
  #include "db/blob/blob_file_completion_callback.h"
12
13
  #include "db/blob/blob_index.h"
@@ -408,16 +409,28 @@ Status BlobFileBuilder::PutBlobIntoCacheIfNeeded(const Slice& blob,
408
409
 
409
410
  // Objects to be put into the cache have to be heap-allocated and
410
411
  // self-contained, i.e. own their contents. The Cache has to be able to
411
- // take unique ownership of them. Therefore, we copy the blob into a
412
- // string directly, and insert that into the cache.
413
- std::unique_ptr<std::string> buf = std::make_unique<std::string>();
414
- buf->assign(blob.data(), blob.size());
415
-
416
- // TODO: support custom allocators and provide a better estimated memory
417
- // usage using malloc_usable_size.
418
- s = blob_cache->Insert(key, buf.get(), buf->size(),
419
- &DeleteCacheEntry<std::string>,
420
- nullptr /* cache_handle */, priority);
412
+ // take unique ownership of them.
413
+ CacheAllocationPtr allocation =
414
+ AllocateBlock(blob.size(), blob_cache->memory_allocator());
415
+ memcpy(allocation.get(), blob.data(), blob.size());
416
+ std::unique_ptr<BlobContents> buf =
417
+ BlobContents::Create(std::move(allocation), blob.size());
418
+
419
+ Cache::CacheItemHelper* const cache_item_helper =
420
+ BlobContents::GetCacheItemHelper();
421
+ assert(cache_item_helper);
422
+
423
+ if (immutable_options_->lowest_used_cache_tier ==
424
+ CacheTier::kNonVolatileBlockTier) {
425
+ s = blob_cache->Insert(key, buf.get(), cache_item_helper,
426
+ buf->ApproximateMemoryUsage(),
427
+ nullptr /* cache_handle */, priority);
428
+ } else {
429
+ s = blob_cache->Insert(key, buf.get(), buf->ApproximateMemoryUsage(),
430
+ cache_item_helper->del_cb,
431
+ nullptr /* cache_handle */, priority);
432
+ }
433
+
421
434
  if (s.ok()) {
422
435
  RecordTick(statistics, BLOB_DB_CACHE_ADD);
423
436
  RecordTick(statistics, BLOB_DB_CACHE_BYTES_WRITE, buf->size());
@@ -8,6 +8,7 @@
8
8
  #include <cassert>
9
9
  #include <string>
10
10
 
11
+ #include "db/blob/blob_contents.h"
11
12
  #include "db/blob/blob_log_format.h"
12
13
  #include "file/file_prefetch_buffer.h"
13
14
  #include "file/filename.h"
@@ -283,14 +284,12 @@ BlobFileReader::BlobFileReader(
283
284
 
284
285
  BlobFileReader::~BlobFileReader() = default;
285
286
 
286
- Status BlobFileReader::GetBlob(const ReadOptions& read_options,
287
- const Slice& user_key, uint64_t offset,
288
- uint64_t value_size,
289
- CompressionType compression_type,
290
- FilePrefetchBuffer* prefetch_buffer,
291
- PinnableSlice* value,
292
- uint64_t* bytes_read) const {
293
- assert(value);
287
+ Status BlobFileReader::GetBlob(
288
+ const ReadOptions& read_options, const Slice& user_key, uint64_t offset,
289
+ uint64_t value_size, CompressionType compression_type,
290
+ FilePrefetchBuffer* prefetch_buffer, MemoryAllocator* allocator,
291
+ std::unique_ptr<BlobContents>* result, uint64_t* bytes_read) const {
292
+ assert(result);
294
293
 
295
294
  const uint64_t key_size = user_key.size();
296
295
 
@@ -361,8 +360,8 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
361
360
  const Slice value_slice(record_slice.data() + adjustment, value_size);
362
361
 
363
362
  {
364
- const Status s = UncompressBlobIfNeeded(value_slice, compression_type,
365
- clock_, statistics_, value);
363
+ const Status s = UncompressBlobIfNeeded(
364
+ value_slice, compression_type, allocator, clock_, statistics_, result);
366
365
  if (!s.ok()) {
367
366
  return s;
368
367
  }
@@ -375,16 +374,18 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
375
374
  return Status::OK();
376
375
  }
377
376
 
378
- void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
379
- autovector<BlobReadRequest*>& blob_reqs,
380
- uint64_t* bytes_read) const {
377
+ void BlobFileReader::MultiGetBlob(
378
+ const ReadOptions& read_options, MemoryAllocator* allocator,
379
+ autovector<std::pair<BlobReadRequest*, std::unique_ptr<BlobContents>>>&
380
+ blob_reqs,
381
+ uint64_t* bytes_read) const {
381
382
  const size_t num_blobs = blob_reqs.size();
382
383
  assert(num_blobs > 0);
383
384
  assert(num_blobs <= MultiGetContext::MAX_BATCH_SIZE);
384
385
 
385
386
  #ifndef NDEBUG
386
387
  for (size_t i = 0; i < num_blobs - 1; ++i) {
387
- assert(blob_reqs[i]->offset <= blob_reqs[i + 1]->offset);
388
+ assert(blob_reqs[i].first->offset <= blob_reqs[i + 1].first->offset);
388
389
  }
389
390
  #endif // !NDEBUG
390
391
 
@@ -393,16 +394,21 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
393
394
  uint64_t total_len = 0;
394
395
  read_reqs.reserve(num_blobs);
395
396
  for (size_t i = 0; i < num_blobs; ++i) {
396
- const size_t key_size = blob_reqs[i]->user_key->size();
397
- const uint64_t offset = blob_reqs[i]->offset;
398
- const uint64_t value_size = blob_reqs[i]->len;
397
+ BlobReadRequest* const req = blob_reqs[i].first;
398
+ assert(req);
399
+ assert(req->user_key);
400
+ assert(req->status);
401
+
402
+ const size_t key_size = req->user_key->size();
403
+ const uint64_t offset = req->offset;
404
+ const uint64_t value_size = req->len;
399
405
 
400
406
  if (!IsValidBlobOffset(offset, key_size, value_size, file_size_)) {
401
- *blob_reqs[i]->status = Status::Corruption("Invalid blob offset");
407
+ *req->status = Status::Corruption("Invalid blob offset");
402
408
  continue;
403
409
  }
404
- if (blob_reqs[i]->compression != compression_type_) {
405
- *blob_reqs[i]->status =
410
+ if (req->compression != compression_type_) {
411
+ *req->status =
406
412
  Status::Corruption("Compression type mismatch when reading a blob");
407
413
  continue;
408
414
  }
@@ -411,12 +417,12 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
411
417
  read_options.verify_checksums
412
418
  ? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size)
413
419
  : 0;
414
- assert(blob_reqs[i]->offset >= adjustment);
420
+ assert(req->offset >= adjustment);
415
421
  adjustments.push_back(adjustment);
416
422
 
417
423
  FSReadRequest read_req = {};
418
- read_req.offset = blob_reqs[i]->offset - adjustment;
419
- read_req.len = blob_reqs[i]->len + adjustment;
424
+ read_req.offset = req->offset - adjustment;
425
+ read_req.len = req->len + adjustment;
420
426
  read_reqs.emplace_back(read_req);
421
427
  total_len += read_req.len;
422
428
  }
@@ -450,8 +456,11 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
450
456
  for (auto& req : read_reqs) {
451
457
  req.status.PermitUncheckedError();
452
458
  }
453
- for (auto& req : blob_reqs) {
459
+ for (auto& blob_req : blob_reqs) {
460
+ BlobReadRequest* const req = blob_req.first;
461
+ assert(req);
454
462
  assert(req->status);
463
+
455
464
  if (!req->status->IsCorruption()) {
456
465
  // Avoid overwriting corruption status.
457
466
  *req->status = s;
@@ -464,38 +473,42 @@ void BlobFileReader::MultiGetBlob(const ReadOptions& read_options,
464
473
 
465
474
  uint64_t total_bytes = 0;
466
475
  for (size_t i = 0, j = 0; i < num_blobs; ++i) {
467
- assert(blob_reqs[i]->status);
468
- if (!blob_reqs[i]->status->ok()) {
476
+ BlobReadRequest* const req = blob_reqs[i].first;
477
+ assert(req);
478
+ assert(req->user_key);
479
+ assert(req->status);
480
+
481
+ if (!req->status->ok()) {
469
482
  continue;
470
483
  }
471
484
 
472
485
  assert(j < read_reqs.size());
473
- auto& req = read_reqs[j++];
474
- const auto& record_slice = req.result;
475
- if (req.status.ok() && record_slice.size() != req.len) {
476
- req.status = IOStatus::Corruption("Failed to read data from blob file");
486
+ auto& read_req = read_reqs[j++];
487
+ const auto& record_slice = read_req.result;
488
+ if (read_req.status.ok() && record_slice.size() != read_req.len) {
489
+ read_req.status =
490
+ IOStatus::Corruption("Failed to read data from blob file");
477
491
  }
478
492
 
479
- *blob_reqs[i]->status = req.status;
480
- if (!blob_reqs[i]->status->ok()) {
493
+ *req->status = read_req.status;
494
+ if (!req->status->ok()) {
481
495
  continue;
482
496
  }
483
497
 
484
498
  // Verify checksums if enabled
485
499
  if (read_options.verify_checksums) {
486
- *blob_reqs[i]->status =
487
- VerifyBlob(record_slice, *blob_reqs[i]->user_key, blob_reqs[i]->len);
488
- if (!blob_reqs[i]->status->ok()) {
500
+ *req->status = VerifyBlob(record_slice, *req->user_key, req->len);
501
+ if (!req->status->ok()) {
489
502
  continue;
490
503
  }
491
504
  }
492
505
 
493
506
  // Uncompress blob if needed
494
- Slice value_slice(record_slice.data() + adjustments[i], blob_reqs[i]->len);
495
- *blob_reqs[i]->status =
496
- UncompressBlobIfNeeded(value_slice, compression_type_, clock_,
497
- statistics_, blob_reqs[i]->result);
498
- if (blob_reqs[i]->status->ok()) {
507
+ Slice value_slice(record_slice.data() + adjustments[i], req->len);
508
+ *req->status =
509
+ UncompressBlobIfNeeded(value_slice, compression_type_, allocator,
510
+ clock_, statistics_, &blob_reqs[i].second);
511
+ if (req->status->ok()) {
499
512
  total_bytes += record_slice.size();
500
513
  }
501
514
  }
@@ -549,15 +562,18 @@ Status BlobFileReader::VerifyBlob(const Slice& record_slice,
549
562
  return Status::OK();
550
563
  }
551
564
 
552
- Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
553
- CompressionType compression_type,
554
- SystemClock* clock,
555
- Statistics* statistics,
556
- PinnableSlice* value) {
557
- assert(value);
565
+ Status BlobFileReader::UncompressBlobIfNeeded(
566
+ const Slice& value_slice, CompressionType compression_type,
567
+ MemoryAllocator* allocator, SystemClock* clock, Statistics* statistics,
568
+ std::unique_ptr<BlobContents>* result) {
569
+ assert(result);
558
570
 
559
571
  if (compression_type == kNoCompression) {
560
- SaveValue(value_slice, value);
572
+ CacheAllocationPtr allocation =
573
+ AllocateBlock(value_slice.size(), allocator);
574
+ memcpy(allocation.get(), value_slice.data(), value_slice.size());
575
+
576
+ *result = BlobContents::Create(std::move(allocation), value_slice.size());
561
577
 
562
578
  return Status::OK();
563
579
  }
@@ -568,7 +584,6 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
568
584
 
569
585
  size_t uncompressed_size = 0;
570
586
  constexpr uint32_t compression_format_version = 2;
571
- constexpr MemoryAllocator* allocator = nullptr;
572
587
 
573
588
  CacheAllocationPtr output;
574
589
 
@@ -587,19 +602,9 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
587
602
  return Status::Corruption("Unable to uncompress blob");
588
603
  }
589
604
 
590
- SaveValue(Slice(output.get(), uncompressed_size), value);
605
+ *result = BlobContents::Create(std::move(output), uncompressed_size);
591
606
 
592
607
  return Status::OK();
593
608
  }
594
609
 
595
- void BlobFileReader::SaveValue(const Slice& src, PinnableSlice* dst) {
596
- assert(dst);
597
-
598
- if (dst->IsPinned()) {
599
- dst->Reset();
600
- }
601
-
602
- dst->PinSelf(src);
603
- }
604
-
605
610
  } // namespace ROCKSDB_NAMESPACE
@@ -23,7 +23,7 @@ class HistogramImpl;
23
23
  struct ReadOptions;
24
24
  class Slice;
25
25
  class FilePrefetchBuffer;
26
- class PinnableSlice;
26
+ class BlobContents;
27
27
  class Statistics;
28
28
 
29
29
  class BlobFileReader {
@@ -44,13 +44,17 @@ class BlobFileReader {
44
44
  Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
45
45
  uint64_t offset, uint64_t value_size,
46
46
  CompressionType compression_type,
47
- FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
47
+ FilePrefetchBuffer* prefetch_buffer,
48
+ MemoryAllocator* allocator,
49
+ std::unique_ptr<BlobContents>* result,
48
50
  uint64_t* bytes_read) const;
49
51
 
50
52
  // offsets must be sorted in ascending order by caller.
51
- void MultiGetBlob(const ReadOptions& read_options,
52
- autovector<BlobReadRequest*>& blob_reqs,
53
- uint64_t* bytes_read) const;
53
+ void MultiGetBlob(
54
+ const ReadOptions& read_options, MemoryAllocator* allocator,
55
+ autovector<std::pair<BlobReadRequest*, std::unique_ptr<BlobContents>>>&
56
+ blob_reqs,
57
+ uint64_t* bytes_read) const;
54
58
 
55
59
  CompressionType GetCompressionType() const { return compression_type_; }
56
60
 
@@ -89,11 +93,10 @@ class BlobFileReader {
89
93
 
90
94
  static Status UncompressBlobIfNeeded(const Slice& value_slice,
91
95
  CompressionType compression_type,
96
+ MemoryAllocator* allocator,
92
97
  SystemClock* clock,
93
98
  Statistics* statistics,
94
- PinnableSlice* value);
95
-
96
- static void SaveValue(const Slice& src, PinnableSlice* dst);
99
+ std::unique_ptr<BlobContents>* result);
97
100
 
98
101
  std::unique_ptr<RandomAccessFileReader> file_reader_;
99
102
  uint64_t file_size_;