@nxtedition/rocksdb 13.5.12 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/binding.cc +33 -2
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +3 -3
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -45,7 +45,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
45
45
  need_upper_bound_check_(need_upper_bound_check),
46
46
  async_read_in_progress_(false),
47
47
  is_last_level_(table->IsLastLevel()),
48
- block_iter_points_to_real_block_(false) {}
48
+ block_iter_points_to_real_block_(false) {
49
+ multi_scan_status_.PermitUncheckedError();
50
+ }
49
51
 
50
52
  ~BlockBasedTableIterator() override { ClearBlockHandles(); }
51
53
 
@@ -57,7 +59,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
57
59
  bool NextAndGetResult(IterateResult* result) override;
58
60
  void Prev() override;
59
61
  bool Valid() const override {
60
- return !is_out_of_bound_ &&
62
+ return !is_out_of_bound_ && multi_scan_status_.ok() &&
61
63
  (is_at_first_key_from_index_ ||
62
64
  (block_iter_points_to_real_block_ && block_iter_.Valid()));
63
65
  }
@@ -136,6 +138,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
136
138
  return block_iter_.value();
137
139
  }
138
140
  Status status() const override {
141
+ if (!multi_scan_status_.ok()) {
142
+ return multi_scan_status_;
143
+ }
139
144
  // In case of block cache readahead lookup, it won't add the block to
140
145
  // block_handles if its index is invalid. So index_iter_->status check can
141
146
  // be skipped.
@@ -145,10 +150,13 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
145
150
  assert(!multi_scan_);
146
151
  return index_iter_->status();
147
152
  } else if (block_iter_points_to_real_block_) {
153
+ // This is the common case.
148
154
  return block_iter_.status();
149
155
  } else if (async_read_in_progress_) {
150
156
  assert(!multi_scan_);
151
157
  return Status::TryAgain("Async read in progress");
158
+ } else if (multi_scan_) {
159
+ return multi_scan_status_;
152
160
  } else {
153
161
  return Status::OK();
154
162
  }
@@ -160,6 +168,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
160
168
  } else if (block_upper_bound_check_ ==
161
169
  BlockUpperBound::kUpperBoundBeyondCurBlock) {
162
170
  assert(!is_out_of_bound_);
171
+ // MultiScan does not do block level upper bound check yet.
172
+ assert(!multi_scan_);
163
173
  return IterBoundCheck::kInbound;
164
174
  } else {
165
175
  return IterBoundCheck::kUnknown;
@@ -225,7 +235,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
225
235
  }
226
236
  }
227
237
 
228
- void Prepare(const std::vector<ScanOptions>* scan_opts) override;
238
+ void Prepare(const MultiScanArgs* scan_opts) override;
229
239
 
230
240
  FilePrefetchBuffer* prefetch_buffer() {
231
241
  return block_prefetcher_.prefetch_buffer();
@@ -233,6 +243,16 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
233
243
 
234
244
  std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter_;
235
245
 
246
+ bool TEST_IsBlockPinnedByMultiScan(size_t block_idx) {
247
+ if (!multi_scan_) {
248
+ return false;
249
+ }
250
+ if (block_idx >= multi_scan_->pinned_data_blocks.size()) {
251
+ return false;
252
+ }
253
+ return !multi_scan_->pinned_data_blocks[block_idx].IsEmpty();
254
+ }
255
+
236
256
  private:
237
257
  enum class IterDirection {
238
258
  kForward,
@@ -371,29 +391,107 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
371
391
  // *** END States used by both regular scan and multiscan
372
392
 
373
393
  // *** BEGIN MultiScan related states ***
394
+ struct AsyncReadState {
395
+ std::unique_ptr<char[]> buf{nullptr};
396
+ // Indices into pinned_data_blocks that this request reads.
397
+ std::vector<size_t> block_indices;
398
+ // BlockHandle for each block in block_indices.
399
+ std::vector<BlockHandle> blocks;
400
+ void* io_handle{nullptr};
401
+ IOHandleDeleter del_fn{nullptr};
402
+ // offset for this async read request.
403
+ uint64_t offset{0};
404
+
405
+ // These two states are populated from the FSReadRequest
406
+ // by ReadAsync callback
407
+ Status status;
408
+ Slice result;
409
+
410
+ // For direct I/O support
411
+ AlignedBuf aligned_buf{nullptr};
412
+
413
+ bool finished{false};
414
+
415
+ AsyncReadState() = default;
416
+ DECLARE_DEFAULT_MOVES(AsyncReadState);
417
+ // Delete copy operations
418
+ AsyncReadState(const AsyncReadState&) = delete;
419
+ AsyncReadState& operator=(const AsyncReadState&) = delete;
420
+
421
+ void CleanUpIOHandle() {
422
+ if (io_handle != nullptr) {
423
+ assert(del_fn);
424
+ del_fn(io_handle);
425
+ io_handle = nullptr;
426
+ }
427
+ finished = true;
428
+ }
429
+
430
+ ~AsyncReadState() {
431
+ // Should be cleaned up before destruction.
432
+ assert(io_handle == nullptr);
433
+ }
434
+ };
435
+
374
436
  struct MultiScanState {
375
- // bool prepared_ = false;
376
- const std::vector<ScanOptions>* scan_opts;
437
+ // For Aborting async I/Os in destructor.
438
+ const std::shared_ptr<FileSystem> fs;
439
+ const MultiScanArgs* scan_opts;
377
440
  std::vector<CachableEntry<Block>> pinned_data_blocks;
378
-
379
- // Indicies into multiscan_pinned_data_blocks_ for data blocks that are
380
- // relevant for each scan range.
441
+ // The separator of each data block in above pinned_data_blocks vector.
442
+ // Its size is same as pinned_data_blocks.
443
+ // The value of separator is larger than or equal to the last key in the
444
+ // corresponding data block.
445
+ std::vector<std::string> data_block_separators;
446
+ // Track previously seeked key in multi-scan.
447
+ // This is used to ensure that the seek key keeps moving forward, as
448
+ // blocks that are smaller than the seek key are unpinned from memory.
449
+ std::string prev_seek_key_;
450
+
451
+ // Indices into pinned_data_blocks for data blocks for each scan range.
381
452
  // inclusive start, exclusive end
382
- std::vector<std::tuple<size_t, size_t>> block_ranges_per_scan;
453
+ std::vector<std::tuple<size_t, size_t>> block_index_ranges_per_scan;
383
454
  size_t next_scan_idx;
384
455
  size_t cur_data_block_idx;
385
456
 
457
+ // States for async reads.
458
+ //
459
+ // Each async state corresponds to an async read request.
460
+ // Each async read request may read content for multiple blocks
461
+ // (potentially coalesced). In PollForBlock(idx), we will poll for the
462
+ // completion of the async read request responsible for
463
+ // pinned_data_blocks[idx], and populate `pinned_data_blocks` with all the
464
+ // blocks read. To find out the async read request responsible for
465
+ // pinned_data_blocks[idx], we store the mapping in
466
+ // block_idx_to_readreq_idx. Index i is in block_idx_to_readreq_idx and
467
+ // block_idx_to_readreq_idx[i] = j iff pinned_data_blocks[i] is read by
468
+ // async_states[j].
469
+ std::vector<AsyncReadState> async_states;
470
+ UnorderedMap<size_t, size_t> block_idx_to_readreq_idx;
471
+ size_t prefetch_max_idx;
472
+
386
473
  MultiScanState(
387
- const std::vector<ScanOptions>* _scan_opts,
474
+ const std::shared_ptr<FileSystem>& _fs, const MultiScanArgs* _scan_opts,
388
475
  std::vector<CachableEntry<Block>>&& _pinned_data_blocks,
389
- std::vector<std::tuple<size_t, size_t>>&& _block_ranges_per_scan)
390
- : scan_opts(_scan_opts),
476
+ std::vector<std::string>&& _data_block_separators,
477
+ std::vector<std::tuple<size_t, size_t>>&& _block_index_ranges_per_scan,
478
+ UnorderedMap<size_t, size_t>&& _block_idx_to_readreq_idx,
479
+ std::vector<AsyncReadState>&& _async_states, size_t _prefetch_max_idx)
480
+ : fs(_fs),
481
+ scan_opts(_scan_opts),
391
482
  pinned_data_blocks(std::move(_pinned_data_blocks)),
392
- block_ranges_per_scan(std::move(_block_ranges_per_scan)),
483
+ data_block_separators(std::move(_data_block_separators)),
484
+ block_index_ranges_per_scan(std::move(_block_index_ranges_per_scan)),
393
485
  next_scan_idx(0),
394
- cur_data_block_idx(0) {}
486
+ cur_data_block_idx(0),
487
+ async_states(std::move(_async_states)),
488
+ block_idx_to_readreq_idx(std::move(_block_idx_to_readreq_idx)),
489
+ prefetch_max_idx(_prefetch_max_idx) {}
490
+
491
+ ~MultiScanState();
395
492
  };
396
493
 
494
+ Status multi_scan_status_;
397
495
  std::unique_ptr<MultiScanState> multi_scan_;
398
496
  // *** END MultiScan related APIs and states ***
399
497
 
@@ -513,10 +611,121 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
513
611
  // *** END APIs relevant to auto tuning of readahead_size ***
514
612
 
515
613
  // *** BEGIN APIs relevant to multiscan ***
516
- // Returns true iff seek is successful.
517
- bool SeekMultiScan(const Slice* target);
614
+
615
+ // Wrapper for SeekMultiScanImpl for handling out of bound
616
+ void SeekMultiScan(const Slice* target);
617
+
618
+ // Return true if the result is out of bound
619
+ bool SeekMultiScanImpl(const Slice* seek_target);
518
620
 
519
621
  void FindBlockForwardInMultiScan();
622
+
623
+ // Unpins blocks from the immediately previous scan range.
624
+ void UnpinPreviousScanBlocks(size_t current_scan_idx);
625
+
626
+ void PrepareReadAsyncCallBack(FSReadRequest& req, void* cb_arg) {
627
+ // Record status, result and sanity check offset from `req`.
628
+ AsyncReadState* async_state = static_cast<AsyncReadState*>(cb_arg);
629
+
630
+ async_state->status = req.status;
631
+ async_state->result = req.result;
632
+
633
+ if (async_state->status.ok()) {
634
+ assert(async_state->offset == req.offset);
635
+ if (async_state->offset != req.offset) {
636
+ async_state->status = Status::InvalidArgument(
637
+ "offset mismatch between async read request " +
638
+ std::to_string(async_state->offset) + " and async callback " +
639
+ std::to_string(req.offset));
640
+ }
641
+ } else {
642
+ assert(async_state->status.IsAborted());
643
+ }
644
+ }
645
+
646
+ void MultiScanSeekTargetFromBlock(const Slice* seek_target, size_t block_idx);
647
+ void MultiScanUnexpectedSeekTarget(const Slice* seek_target,
648
+ size_t block_idx);
649
+
650
+ // Return true, if there is an error, or end of file
651
+ bool MultiScanLoadDataBlock(size_t idx) {
652
+ if (idx >= multi_scan_->prefetch_max_idx) {
653
+ // TODO: Fix the max_prefetch_size support for multiple files.
654
+ // The goal is to limit the memory usage, prefetch could be done
655
+ // incrementally.
656
+ if (multi_scan_->scan_opts->max_prefetch_size == 0) {
657
+ // If max_prefetch_size is not set, treat this as end of file.
658
+ ResetDataIter();
659
+ assert(!is_out_of_bound_);
660
+ assert(!Valid());
661
+ } else {
662
+ // If max_prefetch_size is set, treat this as error.
663
+ multi_scan_status_ = Status::PrefetchLimitReached();
664
+ }
665
+ return true;
666
+ }
667
+
668
+ if (!multi_scan_->async_states.empty()) {
669
+ multi_scan_status_ = PollForBlock(idx);
670
+ if (!multi_scan_status_.ok()) {
671
+ return true;
672
+ }
673
+ }
674
+ // This block should have been initialized
675
+ assert(multi_scan_->pinned_data_blocks[idx].GetValue());
676
+ // Note that the block_iter_ takes ownership of the pinned data block
677
+ // TODO: we can delegate the clean up like with pinned_iters_mgr_ if
678
+ // need to pin blocks longer.
679
+ table_->NewDataBlockIterator<DataBlockIter>(
680
+ read_options_, multi_scan_->pinned_data_blocks[idx], &block_iter_,
681
+ Status::OK());
682
+ return false;
683
+ }
684
+
685
+ // After PollForBlock(idx), the async request that contains
686
+ // pinned_data_blocks[idx] should be done, and all blocks contained in this
687
+ // read request will be initialized in pinned_data_blocks and pinned in block
688
+ // cache.
689
+ Status PollForBlock(size_t idx);
690
+
691
+ // Helper function to create and pin a block in cache from buffer data
692
+ // Handles decompressor setup with dictionary loading and block
693
+ // creation/pinning. The buffer_start_offset is the file offset where
694
+ // buffer_data starts.
695
+ Status CreateAndPinBlockFromBuffer(const BlockHandle& block,
696
+ uint64_t buffer_start_offset,
697
+ const Slice& buffer_data,
698
+ CachableEntry<Block>& pinned_block_entry);
699
+
700
+ Status CollectBlockHandles(
701
+ const std::vector<ScanOptions>& scan_opts,
702
+ std::vector<BlockHandle>* scan_block_handles,
703
+ std::vector<std::tuple<size_t, size_t>>* block_index_ranges_per_scan,
704
+ std::vector<std::string>* data_block_boundary_keys);
705
+
706
+ Status FilterAndPinCachedBlocks(
707
+ const std::vector<BlockHandle>& scan_block_handles,
708
+ const MultiScanArgs* multiscan_opts,
709
+ std::vector<size_t>* block_indices_to_read,
710
+ std::vector<CachableEntry<Block>>* pinned_data_blocks_guard,
711
+ size_t* prefetched_max_idx);
712
+
713
+ void PrepareIORequests(
714
+ const std::vector<size_t>& block_indices_to_read,
715
+ const std::vector<BlockHandle>& scan_block_handles,
716
+ const MultiScanArgs* multiscan_opts,
717
+ std::vector<FSReadRequest>* read_reqs,
718
+ UnorderedMap<size_t, size_t>* block_idx_to_readreq_idx,
719
+ std::vector<std::vector<size_t>>* coalesced_block_indices);
720
+
721
+ Status ExecuteIO(
722
+ const std::vector<BlockHandle>& scan_block_handles,
723
+ const MultiScanArgs* multiscan_opts,
724
+ const std::vector<std::vector<size_t>>& coalesced_block_indices,
725
+ std::vector<FSReadRequest>* read_reqs,
726
+ std::vector<AsyncReadState>* async_states,
727
+ std::vector<CachableEntry<Block>>* pinned_data_blocks_guard);
728
+
520
729
  // *** END APIs relevant to multiscan ***
521
730
  };
522
731
  } // namespace ROCKSDB_NAMESPACE
@@ -109,8 +109,8 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
109
109
  CachableEntry<T>* out_parsed_block) const; \
110
110
  template Status BlockBasedTable::CreateAndPinBlockInCache<T>( \
111
111
  const ReadOptions& ro, const BlockHandle& handle, \
112
- BlockContents* block_contents, CachableEntry<T>* out_parsed_block) \
113
- const;
112
+ UnownedPtr<Decompressor> decomp, BlockContents* block_contents, \
113
+ CachableEntry<T>* out_parsed_block) const;
114
114
 
115
115
  INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock);
116
116
  INSTANTIATE_BLOCKLIKE_TEMPLATES(DecompressorDict);
@@ -1333,25 +1333,59 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
1333
1333
  s = FindMetaBlock(meta_iter, kUserDefinedIndexPrefix + udi_name,
1334
1334
  &udi_block_handle);
1335
1335
  if (!s.ok()) {
1336
- return s;
1337
- }
1338
- // Read the block, and allocate on heap or pin in cache. The UDI block is
1339
- // not compressed. RetrieveBlock will verify the checksum.
1340
- s = RetrieveBlock(prefetch_buffer, ro, udi_block_handle,
1341
- rep_->decompressor.get(), &rep_->udi_block,
1342
- /*get_context=*/nullptr, lookup_context,
1343
- /*for_compaction=*/false, use_cache, /*async_read=*/false,
1344
- /*use_block_cache_for_lookup=*/false);
1345
- if (!s.ok()) {
1346
- return s;
1336
+ RecordTick(rep_->ioptions.statistics.get(),
1337
+ SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT);
1338
+ if (table_options.fail_if_no_udi_on_open) {
1339
+ ROCKS_LOG_ERROR(rep_->ioptions.logger,
1340
+ "Failed to find the the UDI block %s in file %s; %s",
1341
+ udi_name.c_str(), rep_->file->file_name().c_str(),
1342
+ s.ToString().c_str());
1343
+ // MAke the status more informative
1344
+ s = Status::Corruption(s.ToString(), rep_->file->file_name());
1345
+ return s;
1346
+ } else {
1347
+ // Emit a warning, but ignore the error status
1348
+ ROCKS_LOG_WARN(rep_->ioptions.logger,
1349
+ "Failed to find the the UDI block %s in file %s; %s",
1350
+ udi_name.c_str(), rep_->file->file_name().c_str(),
1351
+ s.ToString().c_str());
1352
+ s = Status::OK();
1353
+ }
1347
1354
  }
1348
- assert(!rep_->udi_block.IsEmpty());
1349
1355
 
1350
- std::unique_ptr<UserDefinedIndexReader> udi_reader =
1351
- table_options.user_defined_index_factory->NewReader(
1352
- rep_->udi_block.GetValue()->data);
1353
- index_reader = std::make_unique<UserDefinedIndexReaderWrapper>(
1354
- udi_name, std::move(index_reader), std::move(udi_reader));
1356
+ // If the UDI block size is 0, that means there's effectively no user
1357
+ // defined index. In that case, skip setting up the reader.
1358
+ if (udi_block_handle.size() > 0) {
1359
+ // Read the block, and allocate on heap or pin in cache. The UDI block is
1360
+ // not compressed. RetrieveBlock will verify the checksum.
1361
+ if (s.ok()) {
1362
+ s = RetrieveBlock(prefetch_buffer, ro, udi_block_handle,
1363
+ rep_->decompressor.get(), &rep_->udi_block,
1364
+ /*get_context=*/nullptr, lookup_context,
1365
+ /*for_compaction=*/false, use_cache,
1366
+ /*async_read=*/false,
1367
+ /*use_block_cache_for_lookup=*/false);
1368
+ }
1369
+ if (s.ok()) {
1370
+ assert(!rep_->udi_block.IsEmpty());
1371
+
1372
+ std::unique_ptr<UserDefinedIndexReader> udi_reader;
1373
+ UserDefinedIndexOption udi_option;
1374
+ udi_option.comparator = rep_->internal_comparator.user_comparator();
1375
+ s = table_options.user_defined_index_factory->NewReader(
1376
+ udi_option, rep_->udi_block.GetValue()->data, udi_reader);
1377
+ if (s.ok()) {
1378
+ if (udi_reader) {
1379
+ index_reader = std::make_unique<UserDefinedIndexReaderWrapper>(
1380
+ udi_name, std::move(index_reader), std::move(udi_reader));
1381
+ } else {
1382
+ s = Status::Corruption("Failed to create UDI reader for " +
1383
+ udi_name + " in file " +
1384
+ rep_->file->file_name());
1385
+ }
1386
+ }
1387
+ }
1388
+ }
1355
1389
  }
1356
1390
 
1357
1391
  rep_->index_reader = std::move(index_reader);
@@ -1359,7 +1393,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
1359
1393
  // The partitions of partitioned index are always stored in cache. They
1360
1394
  // are hence follow the configuration for pin and prefetch regardless of
1361
1395
  // the value of cache_index_and_filter_blocks
1362
- if (prefetch_all || pin_partition) {
1396
+ if (s.ok() && (prefetch_all || pin_partition)) {
1363
1397
  s = rep_->index_reader->CacheDependencies(ro, pin_partition,
1364
1398
  prefetch_buffer);
1365
1399
  }
@@ -1741,13 +1775,55 @@ Status BlockBasedTable::LookupAndPinBlocksInCache(
1741
1775
 
1742
1776
  template <typename TBlocklike>
1743
1777
  Status BlockBasedTable::CreateAndPinBlockInCache(
1744
- const ReadOptions& ro, const BlockHandle& handle, BlockContents* contents,
1778
+ const ReadOptions& ro, const BlockHandle& handle,
1779
+ UnownedPtr<Decompressor> decomp, BlockContents* contents,
1745
1780
  CachableEntry<TBlocklike>* out_parsed_block) const {
1746
- return MaybeReadBlockAndLoadToCache(
1747
- nullptr, ro, handle, rep_->decompressor.get(),
1748
- /*for_compaction=*/false, out_parsed_block, nullptr, nullptr, contents,
1749
- /*async_read=*/false,
1750
- /*use_block_cache_for_lookup=*/true);
1781
+ CompressionType compression_type = GetBlockCompressionType(*contents);
1782
+ // If we don't own the contents and we don't need to decompress, copy
1783
+ // the block to heap in order to have ownership. If decompression is
1784
+ // needed, then the decompressor will allocate a buffer.
1785
+ if (!contents->own_bytes() && compression_type == kNoCompression) {
1786
+ Slice src = Slice(contents->data.data(), BlockSizeWithTrailer(handle));
1787
+ *contents = BlockContents(
1788
+ CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), src),
1789
+ handle.size());
1790
+ #ifndef NDEBUG
1791
+ contents->has_trailer = true;
1792
+ #endif
1793
+ }
1794
+
1795
+ Status s;
1796
+ if (ro.fill_cache) {
1797
+ s = MaybeReadBlockAndLoadToCache(nullptr, ro, handle, decomp,
1798
+ /*for_compaction=*/false, out_parsed_block,
1799
+ nullptr, nullptr, contents,
1800
+ /*async_read=*/false,
1801
+ /*use_block_cache_for_lookup=*/true);
1802
+ }
1803
+
1804
+ if (!s.ok()) {
1805
+ return s;
1806
+ }
1807
+
1808
+ // fill_cache could be false, or no block cache is configured. In that
1809
+ // case, decompress if necessary and take ownership of the block
1810
+ if (out_parsed_block->GetValue() == nullptr && contents != nullptr) {
1811
+ BlockContents tmp_contents;
1812
+ if (compression_type != kNoCompression) {
1813
+ s = DecompressSerializedBlock(contents->data.data(), handle.size(),
1814
+ compression_type, *decomp, &tmp_contents,
1815
+ rep_->ioptions,
1816
+ GetMemoryAllocator(rep_->table_options));
1817
+ } else {
1818
+ tmp_contents = std::move(*contents);
1819
+ }
1820
+ if (s.ok()) {
1821
+ std::unique_ptr<TBlocklike> block_holder;
1822
+ rep_->create_context.Create(&block_holder, std::move(tmp_contents));
1823
+ out_parsed_block->SetOwnedValue(std::move(block_holder));
1824
+ }
1825
+ }
1826
+ return s;
1751
1827
  }
1752
1828
 
1753
1829
  // If contents is nullptr, this function looks up the block caches for the
@@ -311,7 +311,7 @@ class BlockBasedTable : public TableReader {
311
311
  template <typename TBlocklike>
312
312
  Status CreateAndPinBlockInCache(
313
313
  const ReadOptions& ro, const BlockHandle& handle,
314
- BlockContents* block_contents,
314
+ UnownedPtr<Decompressor> decomp, BlockContents* block_contents,
315
315
  CachableEntry<TBlocklike>* out_parsed_block) const;
316
316
 
317
317
  struct Rep;
@@ -37,8 +37,6 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
37
37
  RandomAccessFileReader* file = rep_->file.get();
38
38
  const Footer& footer = rep_->footer;
39
39
  const ImmutableOptions& ioptions = rep_->ioptions;
40
- size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit;
41
- MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options);
42
40
 
43
41
  if (ioptions.allow_mmap_reads) {
44
42
  size_t idx_in_batch = 0;
@@ -266,79 +264,8 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
266
264
  }
267
265
 
268
266
  if (s.ok()) {
269
- // When the blocks share the same underlying buffer (scratch or direct io
270
- // buffer), we may need to manually copy the block into heap if the
271
- // serialized block has to be inserted into a cache. That falls into the
272
- // following cases -
273
- // 1. serialized block is not compressed, it needs to be inserted into
274
- // the uncompressed block cache if there is one
275
- // 2. If the serialized block is compressed, it needs to be inserted
276
- // into the compressed block cache if there is one
277
- //
278
- // In all other cases, the serialized block is either uncompressed into a
279
- // heap buffer or there is no cache at all.
280
- CompressionType compression_type =
281
- GetBlockCompressionType(serialized_block);
282
- if ((use_fs_scratch || use_shared_buffer) &&
283
- compression_type == kNoCompression) {
284
- Slice serialized =
285
- Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle));
286
- serialized_block = BlockContents(
287
- CopyBufferToHeap(GetMemoryAllocator(rep_->table_options),
288
- serialized),
289
- handle.size());
290
- #ifndef NDEBUG
291
- serialized_block.has_trailer = true;
292
- #endif
293
- }
294
- }
295
-
296
- if (s.ok()) {
297
- if (options.fill_cache) {
298
- CachableEntry<Block_kData>* block_entry = &results[idx_in_batch];
299
- // MaybeReadBlockAndLoadToCache will insert into the block caches if
300
- // necessary. Since we're passing the serialized block contents, it
301
- // will avoid looking up the block cache
302
- s = MaybeReadBlockAndLoadToCache(
303
- nullptr, options, handle, decomp,
304
- /*for_compaction=*/false, block_entry, mget_iter->get_context,
305
- /*lookup_context=*/nullptr, &serialized_block,
306
- /*async_read=*/false, /*use_block_cache_for_lookup=*/true);
307
-
308
- if (!s.ok()) {
309
- statuses[idx_in_batch] = s;
310
- continue;
311
- }
312
- // block_entry value could be null if no block cache is present, i.e
313
- // BlockBasedTableOptions::no_block_cache is true and no compressed
314
- // block cache is configured. In that case, fall
315
- // through and set up the block explicitly
316
- if (block_entry->GetValue() != nullptr) {
317
- continue;
318
- }
319
- }
320
-
321
- CompressionType compression_type =
322
- GetBlockCompressionType(serialized_block);
323
- BlockContents contents;
324
- if (compression_type != kNoCompression) {
325
- s = DecompressSerializedBlock(
326
- req.result.data() + req_offset, handle.size(), compression_type,
327
- *decomp, &contents, rep_->ioptions, memory_allocator);
328
- } else {
329
- // There are two cases here:
330
- // 1) caller uses the shared buffer (scratch or direct io buffer);
331
- // 2) we use the requst buffer.
332
- // If scratch buffer or direct io buffer is used, we ensure that
333
- // all serialized blocks are copyed to the heap as single blocks. If
334
- // scratch buffer is not used, we also have no combined read, so the
335
- // serialized block can be used directly.
336
- contents = std::move(serialized_block);
337
- }
338
- if (s.ok()) {
339
- results[idx_in_batch].SetOwnedValue(std::make_unique<Block_kData>(
340
- std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
341
- }
267
+ s = CreateAndPinBlockInCache(options, handle, decomp, &serialized_block,
268
+ &results[idx_in_batch]);
342
269
  }
343
270
  statuses[idx_in_batch] = s;
344
271
  }