@nxtedition/rocksdb 13.1.5 → 13.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/binding.cc +62 -15
  2. package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  7. package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
  8. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
  9. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
  11. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  12. package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
  13. package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
  14. package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
  15. package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
  16. package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
  39. package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
  54. package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
  55. package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
  56. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
  57. package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
  58. package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
  59. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
  60. package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
  61. package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
  62. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
  65. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
  67. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
  69. package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
  70. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
  72. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
  76. package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
  77. package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
  78. package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
  79. package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
  80. package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
  81. package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
  82. package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
  83. package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
  84. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
  85. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
  86. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
  87. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
  88. package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
  89. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
  90. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
  91. package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
  92. package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
  93. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
  94. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  95. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
  96. package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
  98. package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
  99. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
  100. package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
  101. package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
  102. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
  103. package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
  104. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
  105. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
  106. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
  107. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
  108. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
  109. package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
  110. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
  111. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
  113. package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
  114. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
  115. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
  118. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
  119. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
  120. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
  121. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
  122. package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
  123. package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
  124. package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
  125. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  126. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
  127. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
  128. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
  129. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
  130. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  131. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  132. package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
  135. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
  137. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
  139. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
  140. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
  142. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
  144. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
  147. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
  148. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
  150. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
  152. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  153. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
  154. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
  155. package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
  156. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
  157. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
  158. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
  159. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
  160. package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
  161. package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
  162. package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
  163. package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
  164. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
  165. package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
  166. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
  167. package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
  168. package/deps/rocksdb/rocksdb/port/port.h +5 -9
  169. package/deps/rocksdb/rocksdb/src.mk +8 -0
  170. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
  171. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
  172. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
  174. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
  175. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
  177. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
  178. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
  181. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
  182. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  183. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
  184. package/deps/rocksdb/rocksdb/table/format.cc +3 -3
  185. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
  186. package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
  187. package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
  188. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
  189. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  190. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
  191. package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
  192. package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
  193. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
  194. package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
  196. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
  197. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
  198. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  199. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
  200. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
  201. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
  202. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
  203. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
  204. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
  205. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
  206. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
  207. package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
  208. package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
  209. package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
  210. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
  211. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
  212. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
  213. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
  214. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
  215. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
  216. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
  217. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
  218. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
  219. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
  220. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
  221. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
  222. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
  223. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
  224. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
  225. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
  226. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
  227. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
  228. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
  229. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
  230. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
  231. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
  232. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
  233. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
  234. package/deps/rocksdb/rocksdb.gyp +2 -0
  235. package/package.json +1 -1
  236. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  237. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -27,12 +27,14 @@ class CorruptionFS : public FileSystemWrapper {
27
27
  num_writable_file_errors_(0),
28
28
  corruption_trigger_(INT_MAX),
29
29
  read_count_(0),
30
+ corrupt_offset_(0),
31
+ corrupt_len_(0),
30
32
  rnd_(300),
31
33
  fs_buffer_(fs_buffer),
32
34
  verify_read_(verify_read) {}
33
35
  ~CorruptionFS() override {
34
36
  // Assert that the corruption was reset, which means it got triggered
35
- assert(corruption_trigger_ == INT_MAX);
37
+ assert(corruption_trigger_ == INT_MAX || corrupt_len_ > 0);
36
38
  }
37
39
  const char* Name() const override { return "ErrorEnv"; }
38
40
 
@@ -48,8 +50,10 @@ class CorruptionFS : public FileSystemWrapper {
48
50
  }
49
51
 
50
52
  void SetCorruptionTrigger(const int trigger) {
53
+ MutexLock l(&mutex_);
51
54
  corruption_trigger_ = trigger;
52
55
  read_count_ = 0;
56
+ corrupt_fname_.clear();
53
57
  }
54
58
 
55
59
  IOStatus NewRandomAccessFile(const std::string& fname,
@@ -58,25 +62,31 @@ class CorruptionFS : public FileSystemWrapper {
58
62
  IODebugContext* dbg) override {
59
63
  class CorruptionRandomAccessFile : public FSRandomAccessFileOwnerWrapper {
60
64
  public:
61
- CorruptionRandomAccessFile(CorruptionFS& fs,
65
+ CorruptionRandomAccessFile(CorruptionFS& fs, const std::string& fname,
62
66
  std::unique_ptr<FSRandomAccessFile>& file)
63
- : FSRandomAccessFileOwnerWrapper(std::move(file)), fs_(fs) {}
67
+ : FSRandomAccessFileOwnerWrapper(std::move(file)),
68
+ fs_(fs),
69
+ fname_(fname) {}
64
70
 
65
71
  IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts,
66
72
  Slice* result, char* scratch,
67
73
  IODebugContext* dbg) const override {
68
74
  IOStatus s = target()->Read(offset, len, opts, result, scratch, dbg);
69
75
  if (opts.verify_and_reconstruct_read) {
76
+ fs_.MaybeResetOverlapWithCorruptedChunk(fname_, offset,
77
+ result->size());
70
78
  return s;
71
79
  }
80
+
81
+ MutexLock l(&fs_.mutex_);
72
82
  if (s.ok() && ++fs_.read_count_ >= fs_.corruption_trigger_) {
73
- fs_.read_count_ = 0;
74
83
  fs_.corruption_trigger_ = INT_MAX;
75
84
  char* data = const_cast<char*>(result->data());
76
85
  std::memcpy(
77
86
  data,
78
87
  fs_.rnd_.RandomString(static_cast<int>(result->size())).c_str(),
79
88
  result->size());
89
+ fs_.SetCorruptedChunk(fname_, offset, result->size());
80
90
  }
81
91
  return s;
82
92
  }
@@ -101,14 +111,76 @@ class CorruptionFS : public FileSystemWrapper {
101
111
  return IOStatus::OK();
102
112
  }
103
113
 
114
+ IOStatus Prefetch(uint64_t /*offset*/, size_t /*n*/,
115
+ const IOOptions& /*options*/,
116
+ IODebugContext* /*dbg*/) override {
117
+ return IOStatus::NotSupported("Prefetch");
118
+ }
119
+
104
120
  private:
105
121
  CorruptionFS& fs_;
122
+ std::string fname_;
106
123
  };
107
124
 
108
125
  std::unique_ptr<FSRandomAccessFile> file;
109
126
  IOStatus s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
110
127
  EXPECT_OK(s);
111
- result->reset(new CorruptionRandomAccessFile(*this, file));
128
+ result->reset(new CorruptionRandomAccessFile(*this, fname, file));
129
+
130
+ return s;
131
+ }
132
+
133
+ IOStatus NewSequentialFile(const std::string& fname,
134
+ const FileOptions& file_opts,
135
+ std::unique_ptr<FSSequentialFile>* result,
136
+ IODebugContext* dbg) override {
137
+ class CorruptionSequentialFile : public FSSequentialFileOwnerWrapper {
138
+ public:
139
+ CorruptionSequentialFile(CorruptionFS& fs, const std::string& fname,
140
+ std::unique_ptr<FSSequentialFile>& file)
141
+ : FSSequentialFileOwnerWrapper(std::move(file)),
142
+ fs_(fs),
143
+ fname_(fname),
144
+ offset_(0) {}
145
+
146
+ IOStatus Read(size_t len, const IOOptions& opts, Slice* result,
147
+ char* scratch, IODebugContext* dbg) override {
148
+ IOStatus s = target()->Read(len, opts, result, scratch, dbg);
149
+ if (result->size() == 0 ||
150
+ fname_.find("IDENTITY") != std::string::npos) {
151
+ return s;
152
+ }
153
+
154
+ if (opts.verify_and_reconstruct_read) {
155
+ fs_.MaybeResetOverlapWithCorruptedChunk(fname_, offset_,
156
+ result->size());
157
+ return s;
158
+ }
159
+
160
+ MutexLock l(&fs_.mutex_);
161
+ if (s.ok() && ++fs_.read_count_ >= fs_.corruption_trigger_) {
162
+ fs_.corruption_trigger_ = INT_MAX;
163
+ char* data = const_cast<char*>(result->data());
164
+ std::memcpy(
165
+ data,
166
+ fs_.rnd_.RandomString(static_cast<int>(result->size())).c_str(),
167
+ result->size());
168
+ fs_.SetCorruptedChunk(fname_, offset_, result->size());
169
+ }
170
+ offset_ += result->size();
171
+ return s;
172
+ }
173
+
174
+ private:
175
+ CorruptionFS& fs_;
176
+ std::string fname_;
177
+ size_t offset_;
178
+ };
179
+
180
+ std::unique_ptr<FSSequentialFile> file;
181
+ IOStatus s = target()->NewSequentialFile(fname, file_opts, &file, dbg);
182
+ EXPECT_OK(s);
183
+ result->reset(new CorruptionSequentialFile(*this, fname, file));
112
184
 
113
185
  return s;
114
186
  }
@@ -123,12 +195,40 @@ class CorruptionFS : public FileSystemWrapper {
123
195
  }
124
196
  }
125
197
 
198
+ void SetCorruptedChunk(const std::string& fname, size_t offset, size_t len) {
199
+ assert(corrupt_fname_.empty());
200
+
201
+ corrupt_fname_ = fname;
202
+ corrupt_offset_ = offset;
203
+ corrupt_len_ = len;
204
+ }
205
+
206
+ void MaybeResetOverlapWithCorruptedChunk(const std::string& fname,
207
+ size_t offset, size_t len) {
208
+ if (fname == corrupt_fname_ &&
209
+ ((offset <= corrupt_offset_ && (offset + len) > corrupt_offset_) ||
210
+ (offset >= corrupt_offset_ &&
211
+ offset < (corrupt_offset_ + corrupt_len_)))) {
212
+ corrupt_fname_.clear();
213
+ }
214
+ }
215
+
216
+ bool VerifyRetry() { return corrupt_len_ > 0 && corrupt_fname_.empty(); }
217
+
218
+ int read_count() { return read_count_; }
219
+
220
+ int corruption_trigger() { return corruption_trigger_; }
221
+
126
222
  private:
127
223
  int corruption_trigger_;
128
224
  int read_count_;
225
+ std::string corrupt_fname_;
226
+ size_t corrupt_offset_;
227
+ size_t corrupt_len_;
129
228
  Random rnd_;
130
229
  bool fs_buffer_;
131
230
  bool verify_read_;
231
+ port::Mutex mutex_;
132
232
  };
133
233
  } // anonymous namespace
134
234
 
@@ -717,6 +817,7 @@ class DBIOCorruptionTest
717
817
  bbto.num_file_reads_for_auto_readahead = 0;
718
818
  options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
719
819
  options_.disable_auto_compactions = true;
820
+ options_.max_file_opening_threads = 0;
720
821
 
721
822
  Reopen(options_);
722
823
  }
@@ -857,8 +958,8 @@ TEST_P(DBIOCorruptionTest, FlushReadCorruptionRetry) {
857
958
  Status s = Flush();
858
959
  if (std::get<2>(GetParam())) {
859
960
  ASSERT_OK(s);
860
- ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
861
- ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
961
+ ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
962
+ ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
862
963
  1);
863
964
 
864
965
  std::string val;
@@ -885,8 +986,8 @@ TEST_P(DBIOCorruptionTest, ManifestCorruptionRetry) {
885
986
 
886
987
  if (std::get<2>(GetParam())) {
887
988
  ASSERT_OK(ReopenDB());
888
- ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
889
- ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
989
+ ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
990
+ ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
890
991
  1);
891
992
  } else {
892
993
  ASSERT_EQ(ReopenDB(), Status::Corruption());
@@ -970,6 +1071,57 @@ TEST_P(DBIOCorruptionTest, TablePropertiesCorruptionRetry) {
970
1071
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
971
1072
  }
972
1073
 
1074
+ TEST_P(DBIOCorruptionTest, DBOpenReadCorruptionRetry) {
1075
+ if (!std::get<2>(GetParam())) {
1076
+ return;
1077
+ }
1078
+ CorruptionFS* fs =
1079
+ static_cast<CorruptionFS*>(env_guard_->GetFileSystem().get());
1080
+
1081
+ for (int sst = 0; sst < 3; ++sst) {
1082
+ for (int key = 0; key < 100; ++key) {
1083
+ std::stringstream ss;
1084
+ ss << std::setw(3) << 100 * sst + key;
1085
+ ASSERT_OK(Put("key" + ss.str(), "val" + ss.str()));
1086
+ }
1087
+ ASSERT_OK(Flush());
1088
+ }
1089
+ Close();
1090
+
1091
+ // DB open will create table readers unless we reduce the table cache
1092
+ // capacity.
1093
+ // SanitizeOptions will set max_open_files to minimum of 20. Table cache
1094
+ // is allocated with max_open_files - 10 as capacity. So override
1095
+ // max_open_files to 11 so table cache capacity will become 1. This will
1096
+ // prevent file open during DB open and force the file to be opened
1097
+ // during MultiGet
1098
+ SyncPoint::GetInstance()->SetCallBack(
1099
+ "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
1100
+ int* max_open_files = (int*)arg;
1101
+ *max_open_files = 11;
1102
+ });
1103
+ SyncPoint::GetInstance()->EnableProcessing();
1104
+
1105
+ // Progressively increase the IO count trigger for corruption, and verify
1106
+ // that it was retried
1107
+ int corruption_trigger = 1;
1108
+ fs->SetCorruptionTrigger(corruption_trigger);
1109
+ do {
1110
+ fs->SetCorruptionTrigger(corruption_trigger);
1111
+ ASSERT_OK(ReopenDB());
1112
+ for (int sst = 0; sst < 3; ++sst) {
1113
+ for (int key = 0; key < 100; ++key) {
1114
+ std::stringstream ss;
1115
+ ss << std::setw(3) << 100 * sst + key;
1116
+ ASSERT_EQ(Get("key" + ss.str()), "val" + ss.str());
1117
+ }
1118
+ }
1119
+ // Verify that the injected corruption was repaired
1120
+ ASSERT_TRUE(fs->VerifyRetry());
1121
+ corruption_trigger++;
1122
+ } while (fs->corruption_trigger() == INT_MAX);
1123
+ }
1124
+
973
1125
  // The parameters are - 1. Use FS provided buffer, 2. Use async IO ReadOption,
974
1126
  // 3. Retry with verify_and_reconstruct_read IOOption
975
1127
  INSTANTIATE_TEST_CASE_P(DBIOCorruptionTest, DBIOCorruptionTest,
@@ -52,7 +52,9 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
52
52
  user_comparator_(cmp),
53
53
  merge_operator_(ioptions.merge_operator.get()),
54
54
  iter_(iter),
55
- version_(version),
55
+ blob_reader_(version, read_options.read_tier,
56
+ read_options.verify_checksums, read_options.fill_cache,
57
+ read_options.io_activity),
56
58
  read_callback_(read_callback),
57
59
  sequence_(s),
58
60
  statistics_(ioptions.stats),
@@ -71,13 +73,10 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
71
73
  expect_total_order_inner_iter_(prefix_extractor_ == nullptr ||
72
74
  read_options.total_order_seek ||
73
75
  read_options.auto_prefix_mode),
74
- read_tier_(read_options.read_tier),
75
- fill_cache_(read_options.fill_cache),
76
- verify_checksums_(read_options.verify_checksums),
77
76
  expose_blob_index_(expose_blob_index),
77
+ allow_unprepared_value_(read_options.allow_unprepared_value),
78
78
  is_blob_(false),
79
79
  arena_mode_(arena_mode),
80
- io_activity_(read_options.io_activity),
81
80
  cfh_(cfh),
82
81
  timestamp_ub_(read_options.timestamp),
83
82
  timestamp_lb_(read_options.iter_start_ts),
@@ -151,7 +150,7 @@ void DBIter::Next() {
151
150
  PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
152
151
  // Release temporarily pinned blocks from last operation
153
152
  ReleaseTempPinnedData();
154
- ResetBlobValue();
153
+ ResetBlobData();
155
154
  ResetValueAndColumns();
156
155
  local_stats_.skip_count_ += num_internal_keys_skipped_;
157
156
  local_stats_.skip_count_--;
@@ -194,29 +193,21 @@ void DBIter::Next() {
194
193
  }
195
194
  }
196
195
 
197
- bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
198
- const Slice& blob_index) {
199
- assert(!is_blob_);
196
+ Status DBIter::BlobReader::RetrieveAndSetBlobValue(const Slice& user_key,
197
+ const Slice& blob_index) {
200
198
  assert(blob_value_.empty());
201
199
 
202
- if (expose_blob_index_) { // Stacked BlobDB implementation
203
- is_blob_ = true;
204
- return true;
205
- }
206
-
207
200
  if (!version_) {
208
- status_ = Status::Corruption("Encountered unexpected blob index.");
209
- valid_ = false;
210
- return false;
201
+ return Status::Corruption("Encountered unexpected blob index.");
211
202
  }
212
203
 
213
204
  // TODO: consider moving ReadOptions from ArenaWrappedDBIter to DBIter to
214
205
  // avoid having to copy options back and forth.
215
- // TODO: plumb Env::IOActivity, Env::IOPriority
206
+ // TODO: plumb Env::IOPriority
216
207
  ReadOptions read_options;
217
208
  read_options.read_tier = read_tier_;
218
- read_options.fill_cache = fill_cache_;
219
209
  read_options.verify_checksums = verify_checksums_;
210
+ read_options.fill_cache = fill_cache_;
220
211
  read_options.io_activity = io_activity_;
221
212
  constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
222
213
  constexpr uint64_t* bytes_read = nullptr;
@@ -224,16 +215,51 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
224
215
  const Status s = version_->GetBlob(read_options, user_key, blob_index,
225
216
  prefetch_buffer, &blob_value_, bytes_read);
226
217
 
218
+ if (!s.ok()) {
219
+ return s;
220
+ }
221
+
222
+ return Status::OK();
223
+ }
224
+
225
+ bool DBIter::SetValueAndColumnsFromBlobImpl(const Slice& user_key,
226
+ const Slice& blob_index) {
227
+ const Status s = blob_reader_.RetrieveAndSetBlobValue(user_key, blob_index);
227
228
  if (!s.ok()) {
228
229
  status_ = s;
229
230
  valid_ = false;
231
+ is_blob_ = false;
230
232
  return false;
231
233
  }
232
234
 
233
- is_blob_ = true;
235
+ SetValueAndColumnsFromPlain(blob_reader_.GetBlobValue());
236
+
234
237
  return true;
235
238
  }
236
239
 
240
+ bool DBIter::SetValueAndColumnsFromBlob(const Slice& user_key,
241
+ const Slice& blob_index) {
242
+ assert(!is_blob_);
243
+ is_blob_ = true;
244
+
245
+ if (expose_blob_index_) {
246
+ SetValueAndColumnsFromPlain(blob_index);
247
+ return true;
248
+ }
249
+
250
+ if (allow_unprepared_value_) {
251
+ assert(value_.empty());
252
+ assert(wide_columns_.empty());
253
+
254
+ assert(lazy_blob_index_.empty());
255
+ lazy_blob_index_ = blob_index;
256
+
257
+ return true;
258
+ }
259
+
260
+ return SetValueAndColumnsFromBlobImpl(user_key, blob_index);
261
+ }
262
+
237
263
  bool DBIter::SetValueAndColumnsFromEntity(Slice slice) {
238
264
  assert(value_.empty());
239
265
  assert(wide_columns_.empty());
@@ -279,6 +305,24 @@ bool DBIter::SetValueAndColumnsFromMergeResult(const Status& merge_status,
279
305
  return true;
280
306
  }
281
307
 
308
// Materializes a value whose loading was deferred.
// If no blob index is pending (lazy_blob_index_ is empty) there is nothing to
// do and the function returns true. Otherwise it reads the blob for the key
// held in saved_key_ via SetValueAndColumnsFromBlobImpl(), clears the pending
// index, and returns that call's result (false means the read failed and the
// helper has already updated status_/valid_).
// Precondition (asserted): valid_ is true.
+ bool DBIter::PrepareValue() {
309
+ assert(valid_);
310
+
311
+ if (lazy_blob_index_.empty()) {
312
+ return true;
313
+ }
314
+
315
+ assert(allow_unprepared_value_);
316
+ assert(is_blob_);
317
+
318
+ const bool result =
319
+ SetValueAndColumnsFromBlobImpl(saved_key_.GetUserKey(), lazy_blob_index_);
320
+
321
+ lazy_blob_index_.clear();  // cleared whether or not the read succeeded
322
+
323
+ return result;
324
+ }
325
+
282
326
  // PRE: saved_key_ has the current user key if skipping_saved_key
283
327
  // POST: saved_key_ should have the next user key if valid_,
284
328
  // if the current entry is a result of merge
@@ -408,7 +452,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
408
452
  case kTypeValuePreferredSeqno:
409
453
  case kTypeBlobIndex:
410
454
  case kTypeWideColumnEntity:
411
- if (!PrepareValue()) {
455
+ if (!PrepareValueInternal()) {
412
456
  return false;
413
457
  }
414
458
  if (timestamp_lb_) {
@@ -420,12 +464,9 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
420
464
  }
421
465
 
422
466
  if (ikey_.type == kTypeBlobIndex) {
423
- if (!SetBlobValueIfNeeded(ikey_.user_key, iter_.value())) {
467
+ if (!SetValueAndColumnsFromBlob(ikey_.user_key, iter_.value())) {
424
468
  return false;
425
469
  }
426
-
427
- SetValueAndColumnsFromPlain(expose_blob_index_ ? iter_.value()
428
- : blob_value_);
429
470
  } else if (ikey_.type == kTypeWideColumnEntity) {
430
471
  if (!SetValueAndColumnsFromEntity(iter_.value())) {
431
472
  return false;
@@ -445,7 +486,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
445
486
  return true;
446
487
  break;
447
488
  case kTypeMerge:
448
- if (!PrepareValue()) {
489
+ if (!PrepareValueInternal()) {
449
490
  return false;
450
491
  }
451
492
  saved_key_.SetUserKey(
@@ -540,8 +581,14 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
540
581
  } else {
541
582
  iter_.Next();
542
583
  }
584
+
543
585
  // This could be a long-running operation due to tombstones, etc.
544
- ROCKSDB_THREAD_YIELD_HOOK();
586
+ bool aborted = ROCKSDB_THREAD_YIELD_CHECK_ABORT();
587
+ if (aborted) {
588
+ valid_ = false;
589
+ status_ = Status::Aborted("Query abort.");
590
+ return false;
591
+ }
545
592
  } while (iter_.Valid());
546
593
 
547
594
  valid_ = false;
@@ -592,7 +639,7 @@ bool DBIter::MergeValuesNewToOld() {
592
639
  iter_.Next();
593
640
  break;
594
641
  }
595
- if (!PrepareValue()) {
642
+ if (!PrepareValueInternal()) {
596
643
  return false;
597
644
  }
598
645
 
@@ -621,23 +668,9 @@ bool DBIter::MergeValuesNewToOld() {
621
668
  iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */);
622
669
  PERF_COUNTER_ADD(internal_merge_count, 1);
623
670
  } else if (kTypeBlobIndex == ikey.type) {
624
- if (expose_blob_index_) {
625
- status_ =
626
- Status::NotSupported("BlobDB does not support merge operator.");
627
- valid_ = false;
671
+ if (!MergeWithBlobBaseValue(iter_.value(), ikey.user_key)) {
628
672
  return false;
629
673
  }
630
- // hit a put, merge the put value with operands and store the
631
- // final result in saved_value_. We are done!
632
- if (!SetBlobValueIfNeeded(ikey.user_key, iter_.value())) {
633
- return false;
634
- }
635
- valid_ = true;
636
- if (!MergeWithPlainBaseValue(blob_value_, ikey.user_key)) {
637
- return false;
638
- }
639
-
640
- ResetBlobValue();
641
674
 
642
675
  // iter_ is positioned after put
643
676
  iter_.Next();
@@ -645,6 +678,7 @@ bool DBIter::MergeValuesNewToOld() {
645
678
  valid_ = false;
646
679
  return false;
647
680
  }
681
+
648
682
  return true;
649
683
  } else if (kTypeWideColumnEntity == ikey.type) {
650
684
  if (!MergeWithWideColumnBaseValue(iter_.value(), ikey.user_key)) {
@@ -691,7 +725,7 @@ void DBIter::Prev() {
691
725
  PERF_COUNTER_ADD(iter_prev_count, 1);
692
726
  PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
693
727
  ReleaseTempPinnedData();
694
- ResetBlobValue();
728
+ ResetBlobData();
695
729
  ResetValueAndColumns();
696
730
  ResetInternalKeysSkippedCounter();
697
731
  bool ok = true;
@@ -928,7 +962,7 @@ bool DBIter::FindValueForCurrentKey() {
928
962
  return FindValueForCurrentKeyUsingSeek();
929
963
  }
930
964
 
931
- if (!PrepareValue()) {
965
+ if (!PrepareValueInternal()) {
932
966
  return false;
933
967
  }
934
968
 
@@ -1043,22 +1077,10 @@ bool DBIter::FindValueForCurrentKey() {
1043
1077
  }
1044
1078
  return true;
1045
1079
  } else if (last_not_merge_type == kTypeBlobIndex) {
1046
- if (expose_blob_index_) {
1047
- status_ =
1048
- Status::NotSupported("BlobDB does not support merge operator.");
1049
- valid_ = false;
1050
- return false;
1051
- }
1052
- if (!SetBlobValueIfNeeded(saved_key_.GetUserKey(), pinned_value_)) {
1053
- return false;
1054
- }
1055
- valid_ = true;
1056
- if (!MergeWithPlainBaseValue(blob_value_, saved_key_.GetUserKey())) {
1080
+ if (!MergeWithBlobBaseValue(pinned_value_, saved_key_.GetUserKey())) {
1057
1081
  return false;
1058
1082
  }
1059
1083
 
1060
- ResetBlobValue();
1061
-
1062
1084
  return true;
1063
1085
  } else if (last_not_merge_type == kTypeWideColumnEntity) {
1064
1086
  if (!MergeWithWideColumnBaseValue(pinned_value_,
@@ -1082,13 +1104,9 @@ bool DBIter::FindValueForCurrentKey() {
1082
1104
 
1083
1105
  break;
1084
1106
  case kTypeBlobIndex:
1085
- if (!SetBlobValueIfNeeded(saved_key_.GetUserKey(), pinned_value_)) {
1107
+ if (!SetValueAndColumnsFromBlob(saved_key_.GetUserKey(), pinned_value_)) {
1086
1108
  return false;
1087
1109
  }
1088
-
1089
- SetValueAndColumnsFromPlain(expose_blob_index_ ? pinned_value_
1090
- : blob_value_);
1091
-
1092
1110
  break;
1093
1111
  case kTypeWideColumnEntity:
1094
1112
  if (!SetValueAndColumnsFromEntity(pinned_value_)) {
@@ -1175,7 +1193,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1175
1193
  }
1176
1194
  return true;
1177
1195
  }
1178
- if (!PrepareValue()) {
1196
+ if (!PrepareValueInternal()) {
1179
1197
  return false;
1180
1198
  }
1181
1199
  if (timestamp_size_ > 0) {
@@ -1192,12 +1210,9 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1192
1210
  pinned_value_ = iter_.value();
1193
1211
  }
1194
1212
  if (ikey.type == kTypeBlobIndex) {
1195
- if (!SetBlobValueIfNeeded(ikey.user_key, pinned_value_)) {
1213
+ if (!SetValueAndColumnsFromBlob(ikey.user_key, pinned_value_)) {
1196
1214
  return false;
1197
1215
  }
1198
-
1199
- SetValueAndColumnsFromPlain(expose_blob_index_ ? pinned_value_
1200
- : blob_value_);
1201
1216
  } else if (ikey.type == kTypeWideColumnEntity) {
1202
1217
  if (!SetValueAndColumnsFromEntity(pinned_value_)) {
1203
1218
  return false;
@@ -1245,7 +1260,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1245
1260
  ikey.type == kTypeDeletionWithTimestamp) {
1246
1261
  break;
1247
1262
  }
1248
- if (!PrepareValue()) {
1263
+ if (!PrepareValueInternal()) {
1249
1264
  return false;
1250
1265
  }
1251
1266
 
@@ -1263,22 +1278,10 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1263
1278
  iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */);
1264
1279
  PERF_COUNTER_ADD(internal_merge_count, 1);
1265
1280
  } else if (ikey.type == kTypeBlobIndex) {
1266
- if (expose_blob_index_) {
1267
- status_ =
1268
- Status::NotSupported("BlobDB does not support merge operator.");
1269
- valid_ = false;
1270
- return false;
1271
- }
1272
- if (!SetBlobValueIfNeeded(ikey.user_key, iter_.value())) {
1273
- return false;
1274
- }
1275
- valid_ = true;
1276
- if (!MergeWithPlainBaseValue(blob_value_, saved_key_.GetUserKey())) {
1281
+ if (!MergeWithBlobBaseValue(iter_.value(), saved_key_.GetUserKey())) {
1277
1282
  return false;
1278
1283
  }
1279
1284
 
1280
- ResetBlobValue();
1281
-
1282
1285
  return true;
1283
1286
  } else if (ikey.type == kTypeWideColumnEntity) {
1284
1287
  if (!MergeWithWideColumnBaseValue(iter_.value(),
@@ -1344,6 +1347,35 @@ bool DBIter::MergeWithPlainBaseValue(const Slice& value,
1344
1347
  return SetValueAndColumnsFromMergeResult(s, result_type);
1345
1348
  }
1346
1349
 
1350
// Applies the pending merge on top of a base value that is stored as a blob.
// Steps:
//   - If expose_blob_index_ is set, fails with Status::NotSupported (sets
//     status_, clears valid_) and returns false.
//   - Reads the blob for `user_key`/`blob_index` through blob_reader_; on a
//     non-OK status stores it in status_, clears valid_, and returns false.
//   - Sets valid_ and hands the retrieved blob value to
//     MergeWithPlainBaseValue(); returns false if that call fails.
//   - On success resets the blob value held by blob_reader_ and returns true.
// NOTE(review): when MergeWithPlainBaseValue() fails, the function returns
// before blob_reader_.ResetBlobValue() runs; presumably the value is released
// by a later ResetBlobData() call -- confirm.
+ bool DBIter::MergeWithBlobBaseValue(const Slice& blob_index,
1351
+ const Slice& user_key) {
1352
+ assert(!is_blob_);
1353
+
1354
+ if (expose_blob_index_) {
1355
+ status_ =
1356
+ Status::NotSupported("Legacy BlobDB does not support merge operator.");
1357
+ valid_ = false;
1358
+ return false;
1359
+ }
1360
+
1361
+ const Status s = blob_reader_.RetrieveAndSetBlobValue(user_key, blob_index);
1362
+ if (!s.ok()) {
1363
+ status_ = s;
1364
+ valid_ = false;
1365
+ return false;
1366
+ }
1367
+
1368
+ valid_ = true;  // same ordering as the removed inline code: set before merging
1369
+
1370
+ if (!MergeWithPlainBaseValue(blob_reader_.GetBlobValue(), user_key)) {
1371
+ return false;
1372
+ }
1373
+
1374
+ blob_reader_.ResetBlobValue();
1375
+
1376
+ return true;
1377
+ }
1378
+
1347
1379
  bool DBIter::MergeWithWideColumnBaseValue(const Slice& entity,
1348
1380
  const Slice& user_key) {
1349
1381
  // `op_failure_scope` (an output parameter) is not provided (set to nullptr)
@@ -1533,7 +1565,7 @@ void DBIter::Seek(const Slice& target) {
1533
1565
 
1534
1566
  status_ = Status::OK();
1535
1567
  ReleaseTempPinnedData();
1536
- ResetBlobValue();
1568
+ ResetBlobData();
1537
1569
  ResetValueAndColumns();
1538
1570
  ResetInternalKeysSkippedCounter();
1539
1571
 
@@ -1609,7 +1641,7 @@ void DBIter::SeekForPrev(const Slice& target) {
1609
1641
 
1610
1642
  status_ = Status::OK();
1611
1643
  ReleaseTempPinnedData();
1612
- ResetBlobValue();
1644
+ ResetBlobData();
1613
1645
  ResetValueAndColumns();
1614
1646
  ResetInternalKeysSkippedCounter();
1615
1647
 
@@ -1670,7 +1702,7 @@ void DBIter::SeekToFirst() {
1670
1702
  status_.PermitUncheckedError();
1671
1703
  direction_ = kForward;
1672
1704
  ReleaseTempPinnedData();
1673
- ResetBlobValue();
1705
+ ResetBlobData();
1674
1706
  ResetValueAndColumns();
1675
1707
  ResetInternalKeysSkippedCounter();
1676
1708
  ClearSavedValue();
@@ -1733,7 +1765,7 @@ void DBIter::SeekToLast() {
1733
1765
  status_.PermitUncheckedError();
1734
1766
  direction_ = kReverse;
1735
1767
  ReleaseTempPinnedData();
1736
- ResetBlobValue();
1768
+ ResetBlobData();
1737
1769
  ResetValueAndColumns();
1738
1770
  ResetInternalKeysSkippedCounter();
1739
1771
  ClearSavedValue();