@nxtedition/rocksdb 13.5.13 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/binding.cc +33 -2
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +3 -3
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -502,8 +502,7 @@ Status FlushJob::MemPurge() {
502
502
  kMaxSequenceNumber, &job_context_->snapshot_seqs, earliest_snapshot_,
503
503
  job_context_->earliest_write_conflict_snapshot,
504
504
  job_context_->GetJobSnapshotSequence(), job_context_->snapshot_checker,
505
- env, ShouldReportDetailedTime(env, ioptions.stats),
506
- true /* internal key corruption is not ok */, range_del_agg.get(),
505
+ env, ShouldReportDetailedTime(env, ioptions.stats), range_del_agg.get(),
507
506
  nullptr, ioptions.allow_data_in_errors,
508
507
  ioptions.enforce_single_del_contracts,
509
508
  /*manual_compaction_canceled=*/kManualCompactionCanceledFalse,
@@ -1105,13 +1104,13 @@ Status FlushJob::WriteLevel0Table() {
1105
1104
  const uint64_t micros = clock_->NowMicros() - start_micros;
1106
1105
  const uint64_t cpu_micros = clock_->CPUMicros() - start_cpu_micros;
1107
1106
  flush_stats.micros = micros;
1108
- flush_stats.cpu_micros = cpu_micros;
1107
+ flush_stats.cpu_micros += cpu_micros;
1109
1108
 
1110
1109
  ROCKS_LOG_INFO(db_options_.info_log,
1111
1110
  "[%s] [JOB %d] Flush lasted %" PRIu64
1112
1111
  " microseconds, and %" PRIu64 " cpu microseconds.\n",
1113
1112
  cfd_->GetName().c_str(), job_context_->job_id, micros,
1114
- cpu_micros);
1113
+ flush_stats.cpu_micros);
1115
1114
 
1116
1115
  if (has_output) {
1117
1116
  flush_stats.bytes_written = meta_.fd.GetFileSize();
@@ -380,8 +380,11 @@ void Reader::MaybeVerifyPredecessorWALInfo(
380
380
  } else {
381
381
  if (observed_predecessor_wal_info_.GetLogNumber() !=
382
382
  recorded_predecessor_log_number) {
383
- std::string reason = "Missing WAL of log number " +
384
- std::to_string(recorded_predecessor_log_number);
383
+ std::string reason =
384
+ "Mismatched predecessor log number of WAL file " +
385
+ file_->file_name() + " Recorded " +
386
+ std::to_string(recorded_predecessor_log_number) + ". Observed " +
387
+ std::to_string(observed_predecessor_wal_info_.GetLogNumber());
385
388
  ReportCorruption(fragment.size(), reason.c_str(),
386
389
  recorded_predecessor_log_number);
387
390
  } else if (observed_predecessor_wal_info_.GetLastSeqnoRecorded() !=
@@ -70,7 +70,9 @@ ImmutableMemTableOptions::ImmutableMemTableOptions(
70
70
  protection_bytes_per_key(
71
71
  mutable_cf_options.memtable_protection_bytes_per_key),
72
72
  allow_data_in_errors(ioptions.allow_data_in_errors),
73
- paranoid_memory_checks(mutable_cf_options.paranoid_memory_checks) {}
73
+ paranoid_memory_checks(mutable_cf_options.paranoid_memory_checks),
74
+ memtable_veirfy_per_key_checksum_on_seek(
75
+ mutable_cf_options.memtable_veirfy_per_key_checksum_on_seek) {}
74
76
 
75
77
  MemTable::MemTable(const InternalKeyComparator& cmp,
76
78
  const ImmutableOptions& ioptions,
@@ -115,7 +117,13 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
115
117
  oldest_key_time_(std::numeric_limits<uint64_t>::max()),
116
118
  approximate_memory_usage_(0),
117
119
  memtable_max_range_deletions_(
118
- mutable_cf_options.memtable_max_range_deletions) {
120
+ mutable_cf_options.memtable_max_range_deletions),
121
+ key_validation_callback_(
122
+ (moptions_.protection_bytes_per_key != 0 &&
123
+ moptions_.memtable_veirfy_per_key_checksum_on_seek)
124
+ ? std::bind(&MemTable::ValidateKey, this, std::placeholders::_1,
125
+ std::placeholders::_2)
126
+ : std::function<Status(const char*, bool)>(nullptr)) {
119
127
  UpdateFlushState();
120
128
  // something went wrong if we need to flush before inserting anything
121
129
  assert(!ShouldScheduleFlush());
@@ -134,6 +142,16 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
134
142
  auto new_cache = std::make_shared<FragmentedRangeTombstoneListCache>();
135
143
  size_t size = cached_range_tombstone_.Size();
136
144
  for (size_t i = 0; i < size; ++i) {
145
+ #if defined(__cpp_lib_atomic_shared_ptr)
146
+ std::atomic<std::shared_ptr<FragmentedRangeTombstoneListCache>>*
147
+ local_cache_ref_ptr = cached_range_tombstone_.AccessAtCore(i);
148
+ auto new_local_cache_ref = std::make_shared<
149
+ const std::shared_ptr<FragmentedRangeTombstoneListCache>>(new_cache);
150
+ std::shared_ptr<FragmentedRangeTombstoneListCache> aliased_ptr(
151
+ new_local_cache_ref, new_cache.get());
152
+ local_cache_ref_ptr->store(std::move(aliased_ptr),
153
+ std::memory_order_relaxed);
154
+ #else
137
155
  std::shared_ptr<FragmentedRangeTombstoneListCache>* local_cache_ref_ptr =
138
156
  cached_range_tombstone_.AccessAtCore(i);
139
157
  auto new_local_cache_ref = std::make_shared<
@@ -143,6 +161,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
143
161
  std::shared_ptr<FragmentedRangeTombstoneListCache>(new_local_cache_ref,
144
162
  new_cache.get()),
145
163
  std::memory_order_relaxed);
164
+ #endif
146
165
  }
147
166
  const Comparator* ucmp = cmp.user_comparator();
148
167
  assert(ucmp);
@@ -168,7 +187,7 @@ size_t MemTable::ApproximateMemoryUsage() {
168
187
  }
169
188
  total_usage += usage;
170
189
  }
171
- approximate_memory_usage_.store(total_usage, std::memory_order_relaxed);
190
+ approximate_memory_usage_.StoreRelaxed(total_usage);
172
191
  // otherwise, return the actual usage
173
192
  return total_usage;
174
193
  }
@@ -182,12 +201,12 @@ bool MemTable::ShouldFlushNow() {
182
201
  // This is set if memtable_max_range_deletions is > 0,
183
202
  // and that many range deletions are done
184
203
  if (memtable_max_range_deletions_ > 0 &&
185
- num_range_deletes_.load(std::memory_order_relaxed) >=
204
+ num_range_deletes_.LoadRelaxed() >=
186
205
  static_cast<uint64_t>(memtable_max_range_deletions_)) {
187
206
  return true;
188
207
  }
189
208
 
190
- size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed);
209
+ size_t write_buffer_size = write_buffer_size_.LoadRelaxed();
191
210
  // In a lot of times, we cannot allocate arena blocks that exactly matches the
192
211
  // buffer size. Thus we have to decide if we should over-allocate or
193
212
  // under-allocate.
@@ -200,10 +219,10 @@ bool MemTable::ShouldFlushNow() {
200
219
  assert(range_del_table_->ApproximateMemoryUsage() == 0);
201
220
  // If arena still have room for new block allocation, we can safely say it
202
221
  // shouldn't flush.
203
- auto allocated_memory = table_->ApproximateMemoryUsage() +
204
- arena_.MemoryAllocatedBytes();
222
+ auto allocated_memory =
223
+ table_->ApproximateMemoryUsage() + arena_.MemoryAllocatedBytes();
205
224
 
206
- approximate_memory_usage_.store(allocated_memory, std::memory_order_relaxed);
225
+ approximate_memory_usage_.StoreRelaxed(allocated_memory);
207
226
 
208
227
  // if we can still allocate one more block without exceeding the
209
228
  // over-allocation ratio, then we should not flush.
@@ -383,7 +402,11 @@ class MemTableIterator : public InternalIterator {
383
402
  !mem.GetImmutableMemTableOptions()->inplace_update_support),
384
403
  arena_mode_(arena != nullptr),
385
404
  paranoid_memory_checks_(mem.moptions_.paranoid_memory_checks),
386
- allow_data_in_error(mem.moptions_.allow_data_in_errors) {
405
+ validate_on_seek_(
406
+ mem.moptions_.paranoid_memory_checks ||
407
+ mem.moptions_.memtable_veirfy_per_key_checksum_on_seek),
408
+ allow_data_in_error_(mem.moptions_.allow_data_in_errors),
409
+ key_validation_callback_(mem.key_validation_callback_) {
387
410
  if (kind == kRangeDelEntries) {
388
411
  iter_ = mem.range_del_table_->GetIterator(arena);
389
412
  } else if (prefix_extractor_ != nullptr &&
@@ -452,8 +475,10 @@ class MemTableIterator : public InternalIterator {
452
475
  }
453
476
  }
454
477
  }
455
- if (paranoid_memory_checks_) {
456
- status_ = iter_->SeekAndValidate(k, nullptr, allow_data_in_error);
478
+ if (validate_on_seek_) {
479
+ status_ = iter_->SeekAndValidate(k, nullptr, allow_data_in_error_,
480
+ paranoid_memory_checks_,
481
+ key_validation_callback_);
457
482
  } else {
458
483
  iter_->Seek(k, nullptr);
459
484
  }
@@ -477,8 +502,10 @@ class MemTableIterator : public InternalIterator {
477
502
  }
478
503
  }
479
504
  }
480
- if (paranoid_memory_checks_) {
481
- status_ = iter_->SeekAndValidate(k, nullptr, allow_data_in_error);
505
+ if (validate_on_seek_) {
506
+ status_ = iter_->SeekAndValidate(k, nullptr, allow_data_in_error_,
507
+ paranoid_memory_checks_,
508
+ key_validation_callback_);
482
509
  } else {
483
510
  iter_->Seek(k, nullptr);
484
511
  }
@@ -507,7 +534,7 @@ class MemTableIterator : public InternalIterator {
507
534
  PERF_COUNTER_ADD(next_on_memtable_count, 1);
508
535
  assert(Valid());
509
536
  if (paranoid_memory_checks_) {
510
- status_ = iter_->NextAndValidate(allow_data_in_error);
537
+ status_ = iter_->NextAndValidate(allow_data_in_error_);
511
538
  } else {
512
539
  iter_->Next();
513
540
  TEST_SYNC_POINT_CALLBACK("MemTableIterator::Next:0", iter_);
@@ -529,7 +556,7 @@ class MemTableIterator : public InternalIterator {
529
556
  PERF_COUNTER_ADD(prev_on_memtable_count, 1);
530
557
  assert(Valid());
531
558
  if (paranoid_memory_checks_) {
532
- status_ = iter_->PrevAndValidate(allow_data_in_error);
559
+ status_ = iter_->PrevAndValidate(allow_data_in_error_);
533
560
  } else {
534
561
  iter_->Prev();
535
562
  }
@@ -588,7 +615,9 @@ class MemTableIterator : public InternalIterator {
588
615
  bool value_pinned_;
589
616
  bool arena_mode_;
590
617
  const bool paranoid_memory_checks_;
591
- const bool allow_data_in_error;
618
+ const bool validate_on_seek_;
619
+ const bool allow_data_in_error_;
620
+ const std::function<Status(const char*, bool)> key_validation_callback_;
592
621
 
593
622
  void VerifyEntryChecksum() {
594
623
  if (protection_bytes_per_key_ > 0 && Valid()) {
@@ -745,7 +774,7 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator(
745
774
  const ReadOptions& read_options, SequenceNumber read_seq,
746
775
  bool immutable_memtable) {
747
776
  if (read_options.ignore_range_deletions ||
748
- is_range_del_table_empty_.load(std::memory_order_relaxed)) {
777
+ is_range_del_table_empty_.LoadRelaxed()) {
749
778
  return nullptr;
750
779
  }
751
780
  return NewRangeTombstoneIteratorInternal(read_options, read_seq,
@@ -756,7 +785,7 @@ FragmentedRangeTombstoneIterator*
756
785
  MemTable::NewTimestampStrippingRangeTombstoneIterator(
757
786
  const ReadOptions& read_options, SequenceNumber read_seq, size_t ts_sz) {
758
787
  if (read_options.ignore_range_deletions ||
759
- is_range_del_table_empty_.load(std::memory_order_relaxed)) {
788
+ is_range_del_table_empty_.LoadRelaxed()) {
760
789
  return nullptr;
761
790
  }
762
791
  if (!timestamp_stripping_fragmented_range_tombstone_list_) {
@@ -790,8 +819,13 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIteratorInternal(
790
819
 
791
820
  // takes current cache
792
821
  std::shared_ptr<FragmentedRangeTombstoneListCache> cache =
822
+ #if defined(__cpp_lib_atomic_shared_ptr)
823
+ cached_range_tombstone_.Access()->load(std::memory_order_relaxed)
824
+ #else
793
825
  std::atomic_load_explicit(cached_range_tombstone_.Access(),
794
- std::memory_order_relaxed);
826
+ std::memory_order_relaxed)
827
+ #endif
828
+ ;
795
829
  // construct fragmented tombstone list if necessary
796
830
  if (!cache->initialized.load(std::memory_order_acquire)) {
797
831
  cache->reader_mutex.lock();
@@ -815,7 +849,7 @@ void MemTable::ConstructFragmentedRangeTombstones() {
815
849
  // There should be no concurrent Construction.
816
850
  // We could also check fragmented_range_tombstone_list_ to avoid repeate
817
851
  // constructions. We just construct them here again to be safe.
818
- if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) {
852
+ if (!is_range_del_table_empty_.LoadRelaxed()) {
819
853
  // TODO: plumb Env::IOActivity, Env::IOPriority
820
854
  auto* unfragmented_iter = new MemTableIterator(
821
855
  MemTableIterator::kRangeDelEntries, *this, ReadOptions());
@@ -838,7 +872,7 @@ ReadOnlyMemTable::MemTableStats MemTable::ApproximateStats(
838
872
  if (entry_count == 0) {
839
873
  return {0, 0};
840
874
  }
841
- uint64_t n = num_entries_.load(std::memory_order_relaxed);
875
+ uint64_t n = num_entries_.LoadRelaxed();
842
876
  if (n == 0) {
843
877
  return {0, 0};
844
878
  }
@@ -848,7 +882,7 @@ ReadOnlyMemTable::MemTableStats MemTable::ApproximateStats(
848
882
  // the inaccuracy.
849
883
  entry_count = n;
850
884
  }
851
- uint64_t data_size = data_size_.load(std::memory_order_relaxed);
885
+ uint64_t data_size = data_size_.LoadRelaxed();
852
886
  return {entry_count * (data_size / n), entry_count};
853
887
  }
854
888
 
@@ -978,17 +1012,14 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
978
1012
 
979
1013
  // this is a bit ugly, but is the way to avoid locked instructions
980
1014
  // when incrementing an atomic
981
- num_entries_.store(num_entries_.load(std::memory_order_relaxed) + 1,
982
- std::memory_order_relaxed);
983
- data_size_.store(data_size_.load(std::memory_order_relaxed) + encoded_len,
984
- std::memory_order_relaxed);
1015
+ num_entries_.StoreRelaxed(num_entries_.LoadRelaxed() + 1);
1016
+ data_size_.StoreRelaxed(data_size_.LoadRelaxed() + encoded_len);
985
1017
  if (type == kTypeDeletion || type == kTypeSingleDeletion ||
986
1018
  type == kTypeDeletionWithTimestamp) {
987
- num_deletes_.store(num_deletes_.load(std::memory_order_relaxed) + 1,
988
- std::memory_order_relaxed);
1019
+ num_deletes_.StoreRelaxed(num_deletes_.LoadRelaxed() + 1);
989
1020
  } else if (type == kTypeRangeDeletion) {
990
- uint64_t val = num_range_deletes_.load(std::memory_order_relaxed) + 1;
991
- num_range_deletes_.store(val, std::memory_order_relaxed);
1021
+ uint64_t val = num_range_deletes_.LoadRelaxed() + 1;
1022
+ num_range_deletes_.StoreRelaxed(val);
992
1023
  }
993
1024
 
994
1025
  if (bloom_filter_ && prefix_extractor_ &&
@@ -1059,6 +1090,16 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
1059
1090
  range_del_mutex_.lock();
1060
1091
  }
1061
1092
  for (size_t i = 0; i < size; ++i) {
1093
+ #if defined(__cpp_lib_atomic_shared_ptr)
1094
+ std::atomic<std::shared_ptr<FragmentedRangeTombstoneListCache>>*
1095
+ local_cache_ref_ptr = cached_range_tombstone_.AccessAtCore(i);
1096
+ auto new_local_cache_ref = std::make_shared<
1097
+ const std::shared_ptr<FragmentedRangeTombstoneListCache>>(new_cache);
1098
+ std::shared_ptr<FragmentedRangeTombstoneListCache> aliased_ptr(
1099
+ new_local_cache_ref, new_cache.get());
1100
+ local_cache_ref_ptr->store(std::move(aliased_ptr),
1101
+ std::memory_order_relaxed);
1102
+ #else
1062
1103
  std::shared_ptr<FragmentedRangeTombstoneListCache>* local_cache_ref_ptr =
1063
1104
  cached_range_tombstone_.AccessAtCore(i);
1064
1105
  auto new_local_cache_ref = std::make_shared<
@@ -1073,12 +1114,13 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
1073
1114
  std::shared_ptr<FragmentedRangeTombstoneListCache>(
1074
1115
  new_local_cache_ref, new_cache.get()),
1075
1116
  std::memory_order_relaxed);
1117
+ #endif
1076
1118
  }
1077
1119
 
1078
1120
  if (allow_concurrent) {
1079
1121
  range_del_mutex_.unlock();
1080
1122
  }
1081
- is_range_del_table_empty_.store(false, std::memory_order_relaxed);
1123
+ is_range_del_table_empty_.StoreRelaxed(false);
1082
1124
  }
1083
1125
  UpdateOldestKeyTime();
1084
1126
 
@@ -1469,11 +1511,13 @@ void MemTable::GetFromTable(const LookupKey& key,
1469
1511
  saver.allow_data_in_errors = moptions_.allow_data_in_errors;
1470
1512
  saver.protection_bytes_per_key = moptions_.protection_bytes_per_key;
1471
1513
 
1472
- if (!moptions_.paranoid_memory_checks) {
1514
+ if (!moptions_.paranoid_memory_checks &&
1515
+ !moptions_.memtable_veirfy_per_key_checksum_on_seek) {
1473
1516
  table_->Get(key, &saver, SaveValue);
1474
1517
  } else {
1475
- Status check_s = table_->GetAndValidate(key, &saver, SaveValue,
1476
- moptions_.allow_data_in_errors);
1518
+ Status check_s = table_->GetAndValidate(
1519
+ key, &saver, SaveValue, moptions_.allow_data_in_errors,
1520
+ moptions_.paranoid_memory_checks, key_validation_callback_);
1477
1521
  if (check_s.IsCorruption()) {
1478
1522
  *(saver.status) = check_s;
1479
1523
  // Should stop searching the LSM.
@@ -1484,6 +1528,11 @@ void MemTable::GetFromTable(const LookupKey& key,
1484
1528
  *seq = saver.seq;
1485
1529
  }
1486
1530
 
1531
+ Status MemTable::ValidateKey(const char* key, bool allow_data_in_errors) {
1532
+ return VerifyEntryChecksum(key, moptions_.protection_bytes_per_key,
1533
+ allow_data_in_errors);
1534
+ }
1535
+
1487
1536
  void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
1488
1537
  ReadCallback* callback, bool immutable_memtable) {
1489
1538
  // The sequence number is updated synchronously in version_set.h
@@ -1497,7 +1546,7 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
1497
1546
  // range tombstones. This is the simplest way to ensure range tombstones are
1498
1547
  // handled. TODO: allow Bloom checks where max_covering_tombstone_seq==0
1499
1548
  bool no_range_del = read_options.ignore_range_deletions ||
1500
- is_range_del_table_empty_.load(std::memory_order_relaxed);
1549
+ is_range_del_table_empty_.LoadRelaxed();
1501
1550
  MultiGetRange temp_range(*range, range->begin(), range->end());
1502
1551
  if (bloom_filter_ && no_range_del) {
1503
1552
  bool whole_key =
@@ -8,7 +8,6 @@
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
 
10
10
  #pragma once
11
- #include <atomic>
12
11
  #include <deque>
13
12
  #include <functional>
14
13
  #include <memory>
@@ -65,6 +64,7 @@ struct ImmutableMemTableOptions {
65
64
  uint32_t protection_bytes_per_key;
66
65
  bool allow_data_in_errors;
67
66
  bool paranoid_memory_checks;
67
+ bool memtable_veirfy_per_key_checksum_on_seek;
68
68
  };
69
69
 
70
70
  // Batched counters to update when inserting keys in one write batch.
@@ -568,7 +568,7 @@ class MemTable final : public ReadOnlyMemTable {
568
568
  // As a cheap version of `ApproximateMemoryUsage()`, this function doesn't
569
569
  // require external synchronization. The value may be less accurate though
570
570
  size_t ApproximateMemoryUsageFast() const {
571
- return approximate_memory_usage_.load(std::memory_order_relaxed);
571
+ return approximate_memory_usage_.LoadRelaxed();
572
572
  }
573
573
 
574
574
  size_t MemoryAllocatedBytes() const override {
@@ -689,16 +689,13 @@ class MemTable final : public ReadOnlyMemTable {
689
689
  // Used in concurrent memtable inserts.
690
690
  void BatchPostProcess(const MemTablePostProcessInfo& update_counters) {
691
691
  table_->BatchPostProcess();
692
- num_entries_.fetch_add(update_counters.num_entries,
693
- std::memory_order_relaxed);
694
- data_size_.fetch_add(update_counters.data_size, std::memory_order_relaxed);
692
+ num_entries_.FetchAddRelaxed(update_counters.num_entries);
693
+ data_size_.FetchAddRelaxed(update_counters.data_size);
695
694
  if (update_counters.num_deletes != 0) {
696
- num_deletes_.fetch_add(update_counters.num_deletes,
697
- std::memory_order_relaxed);
695
+ num_deletes_.FetchAddRelaxed(update_counters.num_deletes);
698
696
  }
699
697
  if (update_counters.num_range_deletes > 0) {
700
- num_range_deletes_.fetch_add(update_counters.num_range_deletes,
701
- std::memory_order_relaxed);
698
+ num_range_deletes_.FetchAddRelaxed(update_counters.num_range_deletes);
702
699
  // noop for skip-list memtable
703
700
  // Besides correctness test in stress test, memtable flush record count
704
701
  // check will catch this if it were not noop.
@@ -707,35 +704,26 @@ class MemTable final : public ReadOnlyMemTable {
707
704
  UpdateFlushState();
708
705
  }
709
706
 
710
- uint64_t NumEntries() const override {
711
- return num_entries_.load(std::memory_order_relaxed);
712
- }
707
+ uint64_t NumEntries() const override { return num_entries_.LoadRelaxed(); }
713
708
 
714
- uint64_t NumDeletion() const override {
715
- return num_deletes_.load(std::memory_order_relaxed);
716
- }
709
+ uint64_t NumDeletion() const override { return num_deletes_.LoadRelaxed(); }
717
710
 
718
711
  uint64_t NumRangeDeletion() const override {
719
- return num_range_deletes_.load(std::memory_order_relaxed);
712
+ return num_range_deletes_.LoadRelaxed();
720
713
  }
721
714
 
722
- uint64_t GetDataSize() const override {
723
- return data_size_.load(std::memory_order_relaxed);
724
- }
715
+ uint64_t GetDataSize() const override { return data_size_.LoadRelaxed(); }
725
716
 
726
- size_t write_buffer_size() const {
727
- return write_buffer_size_.load(std::memory_order_relaxed);
728
- }
717
+ size_t write_buffer_size() const { return write_buffer_size_.LoadRelaxed(); }
729
718
 
730
719
  // Dynamically change the memtable's capacity. If set below the current usage,
731
720
  // the next key added will trigger a flush. Can only increase size when
732
721
  // memtable prefix bloom is disabled, since we can't easily allocate more
733
- // space.
722
+ // space. Non-atomic update ok because this is only called with DB mutex held.
734
723
  void UpdateWriteBufferSize(size_t new_write_buffer_size) {
735
724
  if (bloom_filter_ == nullptr ||
736
- new_write_buffer_size < write_buffer_size_) {
737
- write_buffer_size_.store(new_write_buffer_size,
738
- std::memory_order_relaxed);
725
+ new_write_buffer_size < write_buffer_size_.LoadRelaxed()) {
726
+ write_buffer_size_.StoreRelaxed(new_write_buffer_size);
739
727
  }
740
728
  }
741
729
 
@@ -827,7 +815,7 @@ class MemTable final : public ReadOnlyMemTable {
827
815
 
828
816
  bool IsFragmentedRangeTombstonesConstructed() const override {
829
817
  return fragmented_range_tombstone_list_.get() != nullptr ||
830
- is_range_del_table_empty_;
818
+ is_range_del_table_empty_.LoadRelaxed();
831
819
  }
832
820
 
833
821
  // Gets the newest user defined timestamps in the memtable. This should only
@@ -839,6 +827,9 @@ class MemTable final : public ReadOnlyMemTable {
839
827
  uint32_t protection_bytes_per_key,
840
828
  bool allow_data_in_errors = false);
841
829
 
830
+ // Validate the checksum of the key/value pair.
831
+ Status ValidateKey(const char* key, bool allow_data_in_errors);
832
+
842
833
  private:
843
834
  enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };
844
835
 
@@ -853,16 +844,22 @@ class MemTable final : public ReadOnlyMemTable {
853
844
  ConcurrentArena arena_;
854
845
  std::unique_ptr<MemTableRep> table_;
855
846
  std::unique_ptr<MemTableRep> range_del_table_;
856
- std::atomic_bool is_range_del_table_empty_;
847
+ // This is OK to be relaxed access because consistency between table_ and
848
+ // range_del_table_ is provided by explicit multi-versioning with sequence
849
+ // numbers. It's ok for stale memory to say the range_del_table_ is empty when
850
+ // it's actually not because if it was relevant to our read (based on sequence
851
+ // number), the relaxed memory read would get a sufficiently updated value
852
+ // because of the ordering provided by LastPublishedSequence().
853
+ RelaxedAtomic<bool> is_range_del_table_empty_;
857
854
 
858
855
  // Total data size of all data inserted
859
- std::atomic<uint64_t> data_size_;
860
- std::atomic<uint64_t> num_entries_;
861
- std::atomic<uint64_t> num_deletes_;
862
- std::atomic<uint64_t> num_range_deletes_;
856
+ RelaxedAtomic<uint64_t> data_size_;
857
+ RelaxedAtomic<uint64_t> num_entries_;
858
+ RelaxedAtomic<uint64_t> num_deletes_;
859
+ RelaxedAtomic<uint64_t> num_range_deletes_;
863
860
 
864
861
  // Dynamically changeable memtable option
865
- std::atomic<size_t> write_buffer_size_;
862
+ RelaxedAtomic<size_t> write_buffer_size_;
866
863
 
867
864
  // The sequence number of the kv that was inserted first
868
865
  std::atomic<SequenceNumber> first_seqno_;
@@ -898,7 +895,7 @@ class MemTable final : public ReadOnlyMemTable {
898
895
 
899
896
  // keep track of memory usage in table_, arena_, and range_del_table_.
900
897
  // Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow`
901
- std::atomic<uint64_t> approximate_memory_usage_;
898
+ RelaxedAtomic<uint64_t> approximate_memory_usage_;
902
899
 
903
900
  // max range deletions in a memtable, before automatic flushing, 0 for
904
901
  // unlimited.
@@ -949,14 +946,22 @@ class MemTable final : public ReadOnlyMemTable {
949
946
 
950
947
  // makes sure there is a single range tombstone writer to invalidate cache
951
948
  std::mutex range_del_mutex_;
949
+ #if defined(__cpp_lib_atomic_shared_ptr)
950
+ CoreLocalArray<
951
+ std::atomic<std::shared_ptr<FragmentedRangeTombstoneListCache>>>
952
+ cached_range_tombstone_;
953
+ #else
952
954
  CoreLocalArray<std::shared_ptr<FragmentedRangeTombstoneListCache>>
953
955
  cached_range_tombstone_;
954
956
 
957
+ #endif
955
958
  void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
956
959
  const Slice& key, const Slice& value, ValueType type,
957
960
  SequenceNumber s, char* checksum_ptr);
958
961
 
959
962
  void MaybeUpdateNewestUDT(const Slice& user_key);
963
+
964
+ const std::function<Status(const char*, bool)> key_validation_callback_;
960
965
  };
961
966
 
962
967
  const char* EncodeKey(std::string* scratch, const Slice& target);
@@ -497,6 +497,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
497
497
  ikey.sequence <= latest_snapshot_
498
498
  ? CompactionFilter::Decision::kKeep
499
499
  : FilterMerge(orig_ikey.user_key, value_slice);
500
+ // FIXME: should also check for kRemove here
500
501
  if (filter != CompactionFilter::Decision::kRemoveAndSkipUntil &&
501
502
  range_del_agg != nullptr &&
502
503
  range_del_agg->ShouldDelete(
@@ -32,6 +32,7 @@ bool MergeOperator::FullMergeV3(const MergeOperationInputV3& merge_in,
32
32
  MergeOperationOutputV3* merge_out) const {
33
33
  assert(merge_out);
34
34
 
35
+ Slice value_of_default; // avoid warning about in_v2 pointing at this
35
36
  MergeOperationInput in_v2(merge_in.key, nullptr, merge_in.operand_list,
36
37
  merge_in.logger);
37
38
 
@@ -66,7 +67,6 @@ bool MergeOperator::FullMergeV3(const MergeOperationInputV3& merge_in,
66
67
  const bool has_default_column =
67
68
  WideColumnsHelper::HasDefaultColumn(existing_columns);
68
69
 
69
- Slice value_of_default;
70
70
  if (has_default_column) {
71
71
  value_of_default = existing_columns.front().value();
72
72
  }
@@ -10,24 +10,25 @@ namespace ROCKSDB_NAMESPACE {
10
10
  using MultiScanIterator = MultiScan::MultiScanIterator;
11
11
 
12
12
  MultiScan::MultiScan(const ReadOptions& read_options,
13
- const std::vector<ScanOptions>& scan_opts, DB* db,
13
+ const MultiScanArgs& scan_opts, DB* db,
14
14
  ColumnFamilyHandle* cfh)
15
15
  : read_options_(read_options), scan_opts_(scan_opts), db_(db), cfh_(cfh) {
16
16
  bool slow_path = false;
17
17
  // Set up read_options with iterate_upper_bound based on the first scan.
18
18
  // Subsequent scans will update and allocate a new DB iterator as necessary
19
- if (scan_opts[0].range.limit) {
20
- upper_bound_ = *scan_opts[0].range.limit;
19
+ if (scan_opts.GetScanRanges()[0].range.limit) {
20
+ upper_bound_ = *scan_opts.GetScanRanges()[0].range.limit;
21
21
  read_options_.iterate_upper_bound = &upper_bound_;
22
22
  } else {
23
23
  read_options_.iterate_upper_bound = nullptr;
24
24
  }
25
- for (auto opts : scan_opts) {
25
+ for (const auto& opts : scan_opts.GetScanRanges()) {
26
26
  // Check that all the ScanOptions either specify an upper bound or not. If
27
27
  // it's mixed we take the slow path which avoids calling Prepare: we have to
28
28
  // reallocate the Iterator with updated read_options every time we switch
29
29
  // between upper bound or no upper bound, which complicates Prepare.
30
- if (opts.range.limit.has_value() != scan_opts[0].range.limit.has_value()) {
30
+ if (opts.range.limit.has_value() !=
31
+ scan_opts.GetScanRanges()[0].range.limit.has_value()) {
31
32
  slow_path = true;
32
33
  break;
33
34
  }
@@ -39,6 +40,11 @@ MultiScan::MultiScan(const ReadOptions& read_options,
39
40
  }
40
41
 
41
42
  MultiScanIterator& MultiScanIterator::operator++() {
43
+ status_ = db_iter_->status();
44
+ if (!status_.ok()) {
45
+ throw MultiScanException(status_);
46
+ }
47
+
42
48
  if (idx_ >= scan_opts_.size()) {
43
49
  throw std::logic_error("Index out of range");
44
50
  }
@@ -396,7 +396,7 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) {
396
396
  return "temperature field wrong size";
397
397
  } else {
398
398
  Temperature casted_field = static_cast<Temperature>(field[0]);
399
- if (casted_field <= Temperature::kCold) {
399
+ if (casted_field < Temperature::kLastTemperature) {
400
400
  f.temperature = casted_field;
401
401
  }
402
402
  }
@@ -110,7 +110,7 @@ constexpr uint64_t kUnknownOldestAncesterTime = 0;
110
110
  constexpr uint64_t kUnknownNewestKeyTime = 0;
111
111
  constexpr uint64_t kUnknownFileCreationTime = 0;
112
112
  constexpr uint64_t kUnknownEpochNumber = 0;
113
- // If `Options::allow_ingest_behind` is true, this epoch number
113
+ // If `Options::cf_allow_ingest_behind` is true, this epoch number
114
114
  // will be dedicated to files ingested behind.
115
115
  constexpr uint64_t kReservedEpochNumberForFileIngestedBehind = 1;
116
116