@nxtedition/rocksdb 7.1.14 → 7.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/binding.cc +1 -0
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
  3. package/deps/rocksdb/rocksdb/Makefile +91 -11
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -4
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
  14. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
  19. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
  20. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
  21. package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
  22. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
  23. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
  24. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
  27. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
  30. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
  31. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
  33. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
  34. package/deps/rocksdb/rocksdb/db/c.cc +29 -0
  35. package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
  36. package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
  46. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
  47. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
  48. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
  50. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
  51. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
  54. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
  55. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
  63. package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
  64. package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
  65. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
  66. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
  67. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
  68. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
  69. package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
  70. package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
  71. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
  72. package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
  73. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
  74. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
  75. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
  76. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
  77. package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
  80. package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
  82. package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
  83. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
  84. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
  85. package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
  86. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
  87. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
  88. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
  89. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
  90. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
  91. package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
  92. package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
  93. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
  94. package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
  95. package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
  96. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  97. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
  98. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
  99. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  100. package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
  101. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
  102. package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
  103. package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
  104. package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
  105. package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
  106. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
  107. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
  108. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
  109. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
  111. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
  112. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
  113. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
  114. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
  115. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
  118. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
  119. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
  120. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
  121. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  122. package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
  123. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
  124. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
  125. package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
  126. package/deps/rocksdb/rocksdb/file/filename.h +4 -2
  127. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
  128. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
  129. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
  130. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
  131. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
  132. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
  134. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
  135. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
  136. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
  137. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
  139. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
  140. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
  142. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
  143. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
  144. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  145. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
  148. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
  149. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
  150. package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
  151. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
  152. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
  153. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
  154. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
  155. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
  156. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
  157. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
  158. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
  159. package/deps/rocksdb/rocksdb/options/options.cc +2 -2
  160. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
  161. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
  162. package/deps/rocksdb/rocksdb/src.mk +4 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
  170. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
  171. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
  172. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
  173. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
  174. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
  175. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  176. package/deps/rocksdb/rocksdb/table/format.cc +26 -29
  177. package/deps/rocksdb/rocksdb/table/format.h +44 -26
  178. package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
  179. package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
  180. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
  181. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
  182. package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
  183. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
  184. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
  185. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
  186. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
  187. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
  188. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
  189. package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
  190. package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
  191. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
  192. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
  193. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
  194. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
  195. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
  196. package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
  197. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
  198. package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
  199. package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
  200. package/deps/rocksdb/rocksdb/util/timer.h +2 -3
  201. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
  202. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
  203. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
  204. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
  205. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
  206. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
  207. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
  208. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
  209. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
  210. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
  211. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
  212. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
  213. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  214. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
  217. package/deps/rocksdb/rocksdb.gyp +0 -1
  218. package/index.js +6 -10
  219. package/package.json +1 -1
  220. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  221. package/prebuilds/linux-x64/node.napi.node +0 -0
  222. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
@@ -336,6 +336,9 @@ class NonBatchedOpsStressTest : public StressTest {
336
336
  SharedState::ignore_read_error = false;
337
337
  }
338
338
 
339
+ std::unique_ptr<MutexLock> lock(new MutexLock(
340
+ thread->shared->GetMutexForKey(rand_column_families[0], rand_keys[0])));
341
+
339
342
  ReadOptions read_opts_copy = read_opts;
340
343
  std::string read_ts_str;
341
344
  Slice read_ts_slice;
@@ -360,9 +363,31 @@ class NonBatchedOpsStressTest : public StressTest {
360
363
  }
361
364
  // found case
362
365
  thread->stats.AddGets(1, 1);
366
+ // we only have the latest expected state
367
+ if (!FLAGS_skip_verifydb && !read_opts_copy.timestamp &&
368
+ thread->shared->Get(rand_column_families[0], rand_keys[0]) ==
369
+ SharedState::DELETION_SENTINEL) {
370
+ thread->shared->SetVerificationFailure();
371
+ fprintf(stderr,
372
+ "error : inconsistent values for key %s: Get returns %s, "
373
+ "expected state does not have the key.\n",
374
+ key.ToString(true).c_str(), StringToHex(from_db).c_str());
375
+ }
363
376
  } else if (s.IsNotFound()) {
364
377
  // not found case
365
378
  thread->stats.AddGets(1, 0);
379
+ if (!FLAGS_skip_verifydb && !read_opts_copy.timestamp) {
380
+ auto expected =
381
+ thread->shared->Get(rand_column_families[0], rand_keys[0]);
382
+ if (expected != SharedState::DELETION_SENTINEL &&
383
+ expected != SharedState::UNKNOWN_SENTINEL) {
384
+ thread->shared->SetVerificationFailure();
385
+ fprintf(stderr,
386
+ "error : inconsistent values for key %s: expected state has "
387
+ "the key, Get() returns NotFound.\n",
388
+ key.ToString(true).c_str());
389
+ }
390
+ }
366
391
  } else {
367
392
  if (error_count == 0) {
368
393
  // errors case
@@ -634,14 +659,16 @@ class NonBatchedOpsStressTest : public StressTest {
634
659
  Status TestPut(ThreadState* thread, WriteOptions& write_opts,
635
660
  const ReadOptions& read_opts,
636
661
  const std::vector<int>& rand_column_families,
637
- const std::vector<int64_t>& rand_keys, char (&value)[100],
638
- std::unique_ptr<MutexLock>& lock) override {
662
+ const std::vector<int64_t>& rand_keys,
663
+ char (&value)[100]) override {
639
664
  auto shared = thread->shared;
640
665
  int64_t max_key = shared->GetMaxKey();
641
666
  int64_t rand_key = rand_keys[0];
642
667
  int rand_column_family = rand_column_families[0];
643
668
  std::string write_ts_str;
644
669
  Slice write_ts;
670
+ std::unique_ptr<MutexLock> lock(
671
+ new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key)));
645
672
  while (!shared->AllowsOverwrite(rand_key) &&
646
673
  (FLAGS_use_merge || shared->Exists(rand_column_family, rand_key))) {
647
674
  lock.reset();
@@ -736,12 +763,14 @@ class NonBatchedOpsStressTest : public StressTest {
736
763
 
737
764
  Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
738
765
  const std::vector<int>& rand_column_families,
739
- const std::vector<int64_t>& rand_keys,
740
- std::unique_ptr<MutexLock>& /* lock */) override {
766
+ const std::vector<int64_t>& rand_keys) override {
741
767
  int64_t rand_key = rand_keys[0];
742
768
  int rand_column_family = rand_column_families[0];
743
769
  auto shared = thread->shared;
744
770
 
771
+ std::unique_ptr<MutexLock> lock(
772
+ new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key)));
773
+
745
774
  // OPERATION delete
746
775
  std::string write_ts_str = GetNowNanos();
747
776
  Slice write_ts = write_ts_str;
@@ -833,8 +862,7 @@ class NonBatchedOpsStressTest : public StressTest {
833
862
 
834
863
  Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
835
864
  const std::vector<int>& rand_column_families,
836
- const std::vector<int64_t>& rand_keys,
837
- std::unique_ptr<MutexLock>& lock) override {
865
+ const std::vector<int64_t>& rand_keys) override {
838
866
  // OPERATION delete range
839
867
  std::vector<std::unique_ptr<MutexLock>> range_locks;
840
868
  // delete range does not respect disallowed overwrites. the keys for
@@ -846,16 +874,12 @@ class NonBatchedOpsStressTest : public StressTest {
846
874
  auto shared = thread->shared;
847
875
  int64_t max_key = shared->GetMaxKey();
848
876
  if (rand_key > max_key - FLAGS_range_deletion_width) {
849
- lock.reset();
850
877
  rand_key =
851
878
  thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
852
- range_locks.emplace_back(
853
- new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key)));
854
- } else {
855
- range_locks.emplace_back(std::move(lock));
856
879
  }
857
- for (int j = 1; j < FLAGS_range_deletion_width; ++j) {
858
- if (((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
880
+ for (int j = 0; j < FLAGS_range_deletion_width; ++j) {
881
+ if (j == 0 ||
882
+ ((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
859
883
  range_locks.emplace_back(new MutexLock(
860
884
  shared->GetMutexForKey(rand_column_family, rand_key + j)));
861
885
  }
@@ -896,8 +920,7 @@ class NonBatchedOpsStressTest : public StressTest {
896
920
  void TestIngestExternalFile(
897
921
  ThreadState* /* thread */,
898
922
  const std::vector<int>& /* rand_column_families */,
899
- const std::vector<int64_t>& /* rand_keys */,
900
- std::unique_ptr<MutexLock>& /* lock */) override {
923
+ const std::vector<int64_t>& /* rand_keys */) override {
901
924
  assert(false);
902
925
  fprintf(stderr,
903
926
  "RocksDB lite does not support "
@@ -907,8 +930,7 @@ class NonBatchedOpsStressTest : public StressTest {
907
930
  #else
908
931
  void TestIngestExternalFile(ThreadState* thread,
909
932
  const std::vector<int>& rand_column_families,
910
- const std::vector<int64_t>& rand_keys,
911
- std::unique_ptr<MutexLock>& lock) override {
933
+ const std::vector<int64_t>& rand_keys) override {
912
934
  const std::string sst_filename =
913
935
  FLAGS_db + "/." + std::to_string(thread->tid) + ".sst";
914
936
  Status s;
@@ -938,9 +960,8 @@ class NonBatchedOpsStressTest : public StressTest {
938
960
  s.ok() && key < shared->GetMaxKey() &&
939
961
  static_cast<int32_t>(keys.size()) < FLAGS_ingest_external_file_width;
940
962
  ++key) {
941
- if (key == key_base) {
942
- range_locks.emplace_back(std::move(lock));
943
- } else if ((key & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
963
+ if (key == key_base ||
964
+ (key & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
944
965
  range_locks.emplace_back(
945
966
  new MutexLock(shared->GetMutexForKey(column_family, key)));
946
967
  }
@@ -978,6 +999,237 @@ class NonBatchedOpsStressTest : public StressTest {
978
999
  }
979
1000
  #endif // ROCKSDB_LITE
980
1001
 
1002
+ // Given a key K, this creates an iterator which scans the range
1003
+ // [K, K + FLAGS_num_iterations) forward and backward.
1004
+ // Then does a random sequence of Next/Prev operations.
1005
+ Status TestIterateAgainstExpected(
1006
+ ThreadState* thread, const ReadOptions& read_opts,
1007
+ const std::vector<int>& rand_column_families,
1008
+ const std::vector<int64_t>& rand_keys) override {
1009
+ // Lock the whole range over which we might iterate to ensure it doesn't
1010
+ // change under us.
1011
+ std::vector<std::unique_ptr<MutexLock>> range_locks;
1012
+ int64_t lb = rand_keys[0];
1013
+ int rand_column_family = rand_column_families[0];
1014
+ auto shared = thread->shared;
1015
+ int64_t max_key = shared->GetMaxKey();
1016
+ if (static_cast<uint64_t>(lb) > max_key - FLAGS_num_iterations) {
1017
+ lb = thread->rand.Next() % (max_key - FLAGS_num_iterations + 1);
1018
+ }
1019
+ for (int j = 0; j < static_cast<int>(FLAGS_num_iterations); ++j) {
1020
+ if (j == 0 || ((lb + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
1021
+ range_locks.emplace_back(
1022
+ new MutexLock(shared->GetMutexForKey(rand_column_family, lb + j)));
1023
+ }
1024
+ }
1025
+ int64_t ub = lb + FLAGS_num_iterations;
1026
+ // Locks acquired for [lb, ub)
1027
+ ReadOptions readoptscopy(read_opts);
1028
+ std::string read_ts_str;
1029
+ Slice read_ts;
1030
+ if (FLAGS_user_timestamp_size > 0) {
1031
+ read_ts_str = GetNowNanos();
1032
+ read_ts = read_ts_str;
1033
+ readoptscopy.timestamp = &read_ts;
1034
+ }
1035
+ readoptscopy.total_order_seek = true;
1036
+ std::string max_key_str;
1037
+ Slice max_key_slice;
1038
+ if (!FLAGS_destroy_db_initially) {
1039
+ max_key_str = Key(max_key);
1040
+ max_key_slice = max_key_str;
1041
+ // to restrict iterator from reading keys written in batched_op_stress
1042
+ // that do not have expected state updated and may not be parseable by
1043
+ // GetIntVal().
1044
+ readoptscopy.iterate_upper_bound = &max_key_slice;
1045
+ }
1046
+ auto cfh = column_families_[rand_column_family];
1047
+ std::string op_logs;
1048
+ std::unique_ptr<Iterator> iter(db_->NewIterator(readoptscopy, cfh));
1049
+
1050
+ auto check_no_key_in_range = [&](int64_t start, int64_t end) {
1051
+ for (auto j = std::max(start, lb); j < std::min(end, ub); ++j) {
1052
+ auto expected_value =
1053
+ shared->Get(rand_column_family, static_cast<int64_t>(j));
1054
+ if (expected_value != shared->DELETION_SENTINEL &&
1055
+ expected_value != shared->UNKNOWN_SENTINEL) {
1056
+ // Fail fast to preserve the DB state.
1057
+ thread->shared->SetVerificationFailure();
1058
+ if (iter->Valid()) {
1059
+ fprintf(stderr,
1060
+ "Expected state has key %s, iterator is at key %s\n",
1061
+ Slice(Key(j)).ToString(true).c_str(),
1062
+ iter->key().ToString(true).c_str());
1063
+ } else {
1064
+ fprintf(stderr, "Expected state has key %s, iterator is invalid\n",
1065
+ Slice(Key(j)).ToString(true).c_str());
1066
+ }
1067
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
1068
+ cfh->GetName().c_str(), op_logs.c_str());
1069
+ thread->stats.AddErrors(1);
1070
+ return false;
1071
+ }
1072
+ }
1073
+ return true;
1074
+ };
1075
+
1076
+ // Forward and backward scan to ensure we cover the entire range [lb, ub).
1077
+ // The random sequence Next and Prev test below tends to be very short
1078
+ // ranged.
1079
+ int64_t last_key = lb - 1;
1080
+ std::string key_str = Key(lb);
1081
+ iter->Seek(Slice(key_str));
1082
+ op_logs += "S " + Slice(key_str).ToString(true) + " ";
1083
+ uint64_t curr;
1084
+ while (true) {
1085
+ if (!iter->Valid()) {
1086
+ if (!iter->status().ok()) {
1087
+ thread->shared->SetVerificationFailure();
1088
+ fprintf(stderr, "TestIterate against expected state error: %s\n",
1089
+ iter->status().ToString().c_str());
1090
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
1091
+ cfh->GetName().c_str(), op_logs.c_str());
1092
+ thread->stats.AddErrors(1);
1093
+ return iter->status();
1094
+ }
1095
+ if (!check_no_key_in_range(last_key + 1, static_cast<int64_t>(ub))) {
1096
+ // error reported in check_no_key_in_range()
1097
+ return Status::OK();
1098
+ }
1099
+ break;
1100
+ }
1101
+ // iter is valid, the range (last_key, current key) was skipped
1102
+ GetIntVal(iter->key().ToString(), &curr);
1103
+ if (!check_no_key_in_range(last_key + 1, static_cast<int64_t>(curr))) {
1104
+ return Status::OK();
1105
+ }
1106
+ last_key = static_cast<int64_t>(curr);
1107
+ if (last_key >= ub - 1) {
1108
+ break;
1109
+ }
1110
+ iter->Next();
1111
+ op_logs += "N";
1112
+ }
1113
+
1114
+ // backward scan
1115
+ key_str = Key(ub - 1);
1116
+ iter->SeekForPrev(Slice(key_str));
1117
+ op_logs += " SFP " + Slice(key_str).ToString(true) + " ";
1118
+ last_key = ub;
1119
+ while (true) {
1120
+ if (!iter->Valid()) {
1121
+ if (!iter->status().ok()) {
1122
+ thread->shared->SetVerificationFailure();
1123
+ fprintf(stderr, "TestIterate against expected state error: %s\n",
1124
+ iter->status().ToString().c_str());
1125
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
1126
+ cfh->GetName().c_str(), op_logs.c_str());
1127
+ thread->stats.AddErrors(1);
1128
+ return iter->status();
1129
+ }
1130
+ if (!check_no_key_in_range(lb, last_key)) {
1131
+ return Status::OK();
1132
+ }
1133
+ break;
1134
+ }
1135
+ // the range (current key, last key) was skipped
1136
+ GetIntVal(iter->key().ToString(), &curr);
1137
+ if (!check_no_key_in_range(static_cast<int64_t>(curr + 1), last_key)) {
1138
+ return Status::OK();
1139
+ }
1140
+ last_key = static_cast<int64_t>(curr);
1141
+ if (last_key <= lb) {
1142
+ break;
1143
+ }
1144
+ iter->Prev();
1145
+ op_logs += "P";
1146
+ }
1147
+
1148
+ // start from middle of [lb, ub) otherwise it is easy to iterate out of
1149
+ // locked range
1150
+ int64_t mid = lb + static_cast<int64_t>(FLAGS_num_iterations / 2);
1151
+ key_str = Key(mid);
1152
+ Slice key = key_str;
1153
+ if (thread->rand.OneIn(2)) {
1154
+ iter->Seek(key);
1155
+ op_logs += " S " + key.ToString(true) + " ";
1156
+ if (!iter->Valid() && iter->status().ok()) {
1157
+ if (!check_no_key_in_range(mid, ub)) {
1158
+ return Status::OK();
1159
+ }
1160
+ }
1161
+ } else {
1162
+ iter->SeekForPrev(key);
1163
+ op_logs += " SFP " + key.ToString(true) + " ";
1164
+ if (!iter->Valid() && iter->status().ok()) {
1165
+ // iterator says nothing <= mid
1166
+ if (!check_no_key_in_range(lb, mid + 1)) {
1167
+ return Status::OK();
1168
+ }
1169
+ }
1170
+ }
1171
+
1172
+ for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); i++) {
1173
+ GetIntVal(iter->key().ToString(), &curr);
1174
+ if (curr < static_cast<uint64_t>(lb)) {
1175
+ iter->Next();
1176
+ op_logs += "N";
1177
+ } else if (curr >= static_cast<uint64_t>(ub)) {
1178
+ iter->Prev();
1179
+ op_logs += "P";
1180
+ } else {
1181
+ uint32_t expected_value =
1182
+ shared->Get(rand_column_family, static_cast<int64_t>(curr));
1183
+ if (expected_value == shared->DELETION_SENTINEL) {
1184
+ // Fail fast to preserve the DB state.
1185
+ thread->shared->SetVerificationFailure();
1186
+ fprintf(stderr, "Iterator has key %s, but expected state does not.\n",
1187
+ iter->key().ToString(true).c_str());
1188
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
1189
+ cfh->GetName().c_str(), op_logs.c_str());
1190
+ thread->stats.AddErrors(1);
1191
+ break;
1192
+ }
1193
+ if (thread->rand.OneIn(2)) {
1194
+ iter->Next();
1195
+ op_logs += "N";
1196
+ if (!iter->Valid()) {
1197
+ break;
1198
+ }
1199
+ uint64_t next;
1200
+ GetIntVal(iter->key().ToString(), &next);
1201
+ if (!check_no_key_in_range(static_cast<int64_t>(curr + 1),
1202
+ static_cast<int64_t>(next))) {
1203
+ return Status::OK();
1204
+ }
1205
+ } else {
1206
+ iter->Prev();
1207
+ op_logs += "P";
1208
+ if (!iter->Valid()) {
1209
+ break;
1210
+ }
1211
+ uint64_t prev;
1212
+ GetIntVal(iter->key().ToString(), &prev);
1213
+ if (!check_no_key_in_range(static_cast<int64_t>(prev + 1),
1214
+ static_cast<int64_t>(curr))) {
1215
+ return Status::OK();
1216
+ }
1217
+ }
1218
+ }
1219
+ }
1220
+ if (!iter->status().ok()) {
1221
+ thread->shared->SetVerificationFailure();
1222
+ fprintf(stderr, "TestIterate against expected state error: %s\n",
1223
+ iter->status().ToString().c_str());
1224
+ fprintf(stderr, "Column family: %s, op_logs: %s\n",
1225
+ cfh->GetName().c_str(), op_logs.c_str());
1226
+ thread->stats.AddErrors(1);
1227
+ return iter->status();
1228
+ }
1229
+ thread->stats.AddIterations(1);
1230
+ return Status::OK();
1231
+ }
1232
+
981
1233
  bool VerifyOrSyncValue(int cf, int64_t key, const ReadOptions& /*opts*/,
982
1234
  SharedState* shared, const std::string& value_from_db,
983
1235
  const Status& s, bool strict = false) const {
@@ -1005,22 +1257,27 @@ class NonBatchedOpsStressTest : public StressTest {
1005
1257
  if (s.ok()) {
1006
1258
  char value[kValueMaxLen];
1007
1259
  if (value_base == SharedState::DELETION_SENTINEL) {
1008
- VerificationAbort(shared, "Unexpected value found", cf, key);
1260
+ VerificationAbort(shared, "Unexpected value found", cf, key,
1261
+ value_from_db, "");
1009
1262
  return false;
1010
1263
  }
1011
1264
  size_t sz = GenerateValue(value_base, value, sizeof(value));
1012
1265
  if (value_from_db.length() != sz) {
1013
- VerificationAbort(shared, "Length of value read is not equal", cf, key);
1266
+ VerificationAbort(shared, "Length of value read is not equal", cf, key,
1267
+ value_from_db, Slice(value, sz));
1014
1268
  return false;
1015
1269
  }
1016
1270
  if (memcmp(value_from_db.data(), value, sz) != 0) {
1017
- VerificationAbort(shared, "Contents of value read don't match", cf,
1018
- key);
1271
+ VerificationAbort(shared, "Contents of value read don't match", cf, key,
1272
+ value_from_db, Slice(value, sz));
1019
1273
  return false;
1020
1274
  }
1021
1275
  } else {
1022
1276
  if (value_base != SharedState::DELETION_SENTINEL) {
1023
- VerificationAbort(shared, "Value not found: " + s.ToString(), cf, key);
1277
+ char value[kValueMaxLen];
1278
+ size_t sz = GenerateValue(value_base, value, sizeof(value));
1279
+ VerificationAbort(shared, "Value not found: " + s.ToString(), cf, key,
1280
+ "", Slice(value, sz));
1024
1281
  return false;
1025
1282
  }
1026
1283
  }
@@ -1115,10 +1115,11 @@ class PosixFileSystem : public FileSystem {
1115
1115
  // Prepare the cancel request.
1116
1116
  struct io_uring_sqe* sqe;
1117
1117
  sqe = io_uring_get_sqe(iu);
1118
- // prep_cancel changed API in liburing, but we need to support both old
1119
- // and new versions so do it by hand
1120
- io_uring_prep_cancel(sqe, 0, 0);
1121
- sqe->addr = reinterpret_cast<uint64_t>(posix_handle);
1118
+
1119
+ // In order to cancel the request, sqe->addr of cancel request should
1120
+ // match with the read request submitted which is posix_handle->iov.
1121
+ io_uring_prep_cancel(sqe, &posix_handle->iov, 0);
1122
+ // Sets sqe->user_data to posix_handle.
1122
1123
  io_uring_sqe_set_data(sqe, posix_handle);
1123
1124
 
1124
1125
  // submit the request.
@@ -1146,6 +1147,7 @@ class PosixFileSystem : public FileSystem {
1146
1147
  }
1147
1148
  assert(cqe != nullptr);
1148
1149
 
1150
+ // Returns cqe->user_data.
1149
1151
  Posix_IOHandle* posix_handle =
1150
1152
  static_cast<Posix_IOHandle*>(io_uring_cqe_get_data(cqe));
1151
1153
  assert(posix_handle->iu == iu);
@@ -899,8 +899,10 @@ IOStatus PosixRandomAccessFile::ReadAsync(
899
899
  struct io_uring_sqe* sqe;
900
900
  sqe = io_uring_get_sqe(iu);
901
901
 
902
- io_uring_prep_readv(sqe, fd_, &posix_handle->iov, 1, posix_handle->offset);
902
+ io_uring_prep_readv(sqe, fd_, /*sqe->addr=*/&posix_handle->iov,
903
+ /*sqe->len=*/1, /*sqe->offset=*/posix_handle->offset);
903
904
 
905
+ // Sets sqe->user_data to posix_handle.
904
906
  io_uring_sqe_set_data(sqe, posix_handle);
905
907
 
906
908
  // Step 4: io_uring_submit