@nxtedition/rocksdb 8.1.17 → 8.2.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/binding.cc +32 -2
  2. package/binding.gyp +8 -0
  3. package/deps/liburing/liburing.gyp +20 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +7 -0
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
  8. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
  10. package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
  12. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
  18. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
  19. package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
  20. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
  22. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
  26. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
  30. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
  31. package/deps/rocksdb/rocksdb/db/c.cc +90 -1
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
  33. package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
  34. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
  42. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
  43. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
  44. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
  45. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
  52. package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
  53. package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
  54. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
  55. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
  57. package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
  58. package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
  59. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
  61. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
  62. package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
  63. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  64. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
  65. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
  66. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
  67. package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
  68. package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
  69. package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
  70. package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
  71. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
  72. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
  73. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
  74. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
  75. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
  76. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
  78. package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
  80. package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
  82. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
  89. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
  94. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
  95. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
  96. package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
  97. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
  98. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
  99. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
  100. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
  101. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
  102. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
  104. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
  105. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
  106. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
  108. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
  109. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
  110. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  113. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
  114. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
  115. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
  116. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
  117. package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
  118. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
  119. package/deps/rocksdb/rocksdb/src.mk +4 -0
  120. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
  121. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
  122. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
  124. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
  125. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
  126. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
  127. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  128. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
  129. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  131. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
  132. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
  133. package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
  134. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
  135. package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
  136. package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
  137. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
  138. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
  139. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
  140. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
  141. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
  143. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
  144. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
  145. package/deps/rocksdb/rocksdb.gyp +7 -1
  146. package/package.json +1 -1
  147. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -268,6 +268,110 @@ class BatchedOpsStressTest : public StressTest {
268
268
  return ret_status;
269
269
  }
270
270
 
271
+ void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
272
+ const std::vector<int>& rand_column_families,
273
+ const std::vector<int64_t>& rand_keys) override {
274
+ assert(thread);
275
+
276
+ ManagedSnapshot snapshot_guard(db_);
277
+
278
+ ReadOptions read_opts_copy(read_opts);
279
+ read_opts_copy.snapshot = snapshot_guard.snapshot();
280
+
281
+ assert(!rand_keys.empty());
282
+
283
+ const std::string key_suffix = Key(rand_keys[0]);
284
+
285
+ assert(!rand_column_families.empty());
286
+ assert(rand_column_families[0] >= 0);
287
+ assert(rand_column_families[0] < static_cast<int>(column_families_.size()));
288
+
289
+ ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
290
+ assert(cfh);
291
+
292
+ constexpr size_t num_keys = 10;
293
+
294
+ std::array<PinnableWideColumns, num_keys> results;
295
+
296
+ for (size_t i = 0; i < num_keys; ++i) {
297
+ const std::string key = std::to_string(i) + key_suffix;
298
+
299
+ const Status s = db_->GetEntity(read_opts_copy, cfh, key, &results[i]);
300
+
301
+ if (!s.ok() && !s.IsNotFound()) {
302
+ fprintf(stderr, "GetEntity error: %s\n", s.ToString().c_str());
303
+ thread->stats.AddErrors(1);
304
+ } else if (s.IsNotFound()) {
305
+ thread->stats.AddGets(1, 0);
306
+ } else {
307
+ thread->stats.AddGets(1, 1);
308
+ }
309
+ }
310
+
311
+ // Compare columns ignoring the last character of column values
312
+ auto compare = [](const WideColumns& lhs, const WideColumns& rhs) {
313
+ if (lhs.size() != rhs.size()) {
314
+ return false;
315
+ }
316
+
317
+ for (size_t i = 0; i < lhs.size(); ++i) {
318
+ if (lhs[i].name() != rhs[i].name()) {
319
+ return false;
320
+ }
321
+
322
+ if (lhs[i].value().size() != rhs[i].value().size()) {
323
+ return false;
324
+ }
325
+
326
+ if (lhs[i].value().difference_offset(rhs[i].value()) <
327
+ lhs[i].value().size() - 1) {
328
+ return false;
329
+ }
330
+ }
331
+
332
+ return true;
333
+ };
334
+
335
+ for (size_t i = 0; i < num_keys; ++i) {
336
+ const WideColumns& columns = results[i].columns();
337
+
338
+ if (!compare(results[0].columns(), columns)) {
339
+ fprintf(stderr,
340
+ "GetEntity error: inconsistent entities for key %s: %s, %s\n",
341
+ StringToHex(key_suffix).c_str(),
342
+ WideColumnsToHex(results[0].columns()).c_str(),
343
+ WideColumnsToHex(columns).c_str());
344
+ }
345
+
346
+ if (!columns.empty()) {
347
+ // The last character of each column value should be 'i' as a decimal
348
+ // digit
349
+ const char expected = static_cast<char>('0' + i);
350
+
351
+ for (const auto& column : columns) {
352
+ const Slice& value = column.value();
353
+
354
+ if (value.empty() || value[value.size() - 1] != expected) {
355
+ fprintf(stderr,
356
+ "GetEntity error: incorrect column value for key "
357
+ "%s, entity %s, column value %s, expected %c\n",
358
+ StringToHex(key_suffix).c_str(),
359
+ WideColumnsToHex(columns).c_str(),
360
+ value.ToString(/* hex */ true).c_str(), expected);
361
+ }
362
+ }
363
+
364
+ if (!VerifyWideColumns(columns)) {
365
+ fprintf(
366
+ stderr,
367
+ "GetEntity error: inconsistent columns for key %s, entity %s\n",
368
+ StringToHex(key_suffix).c_str(),
369
+ WideColumnsToHex(columns).c_str());
370
+ }
371
+ }
372
+ }
373
+ }
374
+
271
375
  // Given a key, this does prefix scans for "0"+P, "1"+P, ..., "9"+P
272
376
  // in the same snapshot where P is the first FLAGS_prefix_size - 1 bytes
273
377
  // of the key. Each of these 10 scans returns a series of values;
@@ -357,16 +461,14 @@ class BatchedOpsStressTest : public StressTest {
357
461
  }
358
462
 
359
463
  // make sure value() and columns() are consistent
360
- const WideColumns expected_columns = GenerateExpectedWideColumns(
361
- GetValueBase(iters[i]->value()), iters[i]->value());
362
- if (iters[i]->columns() != expected_columns) {
464
+ if (!VerifyWideColumns(iters[i]->value(), iters[i]->columns())) {
363
465
  fprintf(stderr,
364
466
  "prefix scan error : %" ROCKSDB_PRIszt
365
- ", value and columns inconsistent for prefix %s: %s\n",
467
+ ", value and columns inconsistent for prefix %s: value: %s, "
468
+ "columns: %s\n",
366
469
  i, prefix_slices[i].ToString(/* hex */ true).c_str(),
367
- DebugString(iters[i]->value(), iters[i]->columns(),
368
- expected_columns)
369
- .c_str());
470
+ iters[i]->value().ToString(/* hex */ true).c_str(),
471
+ WideColumnsToHex(iters[i]->columns()).c_str());
370
472
  }
371
473
 
372
474
  iters[i]->Next();
@@ -251,6 +251,146 @@ class CfConsistencyStressTest : public StressTest {
251
251
  return statuses;
252
252
  }
253
253
 
254
+ void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
255
+ const std::vector<int>& rand_column_families,
256
+ const std::vector<int64_t>& rand_keys) override {
257
+ assert(thread);
258
+ assert(!rand_column_families.empty());
259
+ assert(!rand_keys.empty());
260
+
261
+ const std::string key = Key(rand_keys[0]);
262
+
263
+ Status s;
264
+ bool is_consistent = true;
265
+
266
+ if (thread->rand.OneIn(2)) {
267
+ // With a 1/2 chance, do a random read from a random CF
268
+ const size_t cf_id = thread->rand.Next() % rand_column_families.size();
269
+
270
+ assert(rand_column_families[cf_id] >= 0);
271
+ assert(rand_column_families[cf_id] <
272
+ static_cast<int>(column_families_.size()));
273
+
274
+ ColumnFamilyHandle* const cfh =
275
+ column_families_[rand_column_families[cf_id]];
276
+ assert(cfh);
277
+
278
+ PinnableWideColumns result;
279
+ s = db_->GetEntity(read_opts, cfh, key, &result);
280
+
281
+ if (s.ok()) {
282
+ if (!VerifyWideColumns(result.columns())) {
283
+ fprintf(
284
+ stderr,
285
+ "GetEntity error: inconsistent columns for key %s, entity %s\n",
286
+ StringToHex(key).c_str(),
287
+ WideColumnsToHex(result.columns()).c_str());
288
+ is_consistent = false;
289
+ }
290
+ }
291
+ } else {
292
+ // With a 1/2 chance, compare one key across all CFs
293
+ ManagedSnapshot snapshot_guard(db_);
294
+
295
+ ReadOptions read_opts_copy = read_opts;
296
+ read_opts_copy.snapshot = snapshot_guard.snapshot();
297
+
298
+ assert(rand_column_families[0] >= 0);
299
+ assert(rand_column_families[0] <
300
+ static_cast<int>(column_families_.size()));
301
+
302
+ PinnableWideColumns cmp_result;
303
+ s = db_->GetEntity(read_opts_copy,
304
+ column_families_[rand_column_families[0]], key,
305
+ &cmp_result);
306
+
307
+ if (s.ok() || s.IsNotFound()) {
308
+ const bool cmp_found = s.ok();
309
+
310
+ if (cmp_found) {
311
+ if (!VerifyWideColumns(cmp_result.columns())) {
312
+ fprintf(stderr,
313
+ "GetEntity error: inconsistent columns for key %s, "
314
+ "entity %s\n",
315
+ StringToHex(key).c_str(),
316
+ WideColumnsToHex(cmp_result.columns()).c_str());
317
+ is_consistent = false;
318
+ }
319
+ }
320
+
321
+ if (is_consistent) {
322
+ for (size_t i = 1; i < rand_column_families.size(); ++i) {
323
+ assert(rand_column_families[i] >= 0);
324
+ assert(rand_column_families[i] <
325
+ static_cast<int>(column_families_.size()));
326
+
327
+ PinnableWideColumns result;
328
+ s = db_->GetEntity(read_opts_copy,
329
+ column_families_[rand_column_families[i]], key,
330
+ &result);
331
+
332
+ if (!s.ok() && !s.IsNotFound()) {
333
+ break;
334
+ }
335
+
336
+ const bool found = s.ok();
337
+
338
+ assert(!column_family_names_.empty());
339
+ assert(i < column_family_names_.size());
340
+
341
+ if (!cmp_found && found) {
342
+ fprintf(stderr,
343
+ "GetEntity returns different results for key %s: CF %s "
344
+ "returns not found, CF %s returns entity %s\n",
345
+ StringToHex(key).c_str(), column_family_names_[0].c_str(),
346
+ column_family_names_[i].c_str(),
347
+ WideColumnsToHex(result.columns()).c_str());
348
+ is_consistent = false;
349
+ break;
350
+ }
351
+
352
+ if (cmp_found && !found) {
353
+ fprintf(stderr,
354
+ "GetEntity returns different results for key %s: CF %s "
355
+ "returns entity %s, CF %s returns not found\n",
356
+ StringToHex(key).c_str(), column_family_names_[0].c_str(),
357
+ WideColumnsToHex(cmp_result.columns()).c_str(),
358
+ column_family_names_[i].c_str());
359
+ is_consistent = false;
360
+ break;
361
+ }
362
+
363
+ if (found && result != cmp_result) {
364
+ fprintf(stderr,
365
+ "GetEntity returns different results for key %s: CF %s "
366
+ "returns entity %s, CF %s returns entity %s\n",
367
+ StringToHex(key).c_str(), column_family_names_[0].c_str(),
368
+ WideColumnsToHex(cmp_result.columns()).c_str(),
369
+ column_family_names_[i].c_str(),
370
+ WideColumnsToHex(result.columns()).c_str());
371
+ is_consistent = false;
372
+ break;
373
+ }
374
+ }
375
+ }
376
+ }
377
+ }
378
+
379
+ if (!is_consistent) {
380
+ fprintf(stderr, "TestGetEntity error: results are not consistent\n");
381
+ thread->stats.AddErrors(1);
382
+ // Fail fast to preserve the DB state.
383
+ thread->shared->SetVerificationFailure();
384
+ } else if (s.ok()) {
385
+ thread->stats.AddGets(1, 1);
386
+ } else if (s.IsNotFound()) {
387
+ thread->stats.AddGets(1, 0);
388
+ } else {
389
+ fprintf(stderr, "TestGetEntity error: %s\n", s.ToString().c_str());
390
+ thread->stats.AddErrors(1);
391
+ }
392
+ }
393
+
254
394
  Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
255
395
  const std::vector<int>& rand_column_families,
256
396
  const std::vector<int64_t>& rand_keys) override {
@@ -290,12 +430,9 @@ class CfConsistencyStressTest : public StressTest {
290
430
  iter->Next()) {
291
431
  ++count;
292
432
 
293
- const WideColumns expected_columns = GenerateExpectedWideColumns(
294
- GetValueBase(iter->value()), iter->value());
295
- if (iter->columns() != expected_columns) {
296
- s = Status::Corruption(
297
- "Value and columns inconsistent",
298
- DebugString(iter->value(), iter->columns(), expected_columns));
433
+ if (!VerifyWideColumns(iter->value(), iter->columns())) {
434
+ s = Status::Corruption("Value and columns inconsistent",
435
+ DebugString(iter->value(), iter->columns()));
299
436
  break;
300
437
  }
301
438
  }
@@ -372,12 +509,10 @@ class CfConsistencyStressTest : public StressTest {
372
509
  assert(iter);
373
510
 
374
511
  if (iter->Valid()) {
375
- const WideColumns expected_columns = GenerateExpectedWideColumns(
376
- GetValueBase(iter->value()), iter->value());
377
- if (iter->columns() != expected_columns) {
378
- statuses[i] = Status::Corruption(
379
- "Value and columns inconsistent",
380
- DebugString(iter->value(), iter->columns(), expected_columns));
512
+ if (!VerifyWideColumns(iter->value(), iter->columns())) {
513
+ statuses[i] =
514
+ Status::Corruption("Value and columns inconsistent",
515
+ DebugString(iter->value(), iter->columns()));
381
516
  } else {
382
517
  ++valid_cnt;
383
518
  }
@@ -278,6 +278,37 @@ WideColumns GenerateExpectedWideColumns(uint32_t value_base,
278
278
  return columns;
279
279
  }
280
280
 
281
+ bool VerifyWideColumns(const Slice& value, const WideColumns& columns) {
282
+ if (value.size() < sizeof(uint32_t)) {
283
+ return false;
284
+ }
285
+
286
+ const uint32_t value_base = GetValueBase(value);
287
+
288
+ const WideColumns expected_columns =
289
+ GenerateExpectedWideColumns(value_base, value);
290
+
291
+ if (columns != expected_columns) {
292
+ return false;
293
+ }
294
+
295
+ return true;
296
+ }
297
+
298
+ bool VerifyWideColumns(const WideColumns& columns) {
299
+ if (columns.empty()) {
300
+ return false;
301
+ }
302
+
303
+ if (columns.front().name() != kDefaultWideColumnName) {
304
+ return false;
305
+ }
306
+
307
+ const Slice& value_of_default = columns.front().value();
308
+
309
+ return VerifyWideColumns(value_of_default, columns);
310
+ }
311
+
281
312
  std::string GetNowNanos() {
282
313
  uint64_t t = db_stress_env->NowNanos();
283
314
  std::string ret;
@@ -213,6 +213,7 @@ DECLARE_bool(compare_full_db_state_snapshot);
213
213
  DECLARE_uint64(snapshot_hold_ops);
214
214
  DECLARE_bool(long_running_snapshots);
215
215
  DECLARE_bool(use_multiget);
216
+ DECLARE_bool(use_get_entity);
216
217
  DECLARE_int32(readpercent);
217
218
  DECLARE_int32(prefixpercent);
218
219
  DECLARE_int32(writepercent);
@@ -321,6 +322,7 @@ DECLARE_uint64(readahead_size);
321
322
  DECLARE_uint64(initial_auto_readahead_size);
322
323
  DECLARE_uint64(max_auto_readahead_size);
323
324
  DECLARE_uint64(num_file_reads_for_auto_readahead);
325
+ DECLARE_bool(use_io_uring);
324
326
 
325
327
  constexpr long KB = 1024;
326
328
  constexpr int kRandomValueMaxFactor = 3;
@@ -595,6 +597,24 @@ extern inline std::string StringToHex(const std::string& str) {
595
597
  return result;
596
598
  }
597
599
 
600
+ inline std::string WideColumnsToHex(const WideColumns& columns) {
601
+ if (columns.empty()) {
602
+ return std::string();
603
+ }
604
+
605
+ std::ostringstream oss;
606
+
607
+ oss << std::hex;
608
+
609
+ auto it = columns.begin();
610
+ oss << *it;
611
+ for (++it; it != columns.end(); ++it) {
612
+ oss << ' ' << *it;
613
+ }
614
+
615
+ return oss.str();
616
+ }
617
+
598
618
  // Unified output format for double parameters
599
619
  extern inline std::string FormatDoubleParam(double param) {
600
620
  return std::to_string(param);
@@ -625,6 +645,8 @@ extern uint32_t GetValueBase(Slice s);
625
645
  extern WideColumns GenerateWideColumns(uint32_t value_base, const Slice& slice);
626
646
  extern WideColumns GenerateExpectedWideColumns(uint32_t value_base,
627
647
  const Slice& slice);
648
+ extern bool VerifyWideColumns(const Slice& value, const WideColumns& columns);
649
+ extern bool VerifyWideColumns(const WideColumns& columns);
628
650
 
629
651
  extern StressTest* CreateCfConsistencyStressTest();
630
652
  extern StressTest* CreateBatchedOpsStressTest();
@@ -747,6 +747,8 @@ DEFINE_bool(long_running_snapshots, false,
747
747
  DEFINE_bool(use_multiget, false,
748
748
  "If set, use the batched MultiGet API for reads");
749
749
 
750
+ DEFINE_bool(use_get_entity, false, "If set, use the GetEntity API for reads");
751
+
750
752
  static bool ValidateInt32Percent(const char* flagname, int32_t value) {
751
753
  if (value < 0 || value > 100) {
752
754
  fprintf(stderr, "Invalid value for --%s: %d, 0<= pct <=100 \n", flagname,
@@ -1073,6 +1075,7 @@ DEFINE_uint64(stats_dump_period_sec,
1073
1075
  ROCKSDB_NAMESPACE::Options().stats_dump_period_sec,
1074
1076
  "Gap between printing stats to log in seconds");
1075
1077
 
1076
- extern "C" bool RocksDbIOUringEnable() { return true; }
1078
+ DEFINE_bool(use_io_uring, false, "Enable the use of IO uring on Posix");
1079
+ extern "C" bool RocksDbIOUringEnable() { return FLAGS_use_io_uring; }
1077
1080
 
1078
1081
  #endif // GFLAGS
@@ -109,36 +109,37 @@ std::shared_ptr<Cache> StressTest::NewCache(size_t capacity,
109
109
  return nullptr;
110
110
  }
111
111
 
112
+ std::shared_ptr<SecondaryCache> secondary_cache;
113
+ if (!FLAGS_secondary_cache_uri.empty()) {
114
+ Status s = SecondaryCache::CreateFromString(
115
+ config_options, FLAGS_secondary_cache_uri, &secondary_cache);
116
+ if (secondary_cache == nullptr) {
117
+ fprintf(stderr,
118
+ "No secondary cache registered matching string: %s status=%s\n",
119
+ FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str());
120
+ exit(1);
121
+ }
122
+ if (FLAGS_secondary_cache_fault_one_in > 0) {
123
+ secondary_cache = std::make_shared<FaultInjectionSecondaryCache>(
124
+ secondary_cache, static_cast<uint32_t>(FLAGS_seed),
125
+ FLAGS_secondary_cache_fault_one_in);
126
+ }
127
+ }
128
+
112
129
  if (FLAGS_cache_type == "clock_cache") {
113
130
  fprintf(stderr, "Old clock cache implementation has been removed.\n");
114
131
  exit(1);
115
132
  } else if (FLAGS_cache_type == "hyper_clock_cache") {
116
- return HyperClockCacheOptions(static_cast<size_t>(capacity),
117
- FLAGS_block_size /*estimated_entry_charge*/,
118
- num_shard_bits)
119
- .MakeSharedCache();
133
+ HyperClockCacheOptions opts(static_cast<size_t>(capacity),
134
+ FLAGS_block_size /*estimated_entry_charge*/,
135
+ num_shard_bits);
136
+ opts.secondary_cache = std::move(secondary_cache);
137
+ return opts.MakeSharedCache();
120
138
  } else if (FLAGS_cache_type == "lru_cache") {
121
139
  LRUCacheOptions opts;
122
140
  opts.capacity = capacity;
123
141
  opts.num_shard_bits = num_shard_bits;
124
- std::shared_ptr<SecondaryCache> secondary_cache;
125
- if (!FLAGS_secondary_cache_uri.empty()) {
126
- Status s = SecondaryCache::CreateFromString(
127
- config_options, FLAGS_secondary_cache_uri, &secondary_cache);
128
- if (secondary_cache == nullptr) {
129
- fprintf(stderr,
130
- "No secondary cache registered matching string: %s status=%s\n",
131
- FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str());
132
- exit(1);
133
- }
134
- if (FLAGS_secondary_cache_fault_one_in > 0) {
135
- secondary_cache = std::make_shared<FaultInjectionSecondaryCache>(
136
- secondary_cache, static_cast<uint32_t>(FLAGS_seed),
137
- FLAGS_secondary_cache_fault_one_in);
138
- }
139
- opts.secondary_cache = secondary_cache;
140
- }
141
-
142
+ opts.secondary_cache = std::move(secondary_cache);
142
143
  return NewLRUCache(opts);
143
144
  } else {
144
145
  fprintf(stderr, "Cache type not supported.");
@@ -429,47 +430,27 @@ void StressTest::VerificationAbort(SharedState* shared, std::string msg, int cf,
429
430
 
430
431
  void StressTest::VerificationAbort(SharedState* shared, int cf, int64_t key,
431
432
  const Slice& value,
432
- const WideColumns& columns,
433
- const WideColumns& expected_columns) const {
433
+ const WideColumns& columns) const {
434
434
  assert(shared);
435
435
 
436
436
  auto key_str = Key(key);
437
437
 
438
438
  fprintf(stderr,
439
439
  "Verification failed for column family %d key %s (%" PRIi64
440
- "): Value and columns inconsistent: %s\n",
440
+ "): Value and columns inconsistent: value: %s, columns: %s\n",
441
441
  cf, Slice(key_str).ToString(/* hex */ true).c_str(), key,
442
- DebugString(value, columns, expected_columns).c_str());
442
+ value.ToString(/* hex */ true).c_str(),
443
+ WideColumnsToHex(columns).c_str());
443
444
 
444
445
  shared->SetVerificationFailure();
445
446
  }
446
447
 
447
448
  std::string StressTest::DebugString(const Slice& value,
448
- const WideColumns& columns,
449
- const WideColumns& expected_columns) {
449
+ const WideColumns& columns) {
450
450
  std::ostringstream oss;
451
451
 
452
- oss << "value: " << value.ToString(/* hex */ true);
453
-
454
- auto dump = [](const WideColumns& cols, std::ostream& os) {
455
- if (cols.empty()) {
456
- return;
457
- }
458
-
459
- os << std::hex;
460
-
461
- auto it = cols.begin();
462
- os << *it;
463
- for (++it; it != cols.end(); ++it) {
464
- os << ' ' << *it;
465
- }
466
- };
467
-
468
- oss << ", columns: ";
469
- dump(columns, oss);
470
-
471
- oss << ", expected_columns: ";
472
- dump(expected_columns, oss);
452
+ oss << "value: " << value.ToString(/* hex */ true)
453
+ << ", columns: " << WideColumnsToHex(columns);
473
454
 
474
455
  return oss.str();
475
456
  }
@@ -1004,7 +985,9 @@ void StressTest::OperateDb(ThreadState* thread) {
1004
985
  if (prob_op >= 0 && prob_op < static_cast<int>(FLAGS_readpercent)) {
1005
986
  assert(0 <= prob_op);
1006
987
  // OPERATION read
1007
- if (FLAGS_use_multiget) {
988
+ if (FLAGS_use_get_entity) {
989
+ TestGetEntity(thread, read_opts, rand_column_families, rand_keys);
990
+ } else if (FLAGS_use_multiget) {
1008
991
  // Leave room for one more iteration of the loop with a single key
1009
992
  // batch. This is to ensure that each thread does exactly the same
1010
993
  // number of ops
@@ -1491,12 +1474,12 @@ void StressTest::VerifyIterator(ThreadState* thread,
1491
1474
  }
1492
1475
 
1493
1476
  if (!*diverged && iter->Valid()) {
1494
- const WideColumns expected_columns =
1495
- GenerateExpectedWideColumns(GetValueBase(iter->value()), iter->value());
1496
- if (iter->columns() != expected_columns) {
1497
- fprintf(stderr, "Value and columns inconsistent for iterator: %s\n",
1498
- DebugString(iter->value(), iter->columns(), expected_columns)
1499
- .c_str());
1477
+ if (!VerifyWideColumns(iter->value(), iter->columns())) {
1478
+ fprintf(stderr,
1479
+ "Value and columns inconsistent for iterator: value: %s, "
1480
+ "columns: %s\n",
1481
+ iter->value().ToString(/* hex */ true).c_str(),
1482
+ WideColumnsToHex(iter->columns()).c_str());
1500
1483
 
1501
1484
  *diverged = true;
1502
1485
  }
@@ -2402,6 +2385,8 @@ void StressTest::PrintEnv() const {
2402
2385
  FLAGS_subcompactions);
2403
2386
  fprintf(stdout, "Use MultiGet : %s\n",
2404
2387
  FLAGS_use_multiget ? "true" : "false");
2388
+ fprintf(stdout, "Use GetEntity : %s\n",
2389
+ FLAGS_use_get_entity ? "true" : "false");
2405
2390
 
2406
2391
  const char* memtablerep = "";
2407
2392
  switch (FLAGS_rep_factory) {
@@ -2901,9 +2886,7 @@ void StressTest::MaybeUseOlderTimestampForRangeScan(ThreadState* thread,
2901
2886
  read_opts.timestamp = &ts_slice;
2902
2887
 
2903
2888
  // TODO (yanqin): support Merge with iter_start_ts
2904
- // TODO (yuzhangyu): support BlobDB with iter_start_ts
2905
- if (!thread->rand.OneInOpt(3) || FLAGS_use_merge || FLAGS_use_full_merge_v1 ||
2906
- FLAGS_enable_blob_files) {
2889
+ if (!thread->rand.OneInOpt(3) || FLAGS_use_merge || FLAGS_use_full_merge_v1) {
2907
2890
  return;
2908
2891
  }
2909
2892
 
@@ -94,6 +94,10 @@ class StressTest {
94
94
  const std::vector<int>& rand_column_families,
95
95
  const std::vector<int64_t>& rand_keys) = 0;
96
96
 
97
+ virtual void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
98
+ const std::vector<int>& rand_column_families,
99
+ const std::vector<int64_t>& rand_keys) = 0;
100
+
97
101
  virtual Status TestPrefixScan(ThreadState* thread,
98
102
  const ReadOptions& read_opts,
99
103
  const std::vector<int>& rand_column_families,
@@ -221,11 +225,10 @@ class StressTest {
221
225
  Slice value_from_expected) const;
222
226
 
223
227
  void VerificationAbort(SharedState* shared, int cf, int64_t key,
224
- const Slice& value, const WideColumns& columns,
225
- const WideColumns& expected_columns) const;
228
+ const Slice& value, const WideColumns& columns) const;
226
229
 
227
- static std::string DebugString(const Slice& value, const WideColumns& columns,
228
- const WideColumns& expected_columns);
230
+ static std::string DebugString(const Slice& value,
231
+ const WideColumns& columns);
229
232
 
230
233
  void PrintEnv() const;
231
234
 
@@ -99,6 +99,13 @@ int db_stress_tool(int argc, char** argv) {
99
99
 
100
100
  env_wrapper_guard = std::make_shared<CompositeEnvWrapper>(
101
101
  raw_env, std::make_shared<DbStressFSWrapper>(raw_env->GetFileSystem()));
102
+ if (!env_opts && !FLAGS_use_io_uring) {
103
+ // If using the default Env (Posix), wrap DbStressEnvWrapper with the
104
+ // legacy EnvWrapper. This is a workaround to prevent MultiGet and scans
105
+ // from failing when IO uring is disabled. The EnvWrapper
106
+ // has a default implementation of ReadAsync that redirects to Read.
107
+ env_wrapper_guard = std::make_shared<EnvWrapper>(env_wrapper_guard);
108
+ }
102
109
  db_stress_env = env_wrapper_guard.get();
103
110
 
104
111
  FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
@@ -416,16 +416,7 @@ class ExpectedStateTraceRecordHandler : public TraceRecord::Handler,
416
416
  entity.ToString(/* hex */ true));
417
417
  }
418
418
 
419
- if (columns.empty() || columns[0].name() != kDefaultWideColumnName) {
420
- return Status::Corruption("Cannot find default column in entity",
421
- entity.ToString(/* hex */ true));
422
- }
423
-
424
- const Slice& value_of_default = columns[0].value();
425
-
426
- const uint32_t value_base = GetValueBase(value_of_default);
427
-
428
- if (columns != GenerateExpectedWideColumns(value_base, value_of_default)) {
419
+ if (!VerifyWideColumns(columns)) {
429
420
  return Status::Corruption("Wide columns in entity inconsistent",
430
421
  entity.ToString(/* hex */ true));
431
422
  }
@@ -435,6 +426,11 @@ class ExpectedStateTraceRecordHandler : public TraceRecord::Handler,
435
426
  column_family_id, key, columns);
436
427
  }
437
428
 
429
+ assert(!columns.empty());
430
+ assert(columns.front().name() == kDefaultWideColumnName);
431
+
432
+ const uint32_t value_base = GetValueBase(columns.front().value());
433
+
438
434
  state_->Put(column_family_id, static_cast<int64_t>(key_id), value_base,
439
435
  false /* pending */);
440
436
 
@@ -387,6 +387,12 @@ std::vector<Status> MultiOpsTxnsStressTest::TestMultiGet(
387
387
  return std::vector<Status>{Status::NotSupported()};
388
388
  }
389
389
 
390
+ // Wide columns are currently not supported by transactions.
391
+ void MultiOpsTxnsStressTest::TestGetEntity(
392
+ ThreadState* /* thread */, const ReadOptions& /* read_opts */,
393
+ const std::vector<int>& /* rand_column_families */,
394
+ const std::vector<int64_t>& /* rand_keys */) {}
395
+
390
396
  Status MultiOpsTxnsStressTest::TestPrefixScan(
391
397
  ThreadState* thread, const ReadOptions& read_opts,
392
398
  const std::vector<int>& rand_column_families,
@@ -210,6 +210,10 @@ class MultiOpsTxnsStressTest : public StressTest {
210
210
  const std::vector<int>& rand_column_families,
211
211
  const std::vector<int64_t>& rand_keys) override;
212
212
 
213
+ void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
214
+ const std::vector<int>& rand_column_families,
215
+ const std::vector<int64_t>& rand_keys) override;
216
+
213
217
  Status TestPrefixScan(ThreadState* thread, const ReadOptions& read_opts,
214
218
  const std::vector<int>& rand_column_families,
215
219
  const std::vector<int64_t>& rand_keys) override;