@nxtedition/rocksdb 11.0.3 → 11.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (134)
  1. package/binding.cc +133 -122
  2. package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
  3. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  4. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
  5. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
  6. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
  8. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
  9. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
  10. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
  11. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
  12. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
  13. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
  14. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  15. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
  16. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
  17. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
  18. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
  19. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
  20. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
  21. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
  22. package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
  23. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  24. package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
  25. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
  26. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
  27. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
  29. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  31. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
  32. package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
  33. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
  34. package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
  35. package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
  36. package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
  37. package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
  38. package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
  39. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
  40. package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
  41. package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
  42. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
  43. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
  44. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
  45. package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
  46. package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
  56. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
  60. package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
  61. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
  62. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
  63. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
  64. package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
  65. package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
  66. package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
  67. package/deps/rocksdb/rocksdb/file/filename.h +2 -1
  68. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
  69. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
  70. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
  71. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
  72. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
  73. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
  75. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
  76. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
  77. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
  80. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
  81. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  84. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
  85. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
  86. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
  87. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
  88. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
  89. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  90. package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
  91. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  92. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
  93. package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
  94. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
  99. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
  100. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
  101. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
  102. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
  103. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
  104. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
  105. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
  106. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
  107. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
  108. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
  110. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
  111. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
  112. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  113. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
  114. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
  115. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
  116. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
  117. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
  118. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
  119. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
  120. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
  121. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  122. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
  123. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
  124. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  125. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
  126. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
  127. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
  128. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  129. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
  130. package/index.js +5 -17
  131. package/iterator.js +1 -1
  132. package/package.json +1 -1
  133. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  134. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -1611,67 +1611,90 @@ class NonBatchedOpsStressTest : public StressTest {
1611
1611
  }
1612
1612
  }
1613
1613
 
1614
+ // To track the final write status
1615
+ Status s;
1616
+ // To track the initial write status
1617
+ Status initial_write_s;
1618
+ // To track whether WAL write may have succeeded during the initial failed
1619
+ // write
1620
+ bool initial_wal_write_may_succeed = true;
1621
+
1614
1622
  PendingExpectedValue pending_expected_value =
1615
1623
  shared->PreparePut(rand_column_family, rand_key);
1624
+
1616
1625
  const uint32_t value_base = pending_expected_value.GetFinalValueBase();
1617
1626
  const size_t sz = GenerateValue(value_base, value, sizeof(value));
1618
1627
  const Slice v(value, sz);
1619
1628
 
1620
- Status s;
1621
-
1622
- if (FLAGS_use_put_entity_one_in > 0 &&
1623
- (value_base % FLAGS_use_put_entity_one_in) == 0) {
1624
- if (!FLAGS_use_txn) {
1625
- if (FLAGS_use_attribute_group) {
1626
- s = db_->PutEntity(write_opts, k,
1627
- GenerateAttributeGroups({cfh}, value_base, v));
1629
+ uint64_t wait_for_recover_start_time = 0;
1630
+ do {
1631
+ // In order to commit the expected state for the initial write failed with
1632
+ // injected retryable error and successful WAL write, retry the write
1633
+ // until it succeeds after the recovery finishes
1634
+ if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1635
+ initial_wal_write_may_succeed) {
1636
+ std::this_thread::sleep_for(std::chrono::microseconds(1 * 1000 * 1000));
1637
+ }
1638
+ if (FLAGS_use_put_entity_one_in > 0 &&
1639
+ (value_base % FLAGS_use_put_entity_one_in) == 0) {
1640
+ if (!FLAGS_use_txn) {
1641
+ if (FLAGS_use_attribute_group) {
1642
+ s = db_->PutEntity(write_opts, k,
1643
+ GenerateAttributeGroups({cfh}, value_base, v));
1644
+ } else {
1645
+ s = db_->PutEntity(write_opts, cfh, k,
1646
+ GenerateWideColumns(value_base, v));
1647
+ }
1628
1648
  } else {
1629
- s = db_->PutEntity(write_opts, cfh, k,
1630
- GenerateWideColumns(value_base, v));
1649
+ s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1650
+ return txn.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
1651
+ });
1631
1652
  }
1632
- } else {
1633
- s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1634
- return txn.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
1635
- });
1636
- }
1637
- } else if (FLAGS_use_timed_put_one_in > 0 &&
1638
- ((value_base + kLargePrimeForCommonFactorSkew) %
1639
- FLAGS_use_timed_put_one_in) == 0) {
1640
- WriteBatch wb;
1641
- uint64_t write_unix_time = GetWriteUnixTime(thread);
1642
- s = wb.TimedPut(cfh, k, v, write_unix_time);
1643
- if (s.ok()) {
1644
- s = db_->Write(write_opts, &wb);
1645
- }
1646
- } else if (FLAGS_use_merge) {
1647
- if (!FLAGS_use_txn) {
1648
- if (FLAGS_user_timestamp_size == 0) {
1649
- s = db_->Merge(write_opts, cfh, k, v);
1653
+ } else if (FLAGS_use_timed_put_one_in > 0 &&
1654
+ ((value_base + kLargePrimeForCommonFactorSkew) %
1655
+ FLAGS_use_timed_put_one_in) == 0) {
1656
+ WriteBatch wb;
1657
+ uint64_t write_unix_time = GetWriteUnixTime(thread);
1658
+ s = wb.TimedPut(cfh, k, v, write_unix_time);
1659
+ if (s.ok()) {
1660
+ s = db_->Write(write_opts, &wb);
1661
+ }
1662
+ } else if (FLAGS_use_merge) {
1663
+ if (!FLAGS_use_txn) {
1664
+ if (FLAGS_user_timestamp_size == 0) {
1665
+ s = db_->Merge(write_opts, cfh, k, v);
1666
+ } else {
1667
+ s = db_->Merge(write_opts, cfh, k, write_ts, v);
1668
+ }
1650
1669
  } else {
1651
- s = db_->Merge(write_opts, cfh, k, write_ts, v);
1670
+ s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1671
+ return txn.Merge(cfh, k, v);
1672
+ });
1652
1673
  }
1653
1674
  } else {
1654
- s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1655
- return txn.Merge(cfh, k, v);
1656
- });
1657
- }
1658
- } else {
1659
- if (!FLAGS_use_txn) {
1660
- if (FLAGS_user_timestamp_size == 0) {
1661
- s = db_->Put(write_opts, cfh, k, v);
1675
+ if (!FLAGS_use_txn) {
1676
+ if (FLAGS_user_timestamp_size == 0) {
1677
+ s = db_->Put(write_opts, cfh, k, v);
1678
+ } else {
1679
+ s = db_->Put(write_opts, cfh, k, write_ts, v);
1680
+ }
1662
1681
  } else {
1663
- s = db_->Put(write_opts, cfh, k, write_ts, v);
1682
+ s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1683
+ return txn.Put(cfh, k, v);
1684
+ });
1664
1685
  }
1665
- } else {
1666
- s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1667
- return txn.Put(cfh, k, v);
1668
- });
1669
1686
  }
1670
- }
1687
+ UpdateIfInitialWriteFails(db_stress_env, s, &initial_write_s,
1688
+ &initial_wal_write_may_succeed,
1689
+ &wait_for_recover_start_time);
1690
+
1691
+ } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1692
+ initial_wal_write_may_succeed);
1671
1693
 
1672
1694
  if (!s.ok()) {
1673
1695
  pending_expected_value.Rollback();
1674
1696
  if (IsErrorInjectedAndRetryable(s)) {
1697
+ assert(!initial_wal_write_may_succeed);
1675
1698
  return s;
1676
1699
  } else if (FLAGS_inject_error_severity == 2) {
1677
1700
  if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
@@ -1685,11 +1708,15 @@ class NonBatchedOpsStressTest : public StressTest {
1685
1708
  fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
1686
1709
  thread->shared->SafeTerminate();
1687
1710
  }
1711
+ } else {
1712
+ PrintWriteRecoveryWaitTimeIfNeeded(
1713
+ db_stress_env, initial_write_s, initial_wal_write_may_succeed,
1714
+ wait_for_recover_start_time, "TestPut");
1715
+ pending_expected_value.Commit();
1716
+ thread->stats.AddBytesForWrites(1, sz);
1717
+ PrintKeyValue(rand_column_family, static_cast<uint32_t>(rand_key), value,
1718
+ sz);
1688
1719
  }
1689
- pending_expected_value.Commit();
1690
- thread->stats.AddBytesForWrites(1, sz);
1691
- PrintKeyValue(rand_column_family, static_cast<uint32_t>(rand_key), value,
1692
- sz);
1693
1720
  return s;
1694
1721
  }
1695
1722
 
@@ -1711,27 +1738,51 @@ class NonBatchedOpsStressTest : public StressTest {
1711
1738
  Slice key = key_str;
1712
1739
  auto cfh = column_families_[rand_column_family];
1713
1740
 
1741
+ // To track the final write status
1742
+ Status s;
1743
+ // To track the initial write status
1744
+ Status initial_write_s;
1745
+ // To track whether WAL write may have succeeded during the initial failed
1746
+ // write
1747
+ bool initial_wal_write_may_succeed = true;
1748
+
1714
1749
  // Use delete if the key may be overwritten and a single deletion
1715
1750
  // otherwise.
1716
- Status s;
1717
1751
  if (shared->AllowsOverwrite(rand_key)) {
1718
1752
  PendingExpectedValue pending_expected_value =
1719
1753
  shared->PrepareDelete(rand_column_family, rand_key);
1720
- if (!FLAGS_use_txn) {
1721
- if (FLAGS_user_timestamp_size == 0) {
1722
- s = db_->Delete(write_opts, cfh, key);
1754
+
1755
+ uint64_t wait_for_recover_start_time = 0;
1756
+ do {
1757
+ // In order to commit the expected state for the initial write failed
1758
+ // with injected retryable error and successful WAL write, retry the
1759
+ // write until it succeeds after the recovery finishes
1760
+ if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1761
+ initial_wal_write_may_succeed) {
1762
+ std::this_thread::sleep_for(
1763
+ std::chrono::microseconds(1 * 1000 * 1000));
1764
+ }
1765
+ if (!FLAGS_use_txn) {
1766
+ if (FLAGS_user_timestamp_size == 0) {
1767
+ s = db_->Delete(write_opts, cfh, key);
1768
+ } else {
1769
+ s = db_->Delete(write_opts, cfh, key, write_ts);
1770
+ }
1723
1771
  } else {
1724
- s = db_->Delete(write_opts, cfh, key, write_ts);
1772
+ s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1773
+ return txn.Delete(cfh, key);
1774
+ });
1725
1775
  }
1726
- } else {
1727
- s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1728
- return txn.Delete(cfh, key);
1729
- });
1730
- }
1776
+ UpdateIfInitialWriteFails(db_stress_env, s, &initial_write_s,
1777
+ &initial_wal_write_may_succeed,
1778
+ &wait_for_recover_start_time);
1779
+ } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1780
+ initial_wal_write_may_succeed);
1731
1781
 
1732
1782
  if (!s.ok()) {
1733
1783
  pending_expected_value.Rollback();
1734
1784
  if (IsErrorInjectedAndRetryable(s)) {
1785
+ assert(!initial_wal_write_may_succeed);
1735
1786
  return s;
1736
1787
  } else if (FLAGS_inject_error_severity == 2) {
1737
1788
  if (!is_db_stopped_ &&
@@ -1746,27 +1797,48 @@ class NonBatchedOpsStressTest : public StressTest {
1746
1797
  fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
1747
1798
  thread->shared->SafeTerminate();
1748
1799
  }
1800
+ } else {
1801
+ PrintWriteRecoveryWaitTimeIfNeeded(
1802
+ db_stress_env, initial_write_s, initial_wal_write_may_succeed,
1803
+ wait_for_recover_start_time, "TestDelete");
1804
+ pending_expected_value.Commit();
1805
+ thread->stats.AddDeletes(1);
1749
1806
  }
1750
- pending_expected_value.Commit();
1751
- thread->stats.AddDeletes(1);
1752
1807
  } else {
1753
1808
  PendingExpectedValue pending_expected_value =
1754
1809
  shared->PrepareSingleDelete(rand_column_family, rand_key);
1755
- if (!FLAGS_use_txn) {
1756
- if (FLAGS_user_timestamp_size == 0) {
1757
- s = db_->SingleDelete(write_opts, cfh, key);
1810
+
1811
+ uint64_t wait_for_recover_start_time = 0;
1812
+ do {
1813
+ // In order to commit the expected state for the initial write failed
1814
+ // with injected retryable error and successful WAL write, retry the
1815
+ // write until it succeeds after the recovery finishes
1816
+ if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1817
+ initial_wal_write_may_succeed) {
1818
+ std::this_thread::sleep_for(
1819
+ std::chrono::microseconds(1 * 1000 * 1000));
1820
+ }
1821
+ if (!FLAGS_use_txn) {
1822
+ if (FLAGS_user_timestamp_size == 0) {
1823
+ s = db_->SingleDelete(write_opts, cfh, key);
1824
+ } else {
1825
+ s = db_->SingleDelete(write_opts, cfh, key, write_ts);
1826
+ }
1758
1827
  } else {
1759
- s = db_->SingleDelete(write_opts, cfh, key, write_ts);
1828
+ s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1829
+ return txn.SingleDelete(cfh, key);
1830
+ });
1760
1831
  }
1761
- } else {
1762
- s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
1763
- return txn.SingleDelete(cfh, key);
1764
- });
1765
- }
1832
+ UpdateIfInitialWriteFails(db_stress_env, s, &initial_write_s,
1833
+ &initial_wal_write_may_succeed,
1834
+ &wait_for_recover_start_time);
1835
+ } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1836
+ initial_wal_write_may_succeed);
1766
1837
 
1767
1838
  if (!s.ok()) {
1768
1839
  pending_expected_value.Rollback();
1769
1840
  if (IsErrorInjectedAndRetryable(s)) {
1841
+ assert(!initial_wal_write_may_succeed);
1770
1842
  return s;
1771
1843
  } else if (FLAGS_inject_error_severity == 2) {
1772
1844
  if (!is_db_stopped_ &&
@@ -1781,9 +1853,13 @@ class NonBatchedOpsStressTest : public StressTest {
1781
1853
  fprintf(stderr, "single delete error: %s\n", s.ToString().c_str());
1782
1854
  thread->shared->SafeTerminate();
1783
1855
  }
1856
+ } else {
1857
+ PrintWriteRecoveryWaitTimeIfNeeded(
1858
+ db_stress_env, initial_write_s, initial_wal_write_may_succeed,
1859
+ wait_for_recover_start_time, "TestDelete");
1860
+ pending_expected_value.Commit();
1861
+ thread->stats.AddSingleDeletes(1);
1784
1862
  }
1785
- pending_expected_value.Commit();
1786
- thread->stats.AddSingleDeletes(1);
1787
1863
  }
1788
1864
  return s;
1789
1865
  }
@@ -1805,16 +1881,20 @@ class NonBatchedOpsStressTest : public StressTest {
1805
1881
  rand_key =
1806
1882
  thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
1807
1883
  }
1808
- for (int j = 0; j < FLAGS_range_deletion_width; ++j) {
1809
- if (j == 0 ||
1810
- ((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
1811
- range_locks.emplace_back(new MutexLock(
1812
- shared->GetMutexForKey(rand_column_family, rand_key + j)));
1813
- }
1814
- }
1884
+ GetDeleteRangeKeyLocks(thread, rand_column_family, rand_key, &range_locks);
1885
+
1886
+ // To track the final write status
1887
+ Status s;
1888
+ // To track the initial write status
1889
+ Status initial_write_s;
1890
+ // To track whether WAL write may have succeeded during the initial failed
1891
+ // write
1892
+ bool initial_wal_write_may_succeed = true;
1893
+
1815
1894
  std::vector<PendingExpectedValue> pending_expected_values =
1816
1895
  shared->PrepareDeleteRange(rand_column_family, rand_key,
1817
1896
  rand_key + FLAGS_range_deletion_width);
1897
+
1818
1898
  const int covered = static_cast<int>(pending_expected_values.size());
1819
1899
  std::string keystr = Key(rand_key);
1820
1900
  Slice key = keystr;
@@ -1823,20 +1903,36 @@ class NonBatchedOpsStressTest : public StressTest {
1823
1903
  Slice end_key = end_keystr;
1824
1904
  std::string write_ts_str;
1825
1905
  Slice write_ts;
1826
- Status s;
1827
- if (FLAGS_user_timestamp_size) {
1828
- write_ts_str = GetNowNanos();
1829
- write_ts = write_ts_str;
1830
- s = db_->DeleteRange(write_opts, cfh, key, end_key, write_ts);
1831
- } else {
1832
- s = db_->DeleteRange(write_opts, cfh, key, end_key);
1833
- }
1906
+ uint64_t wait_for_recover_start_time = 0;
1907
+
1908
+ do {
1909
+ // In order to commit the expected state for the initial write failed with
1910
+ // injected retryable error and successful WAL write, retry the write
1911
+ // until it succeeds after the recovery finishes
1912
+ if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1913
+ initial_wal_write_may_succeed) {
1914
+ std::this_thread::sleep_for(std::chrono::microseconds(1 * 1000 * 1000));
1915
+ }
1916
+ if (FLAGS_user_timestamp_size) {
1917
+ write_ts_str = GetNowNanos();
1918
+ write_ts = write_ts_str;
1919
+ s = db_->DeleteRange(write_opts, cfh, key, end_key, write_ts);
1920
+ } else {
1921
+ s = db_->DeleteRange(write_opts, cfh, key, end_key);
1922
+ }
1923
+ UpdateIfInitialWriteFails(db_stress_env, s, &initial_write_s,
1924
+ &initial_wal_write_may_succeed,
1925
+ &wait_for_recover_start_time);
1926
+ } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
1927
+ initial_wal_write_may_succeed);
1928
+
1834
1929
  if (!s.ok()) {
1835
1930
  for (PendingExpectedValue& pending_expected_value :
1836
1931
  pending_expected_values) {
1837
1932
  pending_expected_value.Rollback();
1838
1933
  }
1839
1934
  if (IsErrorInjectedAndRetryable(s)) {
1935
+ assert(!initial_wal_write_may_succeed);
1840
1936
  return s;
1841
1937
  } else if (FLAGS_inject_error_severity == 2) {
1842
1938
  if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
@@ -1850,13 +1946,17 @@ class NonBatchedOpsStressTest : public StressTest {
1850
1946
  fprintf(stderr, "delete range error: %s\n", s.ToString().c_str());
1851
1947
  thread->shared->SafeTerminate();
1852
1948
  }
1949
+ } else {
1950
+ PrintWriteRecoveryWaitTimeIfNeeded(
1951
+ db_stress_env, initial_write_s, initial_wal_write_may_succeed,
1952
+ wait_for_recover_start_time, "TestDeleteRange");
1953
+ for (PendingExpectedValue& pending_expected_value :
1954
+ pending_expected_values) {
1955
+ pending_expected_value.Commit();
1956
+ }
1957
+ thread->stats.AddRangeDeletions(1);
1958
+ thread->stats.AddCoveredByRangeDeletions(covered);
1853
1959
  }
1854
- for (PendingExpectedValue& pending_expected_value :
1855
- pending_expected_values) {
1856
- pending_expected_value.Commit();
1857
- }
1858
- thread->stats.AddRangeDeletions(1);
1859
- thread->stats.AddCoveredByRangeDeletions(covered);
1860
1960
  return s;
1861
1961
  }
1862
1962
 
@@ -1881,6 +1981,7 @@ class NonBatchedOpsStressTest : public StressTest {
1881
1981
  // ingestion a clean slate
1882
1982
  s = db_stress_env->DeleteFile(sst_filename);
1883
1983
  }
1984
+
1884
1985
  if (fault_fs_guard) {
1885
1986
  fault_fs_guard->EnableThreadLocalErrorInjection(
1886
1987
  FaultInjectionIOType::kMetadataRead);
@@ -1923,6 +2024,7 @@ class NonBatchedOpsStressTest : public StressTest {
1923
2024
 
1924
2025
  PendingExpectedValue pending_expected_value =
1925
2026
  shared->PreparePut(column_family, key);
2027
+
1926
2028
  const uint32_t value_base = pending_expected_value.GetFinalValueBase();
1927
2029
  values.push_back(value_base);
1928
2030
  pending_expected_values.push_back(pending_expected_value);
@@ -2485,6 +2587,8 @@ class NonBatchedOpsStressTest : public StressTest {
2485
2587
  // Value doesn't exist in db, update state to reflect that
2486
2588
  shared->SyncDelete(cf, key);
2487
2589
  return true;
2590
+ } else {
2591
+ assert(false);
2488
2592
  }
2489
2593
  }
2490
2594
  char expected_value_data[kValueMaxLen];
@@ -2583,7 +2687,11 @@ class NonBatchedOpsStressTest : public StressTest {
2583
2687
  SharedState* const shared = thread->shared;
2584
2688
  assert(shared);
2585
2689
 
2586
- if (!shared->AllowsOverwrite(key) && shared->Exists(column_family, key)) {
2690
+ const ExpectedValue expected_value =
2691
+ thread->shared->Get(column_family, key);
2692
+ bool may_exist = !ExpectedValueHelper::MustHaveNotExisted(expected_value,
2693
+ expected_value);
2694
+ if (!shared->AllowsOverwrite(key) && may_exist) {
2587
2695
  // Just do read your write checks for keys that allow overwrites.
2588
2696
  return;
2589
2697
  }
@@ -181,10 +181,10 @@ FileOptions FileSystem::OptimizeForBlobFileRead(
181
181
 
182
182
  IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
183
183
  const std::string& fname, bool should_sync,
184
- const IOOptions& io_options) {
184
+ const IOOptions& io_options,
185
+ const FileOptions& file_options) {
185
186
  std::unique_ptr<FSWritableFile> file;
186
- EnvOptions soptions;
187
- IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr);
187
+ IOStatus s = fs->NewWritableFile(fname, file_options, &file, nullptr);
188
188
  if (!s.ok()) {
189
189
  return s;
190
190
  }